In [1]:
from cedne import utils
from cedne import Worm, NervousSystem
import os
import pickle
import pandas as pd
import numpy as np
import sys
import time
from collections import Counter
import matplotlib.pyplot as plt

In [2]:
RECURSION_LIMIT = 5000
sys.setrecursionlimit(RECURSION_LIMIT)
today = time.strftime("%Y-%m-%d")

In [5]:
# Hyperparams
framerate = 5
time_per_frame = 1./framerate
baseline_points=(100,150) # 20-30 seconds
min_timepoints = 450

In [None]:
suffixes = ['D', 'V', 'L', 'R', 'DL', 'DR', 'VL', 'VR', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13']
exception_neurs = {'DB1': 'DB01', 'DB2': 'DB02', 'VB1': 'VB01', 'VB2': 'VB02'}
topDir = "/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/"

In [7]:
genotypes = ['N2', 'inx-7-mutant', 'inx-7-rescue']
promoters = ['ncs-1', 'inx-4', 'glr-1', 'acr-5', 'inx-4+mbr-1', 'flp-3p+flp-7p+nmr-1p+sro-1p', 'odr-2(2b)+odr-2(18)p']
stimuli = ['OP-PA-OP', 'OP-Buffer-OP', 'Buffer-PA-Buffer', 'Buffer-Buffer-Buffer', 'OP-gacA-OP']

In [8]:
all_worms = {genotype: {stimulus: {promoter: {'naive': [], 'trained': []} for promoter in promoters} for stimulus in stimuli} for genotype in genotypes}

# N2
## OP-PA-OP
### ncs-1p
all_worms['N2']['OP-PA-OP']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/ncs-1p/Analysis/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-PA-OP']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/ncs-1p/Analysis/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

###inx-4p
all_worms['N2']['OP-PA-OP']['inx-4']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/inx-4p/Analysis/Naive/') if f.name.startswith('inx-4') ]
all_worms['N2']['OP-PA-OP']['inx-4']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/inx-4p/Analysis/Trained/') if f.name.startswith('inx-4')]

###glr-1p
all_worms['N2']['OP-PA-OP']['glr-1']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/glr-1p/Analysis/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-PA-OP']['glr-1']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/glr-1p/Analysis/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

###acr-5p
all_worms['N2']['OP-PA-OP']['acr-5']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/acr-5p/Analysis/OP-PA-excel summary/') if f.name.startswith('acr-5') and 'naive' in f.name]
all_worms['N2']['OP-PA-OP']['acr-5']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/acr-5p/Analysis/OP-PA-excel summary/') if f.name.startswith('acr-5') and 'train' in f.name]

###inx-4+mbr-1p
all_worms['N2']['OP-PA-OP']['inx-4+mbr-1']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/inx-4p+mbr-1p/Analysis/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-PA-OP']['inx-4+mbr-1']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/inx-4p+mbr-1p/Analysis/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

### flp-3p+flp-7p+nmr-1p+sro-1p
all_worms['N2']['OP-PA-OP']['flp-3p+flp-7p+nmr-1p+sro-1p']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/flp-3p+flp-7p+sro-1p+nmr-1p/Analysis/csv-files/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-PA-OP']['flp-3p+flp-7p+nmr-1p+sro-1p']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/flp-3p+flp-7p+sro-1p+nmr-1p/Analysis/csv-files/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

### odr-2(2b)+odr-2(18)p
all_worms['N2']['OP-PA-OP']['odr-2(2b)+odr-2(18)p']['naive'] = [f.path for f in os.scandir(topDir + 'OP-PA/odr-2(2b)+odr-2(18)p/Analysis/csv-files/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-PA-OP']['odr-2(2b)+odr-2(18)p']['trained'] = [f.path for f in os.scandir(topDir + 'OP-PA/odr-2(2b)+odr-2(18)p/Analysis/csv-files/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

## OP-Buffer-OP
###ncs-1p
all_worms['N2']['OP-Buffer-OP']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'OP-buffer-OP/Analysis/Summary/Naive/') if 'naive' in f.name]
all_worms['N2']['OP-Buffer-OP']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'OP-buffer-OP/Analysis/Summary/Trained/') if 'train' in f.name]

## Buffer-PA-Buffer
###ncs-1p
all_worms['N2']['Buffer-PA-Buffer']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/ncs-1p/Analysis/Naive/') if f.name.startswith('ncs-1') and 'naive' in f.name]
all_worms['N2']['Buffer-PA-Buffer']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/ncs-1p/Analysis/Trained/') if f.name.startswith('ncs-1') and 'train' in f.name]

###inx-4p
all_worms['N2']['Buffer-PA-Buffer']['inx-4']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/inx-4p/Analysis/Naive/') if f.name.startswith('inx-4')]
all_worms['N2']['Buffer-PA-Buffer']['inx-4']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/inx-4p/Analysis/Trained/') if f.name.startswith('inx-4')]

###glr-1p
all_worms['N2']['Buffer-PA-Buffer']['glr-1']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/glr-1p/Analysis/Naive/') if ('glr-1' in f.name or 'worm' in f.name) and 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-PA-Buffer']['glr-1']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/glr-1p/Analysis/Trained/') if ('glr-1' in f.name or 'worm' in f.name) and 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

###acr-5p
all_worms['N2']['Buffer-PA-Buffer']['acr-5']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/acr-5p/Analysis/PA-Buffer/') if f.name.startswith('acr-5') and 'naive' in f.name]
all_worms['N2']['Buffer-PA-Buffer']['acr-5']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/acr-5p/Analysis/PA-Buffer/') if f.name.startswith('acr-5') and 'train' in f.name]

###inx-4+mbr-1p
all_worms['N2']['Buffer-PA-Buffer']['inx-4+mbr-1']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/inx-4p+mbr-1p/Summary_both_csv_and_excel/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-PA-Buffer']['inx-4+mbr-1']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/inx-4p+mbr-1p/Summary_both_csv_and_excel/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

### flp-3p+flp-7p+nmr-1p+sro-1p
all_worms['N2']['Buffer-PA-Buffer']['flp-3p+flp-7p+nmr-1p+sro-1p']['naive'] = [f.path for f in os.scandir(topDir + 'PA-buffer/flp-3p+flp-7p+nmr-1p+sro-1p/Analysis/combined_excel/Naive') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-PA-Buffer']['flp-3p+flp-7p+nmr-1p+sro-1p']['trained'] = [f.path for f in os.scandir(topDir + 'PA-buffer/flp-3p+flp-7p+nmr-1p+sro-1p/Analysis/combined_excel/Trained') if not f.name.startswith('~') and not f.name.startswith('.')]

### odr-2(2b)+odr-2(18)p
all_worms['N2']['Buffer-PA-Buffer']['odr-2(2b)+odr-2(18)p']['naive'] = [f.path for f in os.scandir(topDir + 'PA-Buffer/odr-2(2b)+odr-2(18)p/Analysis/csv_files/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-PA-Buffer']['odr-2(2b)+odr-2(18)p']['trained'] = [f.path for f in os.scandir(topDir + 'PA-Buffer/odr-2(2b)+odr-2(18)p/Analysis/csv_files/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

## Buffer-Buffer-Buffer
###ncs-1p
all_worms['N2']['Buffer-Buffer-Buffer']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/ncs-1p/Analysis/csv_files/Naive') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/ncs-1p/Analysis/csv_files/Trained') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

###glr-1p
all_worms['N2']['Buffer-Buffer-Buffer']['glr-1']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/glr-1p/Analysis/Naive/') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['glr-1']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/glr-1p/Analysis/Trained/') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

###acr-5p
all_worms['N2']['Buffer-Buffer-Buffer']['acr-5']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/acr-5p/Analysis/Naive/') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['acr-5']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/acr-5p/Analysis/Trained/') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

###inx-4+mbr-1p
all_worms['N2']['Buffer-Buffer-Buffer']['inx-4+mbr-1']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/inx-4+mbr-1p/Analysis/Naive/') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['inx-4+mbr-1']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/inx-4+mbr-1p/Analysis/Trained/') if not f.name.startswith('~') and not f.name.startswith('.')]

### flp-3p+flp-7p+nmr-1p+sro-1p
all_worms['N2']['Buffer-Buffer-Buffer']['flp-3p+flp-7p+nmr-1p+sro-1p']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/flp-3p+flp-7p+sro-1p+nmr-1p/Analysis/csv-raw/') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['flp-3p+flp-7p+nmr-1p+sro-1p']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/flp-3p+flp-7p+sro-1p+nmr-1p/Analysis/csv-raw/') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

### odr-2(2b)+odr-2(18)p
all_worms['N2']['Buffer-Buffer-Buffer']['odr-2(2b)+odr-2(18)p']['naive'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/odr-2(2b)+odr-2(18)p/Summary/csv_files/Naive/') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['Buffer-Buffer-Buffer']['odr-2(2b)+odr-2(18)p']['trained'] = [f.path for f in os.scandir(topDir + 'Buffer-buffer data/odr-2(2b)+odr-2(18)p/Summary/csv_files/Trained/') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

##OP-gacA-OP
###ncs-1p
all_worms['N2']['OP-gacA-OP']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'OP-gacA/Analysis/Analysis summary/Naive/') if 'naive' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['N2']['OP-gacA-OP']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'OP-gacA/Analysis/Analysis summary/Trained/') if 'train' in f.name and not f.name.startswith('~') and not f.name.startswith('.')]

#Mutants- inx-7
## OP-PA-OP
### ncs-1p
all_worms['inx-7-mutant']['OP-PA-OP']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'Decorrelation_validation/inx-7/ZC4251+ZC4255-OP-PA/Analysis/ZC4255/Naive') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['inx-7-mutant']['OP-PA-OP']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'Decorrelation_validation/inx-7/ZC4251+ZC4255-OP-PA/Analysis/ZC4255/Trained') if not f.name.startswith('~') and not f.name.startswith('.')]

#Mutants- inx-7-rescue
## OP-PA-OP
### ncs-1p
all_worms['inx-7-rescue']['OP-PA-OP']['ncs-1']['naive'] = [f.path for f in os.scandir(topDir + 'Decorrelation_validation/inx-7/ZC4251+ZC4255-OP-PA/Analysis/ZC4251/Naive') if not f.name.startswith('~') and not f.name.startswith('.')]
all_worms['inx-7-rescue']['OP-PA-OP']['ncs-1']['trained'] = [f.path for f in os.scandir(topDir + 'Decorrelation_validation/inx-7/ZC4251+ZC4255-OP-PA/Analysis/ZC4251/Trained') if not f.name.startswith('~') and not f.name.startswith('.')]

## If there is no data for a particular genotype, stimulus, promoter, naive/trained, then remove it from the dictionary
for genotype in genotypes:
    for stimulus in stimuli:
        for promoter in promoters:
            for naive_trained in ['naive', 'trained']:
                if all_worms[genotype][stimulus][promoter][naive_trained] == '':
                    del all_worms[genotype][stimulus][promoter][naive_trained]
            if all_worms[genotype][stimulus][promoter] == {}:
                del all_worms[genotype][stimulus][promoter]
        if all_worms[genotype][stimulus] == {}:
            del all_worms[genotype][stimulus]
    if all_worms[genotype] == {}:
        del all_worms[genotype]

In [9]:
for genotype in genotypes:
    for stimulus in all_worms[genotype]:
        for promoter in all_worms[genotype][stimulus]:
            for condition in all_worms[genotype][stimulus][promoter]:
                print(genotype, stimulus, promoter, condition, len(all_worms[genotype][stimulus][promoter][condition]))

N2 OP-PA-OP ncs-1 naive 19
N2 OP-PA-OP ncs-1 trained 20
N2 OP-PA-OP inx-4 naive 28
N2 OP-PA-OP inx-4 trained 28
N2 OP-PA-OP glr-1 naive 27
N2 OP-PA-OP glr-1 trained 23
N2 OP-PA-OP acr-5 naive 27
N2 OP-PA-OP acr-5 trained 30
N2 OP-PA-OP inx-4+mbr-1 naive 26
N2 OP-PA-OP inx-4+mbr-1 trained 28
N2 OP-PA-OP flp-3p+flp-7p+nmr-1p+sro-1p naive 22
N2 OP-PA-OP flp-3p+flp-7p+nmr-1p+sro-1p trained 21
N2 OP-PA-OP odr-2(2b)+odr-2(18)p naive 22
N2 OP-PA-OP odr-2(2b)+odr-2(18)p trained 21
N2 OP-Buffer-OP ncs-1 naive 15
N2 OP-Buffer-OP ncs-1 trained 19
N2 OP-Buffer-OP inx-4 naive 0
N2 OP-Buffer-OP inx-4 trained 0
N2 OP-Buffer-OP glr-1 naive 0
N2 OP-Buffer-OP glr-1 trained 0
N2 OP-Buffer-OP acr-5 naive 0
N2 OP-Buffer-OP acr-5 trained 0
N2 OP-Buffer-OP inx-4+mbr-1 naive 0
N2 OP-Buffer-OP inx-4+mbr-1 trained 0
N2 OP-Buffer-OP flp-3p+flp-7p+nmr-1p+sro-1p naive 0
N2 OP-Buffer-OP flp-3p+flp-7p+nmr-1p+sro-1p trained 0
N2 OP-Buffer-OP odr-2(2b)+odr-2(18)p naive 0
N2 OP-Buffer-OP odr-2(2b)+odr-2(18)p trained 0


In [10]:
## Read csv file 

def read_csv_file(filename):
    with open(filename, 'r') as f:
        focal_plane_row = f.readline().strip().split('\t')[0].split(',')
        header_row = f.readline().strip().split('\t')[0].split(',')

    data = pd.read_csv(filename, sep=',', header=None, engine='python', skiprows=2)
    data.columns = header_row

    ### Clean up the header and focal plane rows
    while len(header_row) < data.shape[1]:
        header_row.append('')
    while len(focal_plane_row) < data.shape[1]:
        focal_plane_row.append('')

    focal_plane_groups = {}
    current_fp = None

    final_headers = []
    header_fp_pairs = []
    current_fp = None

    ### Create a dictionary of focal planes and their corresponding columns
    for fp, col in zip(focal_plane_row, header_row):
        if not col or pd.isna(col) or col.strip() == '':
            final_headers.append(None)  # Mark to skip this column
            header_fp_pairs.append((None, None))
            continue

        if fp.strip():
            current_fp = fp.strip()

        col_clean = col.strip()
        new_col_name = f"{current_fp}_{col_clean}"
        final_headers.append(new_col_name)
        header_fp_pairs.append((current_fp, new_col_name))

    valid_indices = [i for i, name in enumerate(final_headers) if name is not None]
    data = data.iloc[:, valid_indices]
    new_headers = [final_headers[i] for i in valid_indices]
    data.columns = new_headers

    focal_plane_dfs = {}
    for (fp, colname) in [header_fp_pairs[i] for i in valid_indices]:
        if fp not in focal_plane_dfs:
            focal_plane_dfs[fp] = []
        if not colname[-5:]=='Frame':
            focal_plane_dfs[fp].append(colname)

    focal_plane_dfs = {
        fp[-1]: data[cols].copy()
        for fp, cols in focal_plane_dfs.items()
    }
    return focal_plane_dfs

In [11]:
def readFileToWorm(file, genotype, promoter, stimulus, learning_condition, sex= 'Hermaphrodite', stage='Day-1 Adult', baseline_normalized=True):
    print(file)
    neuronDf = {}
    finalNeuronDf = {}
    already_added = {}
    if file.endswith('.csv'):
        file_type = 'csv'
        try:
            worm_name = file.split('/')[-1][:-4].split('raw_results_')[-1]
            worm = Worm(name=worm_name, stage=stage, sex=sex, genotype=genotype, promoter=promoter, stimulus=stimulus, learning_condition=learning_condition)
            df_focal_planes = read_csv_file(file)
            focal_planes = df_focal_planes.keys()
            for fp in focal_planes:
                df = df_focal_planes[fp]
                neuronNames = [key.split('_')[1] for key in df.keys() if not key.split('_')[1][-3:] == '-bg']
                neuronNames_bg = [key.split('_')[1] for key in df.keys() if key.split('_')[1][-3:] == '-bg']
                try:
                    assert len(neuronNames) == 2*len(set(neuronNames)), f"Duplicate or missing neuron names found in focal plane {fp}. {Counter(neuronNames)}"
                    assert len(neuronNames_bg) == 2*len(set(neuronNames_bg)), f"Duplicate or missing background neuron names found in focal plane {fp}."
                    assert len(neuronNames) >0, f"At least one neuron found in focal plane {fp}."
                    assert all([f'{n}-bg' in neuronNames_bg for n in neuronNames]), f"Background neuron not found for all neurons in focal plane {fp}."
                except Exception as e:
                    print(f"Error in reading file {file}. {e} {worm_name} {fp} {neuronNames}")

                for neuronName in neuronNames:
                    if not neuronName in finalNeuronDf:
                        finalNeuronDf[neuronName] = pd.DataFrame()
                    for x in ['_gcamp', '-bg_gcamp', '_mcherry', '-bg_mcherry']:
                        if not f"{neuronName}{x}" in df.keys():
                            f"Neuron {neuronName} doesn't have all key {x} in focal plane {fp}."
                    new_keys = {key: key.split(f"_{neuronName}_") if not f"_{neuronName}-bg_" in key else key.split(f"_{neuronName}-") for key in df.keys()} #
                    key_list = [key for key in new_keys if any(x in new_keys[key] for x in ['gcamp', 'bg_gcamp', 'mcherry', 'bg_mcherry'])]
                    neuronDf[neuronName] = df.loc[:, [key for key in df.keys() if key in key_list]]
                    neuronDf[neuronName].columns = ['_'.join([new_keys[key][0][:3], new_keys[key][1]]) for key in neuronDf[neuronName].columns]

                    bg_subtracted_gcamp = neuronDf[neuronName][f'FP{fp}_gcamp'] - neuronDf[neuronName][f'FP{fp}_bg_gcamp'] 
                    bg_subtracted_mcherry = (neuronDf[neuronName][f'FP{fp}_mcherry'] - neuronDf[neuronName][f'FP{fp}_bg_mcherry'])
                    bg_subtracted_mcherry_mean_normed = bg_subtracted_mcherry/np.mean(bg_subtracted_mcherry)
                    normalized_gcamp = bg_subtracted_gcamp/bg_subtracted_mcherry_mean_normed
                    if baseline_normalized:
                        baseline = np.mean(normalized_gcamp[baseline_points[0]:baseline_points[1]])
                        baseline_normalized_gcamp = (normalized_gcamp - baseline)/baseline
                        finalNeuronDf[neuronName][fp] = baseline_normalized_gcamp
                    else:
                        finalNeuronDf[neuronName][fp] = normalized_gcamp
        except Exception as e:
            print(f"Error in reading file {file}. {e} {worm_name} {fp} {neuronName}")

        # df = pd.read_csv(file)

    elif file.endswith('.xlsx'):
        ### Check if the header names are duplicated and df is merging.
        try:
            worm_name = file.split('/')[-1][:-5]
            worm = Worm(name=worm_name, stage=stage, sex=sex, genotype=genotype, promoter=promoter, stimulus=stimulus, learning_condition=learning_condition)
            excel_file = pd.ExcelFile(file)
            for sheet in excel_file.sheet_names:
                if sheet not in ['Sample', 'SAMPLE', 'correlation', 'Correlation', 'Corrrelation', 'Sheet1', 'Sheet17']:
                    df = pd.read_excel(excel_file, sheet_name=sheet)
                    # print(sheet)

                    if '-' in sheet:
                        neuronName = sheet.split("-")[0]
                        if not neuronName in finalNeuronDf:
                            finalNeuronDf[neuronName] = pd.DataFrame()
                        focal_planes_string = sheet.split("-")[1]
                        if "&" in focal_planes_string:
                            focal_planes = [fp[-1] for fp in focal_planes_string.split("&")]
                        else:
                            focal_planes = [focal_planes_string[-1]]
                        focal_counter = Counter(focal_planes)

                        if len(focal_counter)!=len(focal_planes):
                            new_focal_planes = []
                            for c in sorted(focal_counter):
                                if focal_counter[c] >1:
                                    new_focal_planes += [f"{c}.{fc}" for fc in range(1, focal_counter[c]+1)]
                            focal_planes = new_focal_planes
                    else:
                        neuronName = sheet
                        focal_planes = ['1']
                    # print(f"Focal planes: {focal_planes}")
                    df_keys = [key for key in df.keys()]

                    if len(df_keys)==0:
                        # print(f"No data in this sheet {sheet} for file {file}")
                        continue

                    if df_keys[0] in ['ROI', 'Unnamed: 0']:
                        file_type = 'xlsx-old'
                    elif df_keys[0] == ' ':
                        file_type = 'xlsx-new'
                    else:
                        print(f"df-keys are {df_keys}.")
                    
                    if file_type == 'xlsx-new':
                        # print(f"{neuronName} starting.")
                        new_keys = {key: key.split(f"_{neuronName}_") if not f"_{neuronName}-bg_" in key else key.split(f"_{neuronName}-") for key in df.keys()} #
                        key_list = [key for key in new_keys if any(x in new_keys[key] for x in ['gcamp', 'bg_gcamp', 'mcherry', 'bg_mcherry'])]
                        neuronDf[neuronName] = df.loc[:, [key for key in df.keys() if key in key_list]]
                        neuronDf[neuronName].columns = ['_'.join([new_keys[key][0][:3], new_keys[key][1]]) for key in neuronDf[neuronName].columns]
                    elif file_type == 'xlsx-old':
                        if df_keys[0] in ['ROI', 'Unnamed: 0']:
                            # Identify empty columns (all NaN)
                            empty_cols = df.columns[df.isna().all()].tolist()
                            # Get indices of those empty columns
                            empty_indices = [df.columns.get_loc(col) for col in empty_cols]
                            # Add boundaries (start=0 and end=last column)
                            split_points = [0] + empty_indices + [df.shape[1]]
                            # Store cleaned blocks
                            combined_dfs = []
                            j=0
                            for i in range(len(split_points) - 1):
                                start = split_points[i]
                                end = split_points[i + 1]
                                
                                # Skip empty slices
                                if start == end or start == end-1: # Consecutive empty columns
                                    continue
                                
                                sub_df = df.iloc[:, start:end].copy()
                                sub_df.dropna(axis=1, how='all', inplace=True)
                                

                            # roi_col_indices = [i for i, col in enumerate(df.columns) if col[:3] == 'ROI']
                            # roi_col_indices.append(len(df.columns))
                            # combined_dfs = []
                            # for i in range(len(roi_col_indices) - 1):
                                focal_plane = focal_planes[j]
                            #     start = roi_col_indices[i]
                            #     end = roi_col_indices[i + 1]
                            #     # Slice the columns for this ROI section
                            #     sub_df = df.iloc[:, start:end]
                            #     sub_df.dropna(axis=1, how='all', inplace=True)
                                sub_df.columns = [f'FP{focal_plane}_gcamp frame', f'FP{focal_plane}_gcamp', f'FP{focal_plane}_bg_gcamp', f'FP{focal_plane}_normed', f'FP{focal_plane}_mcherry frame', f'FP{focal_plane}_mcherry', f'FP{focal_plane}_bg_mcherry', f'FP{focal_plane}_diff_mcherry', f'FP{focal_plane}_baseline_normed_mcherry', f'FP{focal_plane}_baseline', f'FP{focal_plane}_time', f'FP{focal_plane}_final']
                                if len(sub_df.columns) != 12:
                                    print(f"Error in reading file {file}. {neuronName} {focal_plane} {sub_df.columns}")
                            #     #neuronDf[neuronName][focal_plane] = sub_df
                                combined_dfs.append(sub_df)
                                j+=1
                            neuronDf[neuronName] = pd.concat(combined_dfs, axis=1)
                        else:
                            print(f"Problem with the file format for file {file}. No ROI header found")
                    for focal_plane in focal_planes:
                        bg_subtracted_gcamp = neuronDf[neuronName][f'FP{focal_plane}_gcamp'] - neuronDf[neuronName][f'FP{focal_plane}_bg_gcamp'] 
                        bg_subtracted_mcherry = (neuronDf[neuronName][f'FP{focal_plane}_mcherry'] - neuronDf[neuronName][f'FP{focal_plane}_bg_mcherry'])
                        bg_subtracted_mcherry_mean_normed = bg_subtracted_mcherry/np.mean(bg_subtracted_mcherry)
                        normalized_gcamp = bg_subtracted_gcamp/bg_subtracted_mcherry_mean_normed
                        if baseline_normalized:
                            baseline = np.mean(normalized_gcamp[baseline_points[0]:baseline_points[1]])
                            baseline_normalized_gcamp = (normalized_gcamp - baseline)/baseline
                            finalNeuronDf[neuronName][focal_plane] = baseline_normalized_gcamp
                        else:
                            finalNeuronDf[neuronName][focal_plane] = normalized_gcamp

        except Exception as e:
            print(f"Error in reading file {file}. {e} {sheet}")
    else:
        print("File format not supported")
    
    try:
        nn = NervousSystem(worm, network=learning_condition, condition = learning_condition)
        utils.build_nervous_system(nn, neuron_data=f"{utils.DATADIR}/Cell_list.pkl", chem_synapses=f"{utils.DATADIR}/chem_adj.pkl", elec_synapses=f"{utils.DATADIR}/gapjn_symm_adj.pkl", positions=f"{utils.DATADIR}/neuronPosition.pkl")
        neuron_class = {neuronName: [neuronName + "L", neuronName + "R"] for neuronName in finalNeuronDf.keys() if neuronName + "L" in nn.neurons and neuronName + "R" in nn.neurons}
        nn = nn.fold_network(neuron_class, name=nn.name)
        nn.recorded_neurons = []
        
        for neuronName in finalNeuronDf.keys():
            for focal_plane in finalNeuronDf[neuronName].keys():
                if len(finalNeuronDf[neuronName][focal_plane])<min_timepoints:
                    continue
                if neuronName in exception_neurs: #Making exception here
                    nname = exception_neurs[neuronName]
                else:
                    if neuronName in nn.neurons:
                        nname = neuronName
                    else:
                        if neuronName + "L" in finalNeuronDf.keys() or neuronName + "R" in finalNeuronDf.keys():
                            print("L/R neuron already present")
                            continue
                        else:
                            print(f"{neuronName} could not be added.")
                            continue
                #nname = correct_neurons(nname)
                t = nn.neurons[nname].add_trial(focal_plane)
                t.recording = finalNeuronDf[neuronName][focal_plane].to_numpy()[:min_timepoints]
                nn.recorded_neurons.append(nname)
    except Exception as e:
        print(f"Error making nervous system for worm {worm_name}. {e} at {neuronName} {nname}")
    return worm
    
# No longer necessary to fix
# def correct_neurons(nname):
#     if 'RIP' in nname:
#         if len(nname)>3:
#             nname = 'OLL' + nname[3:]
#         else:
#             nname = 'OLLL' # Just randomly assigning left for now.
#     return nname 

def hasLR(worm, neuron_name):
    ## If there's no LR seperation, then return False
    pass

def averageLR(worm, neuron_name):
    ## If there's no LR seperation, then average their responses.
    pass

def unsureNeuron(worm, neuron_name):
    ## Flag neurons with uncertain identity
    pass

In [13]:
## Add stimulus information: stimulus = {'OP': [], 'PA':[]} where the list contains the time-series of stimulus values (1 or 0)
## Contract the LR neurons early except for the ones that are not supposed to be contracted (ASEL/ASER). Keep the focal plane seperately. For amplitude plots 
## later, average out the LR responses. 

# genotypes = ['N2', 'inx-7', 'inx-7-rescue']
# promoters_filt = ['inx-4']
# stimuli_filt = ['OP-PA-OP']
# condition_filt = ['naive']
baseline_normalized=True

wormDict = {}
for genotype in all_worms:
    for stimulus in all_worms[genotype]:
        # if stimulus in stimuli_filt:
            for promoter in all_worms[genotype][stimulus]:
                # if promoter in promoters_filt:
                    for condition in all_worms[genotype][stimulus][promoter]:
                        # if condition in condition_filt:
                            for file in all_worms[genotype][stimulus][promoter][condition]:
                                if not (file.startswith('~') or 'Jingting Liang\'s conflicted copy' in file):
                                    worm = readFileToWorm(file, genotype, promoter, stimulus, condition, baseline_normalized=baseline_normalized)
                                    wormDict[worm.name] = worm
                                # print(worm)

/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230331 naive-worm3.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230406 naive-worm3.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230330 naive-worm5.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230330 naive-worm4.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230406 naive-worm2.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230331 naive-worm2.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning data/Imaging/OP-PA/ncs-1p/Analysis/Naive/ncs-1p 20230404 naive-worm1.xlsx
/Users/sahilmoza/Dropbox_rclone/Lu-Zhang lab share/learning da

In [None]:
w = list(wormDict.keys())[0]
wormDict[w].networks
neuronList = []

w = list(wormDict.keys())[0]
for condition in wormDict[w].networks:
    neuronList+= wormDict[w].networks[condition].neurons.keys()
neuronList = set(neuronList)
print(neuronList)

if not os.path.exists('pickles/'+ today ):
    os.makedirs('pickles/'+ today)
output_pickleDir = 'pickles/'+ today + '/'

for worm in wormDict:
    for condition in wormDict[worm].networks:
        pf_name = output_pickleDir + wormDict[worm].genotype + '_' + wormDict[worm].promoter + '_' + wormDict[worm].stimulus + '_' + condition + '_' + wormDict[worm].name + ('_not_baseline_normalized' if not baseline_normalized else '') + ('_{global_normalize}' if global_normalize else '')
        print(pf_name)
        with open (pf_name, 'wb') as po:
            pickle.dump(wormDict[worm], po)


{'ALA', 'RMDDR', 'RMHR', 'ASEL', 'AVHR', 'RIS', 'URADR', 'SMDVL', 'VA09', 'AS05', 'IL1DR', 'DA03', 'ALMR', 'DVC', 'SIADL', 'VA07', 'M3R', 'IL2R', 'AS10', 'DD05', 'AS02', 'VA03', 'OLLR', 'CANR', 'VD13', 'PVQL', 'RIFR', 'URAVL', 'DA01', 'VD09', 'ASJL', 'URAVR', 'RIPL', 'SIBDR', 'ALNL', 'AWAR', 'ASJR', 'CEPVR', 'IL1VL', 'AVDL', 'PVPL', 'AS06', 'ASIL', 'VD12', 'VA06', 'DB05', 'VB01', 'URXR', 'RIMR', 'AS08', 'VC01', 'SDQR', 'NSML', 'RIBL', 'DVB', 'SIBVR', 'VD01', 'AIYR', 'RMDVR', 'AVBL', 'AVM', 'RMEL', 'PLNR', 'AVKL', 'URYVL', 'DB01', 'PHBR', 'VA01', 'RMED', 'RMDDL', 'DD03', 'MCR', 'OLQVR', 'VC02', 'VC03', 'M2R', 'VD08', 'DA07', 'AIAR', 'DA05', 'VC05', 'RMDVL', 'PHCL', 'AS11', 'SMBDL', 'HSNL', 'ALNR', 'PDA', 'ASGL', 'PDEL', 'PHAL', 'FLPL', 'IL2DR', 'CEPVL', 'AINR', 'PVQR', 'SAADL', 'PVT', 'AIMR', 'RID', 'PVCL', 'HSNR', 'AVBR', 'SIADR', 'ADEL', 'SMBVR', 'PVWL', 'ASIR', 'DA08', 'RIR', 'ADFL', 'URBR', 'SMDDR', 'VA04', 'VD10', 'ASER', 'AVG', 'IL1L', 'RIAL', 'M4', 'SABVL', 'I1L', 'AS04', 'AINL',

In [17]:
counts = []
for wname,w in wormDict.items():
    counts.append(f'{w.genotype} {w.stimulus} {w.promoter} {list(w.networks.keys())[0]}')
for ty, num in sorted(Counter(counts).items(), key=lambda x:x[0]):
    print(f'{ty} {num}')

N2 Buffer-Buffer-Buffer acr-5 naive 10
N2 Buffer-Buffer-Buffer acr-5 trained 10
N2 Buffer-Buffer-Buffer glr-1 naive 10
N2 Buffer-Buffer-Buffer glr-1 trained 10
N2 Buffer-Buffer-Buffer inx-4+mbr-1 naive 10
N2 Buffer-Buffer-Buffer inx-4+mbr-1 trained 10
N2 Buffer-Buffer-Buffer ncs-1 naive 10
N2 Buffer-Buffer-Buffer ncs-1 trained 10
N2 Buffer-Buffer-Buffer odr-2(2b)+odr-2(18)p naive 11
N2 Buffer-Buffer-Buffer odr-2(2b)+odr-2(18)p trained 12
N2 Buffer-PA-Buffer acr-5 naive 29
N2 Buffer-PA-Buffer acr-5 trained 31
N2 Buffer-PA-Buffer flp-3p+flp-7p+nmr-1p+sro-1p naive 23
N2 Buffer-PA-Buffer flp-3p+flp-7p+nmr-1p+sro-1p trained 18
N2 Buffer-PA-Buffer glr-1 naive 20
N2 Buffer-PA-Buffer glr-1 trained 27
N2 Buffer-PA-Buffer inx-4 naive 21
N2 Buffer-PA-Buffer inx-4 trained 23
N2 Buffer-PA-Buffer inx-4+mbr-1 naive 21
N2 Buffer-PA-Buffer inx-4+mbr-1 trained 23
N2 Buffer-PA-Buffer ncs-1 naive 23
N2 Buffer-PA-Buffer ncs-1 trained 24
N2 Buffer-PA-Buffer odr-2(2b)+odr-2(18)p naive 25
N2 Buffer-PA-Buffer 