In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# meta 
h19meta = pd.read_csv('./data/raw/Metadata/Human19_Metadata.csv', index_col=0)
h19meta = h19meta[h19meta.stimulus_type == 'longsquare']
h19meta.rename(columns={'ZD': 'ZD status'}, inplace=True)

h18meta = pd.read_csv('./data/raw/Metadata/Human18_Metadata.csv', index_col=0)
h18meta = h18meta[h18meta['Stimulus type'] == 'Long_Square']
h18meta.rename(columns={'ZD': 'ZD status'}, inplace=True)

m19meta = pd.read_csv('./data/raw/Metadata/Mouse19_Metadata.csv', index_col=0)
m19meta = m19meta[m19meta.stimulus_type == 'longsquare']

m18meta = pd.read_csv('./data/raw/Metadata/Mouse18_Metadata.csv', index_col=0)
m18meta = m18meta[m18meta.stimulus_type == 'longsquare']

In [3]:
h19meta.shape

(56, 16)

In [4]:
# solutions metadata for converted nwbs
solutions_df = pd.read_csv('./data/raw/Metadata/extracted_features_Solutions.csv', usecols=['nwb', 'Extra-Solution', 'Internal-solution'])

solutions_df.rename(columns={'Extra-Solution': 'external_soln', 
                             'Internal-solution': 'internal_soln'}, inplace=True)

solutions_df['file_id'] = solutions_df.nwb.str.rstrip('.nwb')
solutions_df.drop('nwb', axis=1, inplace=True)

## Demographics data cleaning

In [5]:
dem19 = pd.read_excel('./data/raw/Demographic_information/Human tissue-2019-Demographic information.xlsx', engine='openpyxl').dropna(how='all')
dem18 = pd.read_excel('./data/raw/Demographic_information/Human tissue-2018-Demographic information.xlsx', engine='openpyxl').dropna(how='all')

In [6]:
resection19_map = {
    'R Ant Temp Lobe + Amygdalohippocampectomy': 'Right-ATL',
    'L Ant Temp Lobe + Amygdalohippocampectomy': 'Left-ATL',
    'L Ant Temp Lobectomy ': 'Left-ATL',
    'Left Anterior temporal lobectomy + Amygdalohippocampectomy': 'Left-ATL',
    'L Awake Temp crani for resection with ECoG': 'Left-ATL', 
    'Left temporal lobectomy + Amygdalohippocampectomy +/- skull defect repair': 'Left-ATL',
    'Resection of mass from left parietal lobe (update required)': 'Parietal lobe',
    'R awake craniotomy with ECOG (SMA)': 'Right-ATL', 
    'R Ant Temp Lobe + Amygdalohippocampectomy + dermoid cyst resection': 'Right-ATL',
    'L Temp crani for tumor resection + ECoG': 'Left-ATL' 
}

dem19['resection_location'] = dem19['Resection Procedure '].map(resection19_map)


resection18_map = {
    'L ATL + Amyg': 'Left-ATL',
    'R ATL + Amyg': 'Right-ATL',
    'R ATL + Amyg + DNET removal': 'Right-ATL', #DNET: dysembryoplastic neuroepithelial tumor? 
    'R Awake FL Crani for Lesion resection': 'Right-FL', 
    'NOT A RESECTION:  Patient Specific Implant': 'Unknown',
    'R Awake Crani with EcOG': 'Unknown',
    'L ATL + Amyg + lesion with resection of cavernoma': 'Left-ATL',
    'R ATL + Amyg + lesion with resection of cavernoma': 'Right-ATL', 
    'L ATL + Amyg + cavernoma resection': 'Left-ATL',
    'L FL for lesion': 'Left-FL' 
}

dem18['resection_location'] = dem18['Resection Procedure '].map(resection18_map)

In [7]:
# should we change diagnosis column to tumour presence column?
diagnosis18_map = {
    'Medically refractory epilepsy': 'Epilepsy',
    'Medically refractory epilepsy plus non-febrile sz in infancy (TX with phenobarbital)': 'Epilepsy',
    "Can't access; pt expired": 'Unknown',
    'Medically refractory epilepsy + tumor': 'Tumor', #both
    'Cavernous malformation': '',
    'Medically refractory epilepsy; lesion': 'Epilepsy',
    'Left mesial temporal sclerosis': 'Epilepsy',
    'Left temporal tumor, probably DNET; seizures': 'Tumor', #both
    'left sided mesial temporal sclerosis and medically refractory epilepsy': 'Epilepsy',
    'Right sided mesial temporal sclerosis and medically refractory epilepsy': 'Epilepsy',
    'Left MTL cavernoma and medically intractable epilepsy': 'Epilepsy',
    'Right mesiotemporal intraaxial tumor and epilepsy': 'Tumor', #both
    'Left MTL cavernoma resection leading to absence seizures': 'Epilepsy',
    '(likely MRE with tumor)': 'Tumor' #both
    
}

dem18.rename(columns={'Diagnosis': 'Diagnosis_long'}, inplace=True)
dem18['diagnosis'] = dem18['Diagnosis_long'].str.strip().map(diagnosis18_map)


diagnosis19_map = {
    'Medically refractory epilepsy': 'Epilepsy',
    'Emergency Pt.': 'Unknown',
    'Medically refractory epilepsy -likely glioneural hamartoma': 'Tumor', #both
    'Medically refractory epilepsy secondary to underlying dermoid cyst': 'Epilepsy', #unsure?
    'Medically refractory epilepsy with right sided mesial temporal sclerosis': 'Epilepsy',
    'Mesial Temporal Epilepsy': 'Epilepsy'
}

dem19.rename(columns={'Diagnosis': 'Diagnosis_long'}, inplace=True)
dem19['diagnosis'] = dem19['Diagnosis_long'].str.strip().map(diagnosis19_map)



demog_cols = ['PatientId', 'Sex', 'Age At OR ','Years of Seizure history', 'diagnosis', 'Diagnosis_long',
              'SX type', 'resection_location', 'Resection Procedure ', 'Focal cortical dysplasia (FCD)',
              'Other Imaging notes','Antiepileptic Drugs ']

In [8]:
idmap_h19 = {
    "2019_11_04_0098": 1912,
    "2019_11_04_0128": 1912,
    "2019_11_26_0103": 1913,
    "2019_11_28_0016": 1914,
    "2019_11_28_0054": 1914,
    "2019_11_28_0090": 1914,
    "2019_11_28_0114": 1914,
    "2019_11_28_0127": 1914,
    "19228030": 1906,
    "19228044": 1906,
    "19228058": 1906,
    "19228068": 1906,
    "19122003": 1901,
    "19122026": 1901,
    "19128006": 1902,
    "19128040": 1902,
    "19128061": 1902, 
    "19128068": 1902,
    "19129058": 1903,
    "19129072": 1903,
    "19129014": 1903,
    "19129022": 1903,
    "19129024": 1903,
    "19129037": 1903,
    "19129040": 1903,
    "19129043": 1903,
    "19129047": 1903,
    "19319025": 1907,
    "19320001": 1908,
    "19320007": 1908,
    "19320017": 1908,
    "19320022": 1908,
    "19320025": 1908,
    "19320030": 1908,
    "19320041": 1908,
    "19328001": 1909,
    "19328009": 1909,
    "19328019": 1909,
    "19328034": 1909,
    "19328039": 1909,
    "19328046": 1909,
    "2019_11_04_0001": 1912,
    "2019_11_04_0083": 1912,
    "2019_11_26_0006": 1913,
    "2019_11_26_0019": 1913,
    "2019_11_26_0037": 1913,
    "2019_11_26_0094": 1913,
    "2019_11_26_0110": 1913,
    "2019_11_28_0010": 1914,
    "2019_11_28_0038": 1914,
    "2019_11_28_0079": 1914,
    "2019_11_28_0119": 1914,
    "19o10010": 1911,
    "19o10045": 1911,
    "19o10054": 1911,
    "19o10065": 1911
    }


idmap_h18 = {
    "18320005": 1808, #case1
    "18320015": 1808, #case1
    "18320021": 1808, #case1
    "18320014": 1809, #case2
    "18320019": 1809, #case2
    "18320031": 1809, #case2
    "18426010": 1815, #could be case 1816?
    "18426017": 1815, #could be case 1816?
    "18201004": 1802,
    "18201011": 1802,
    "18201028": 1802,
    "18220008": 1803,
    "18220019": 1803,
    "18129004": 1801,
    "18129009": 1801,
    "18329051": 1813,
    "18329062": 1813,
    "18329014": 1813,
    "18329044": 1813,
    "18o22001": 1822,
    "18o22010": 1822,
    "18o22020": 1822
    }

In [9]:
h19meta['PatientId'] = h19meta['ABF File'].map(idmap_h19)
h18meta['PatientId'] = h18meta['ABF File'].map(idmap_h18)

In [10]:
print(h19meta.columns)
print(h18meta.columns)
print(m19meta.columns)
print(m18meta.columns)

Index(['ABF File', 'Data Type', 'Exp. Date', 'Cell #', 'Cell Layer',
       'Stim Type', 'Threshold', 'DC', 'ZD status', 'Hold', 'Gain', 'Offset',
       'Response Channel', 'Command Channel', 'RMP', 'stimulus_type',
       'PatientId'],
      dtype='object')
Index(['ABF File', 'Data Type', 'Exp. Date', 'Case #', 'Cell #', 'Cell Layer',
       'Stim Type', 'Threshold', 'Gain Value', 'ZD status', 'Hold',
       'Unnamed: 12', 'Gain', 'Offset', 'Response Channel', 'Command Channel',
       'RMP', 'Stimulus type', 'PatientId'],
      dtype='object')
Index(['Cell Name', 'Data Type', 'Exp. Date', 'Cell #', 'Cell Layer',
       'Stim Type', 'ZD status', 'stimulus_type', 'Gain', 'Offset',
       'Response Channel', 'Command Channel', 'RMP'],
      dtype='object')
Index(['Cell Name', 'Data Type', 'Exp. Date', 'Cell #', 'Cell Layer',
       'Stim Type', 'ZD status', 'stimulus_type', 'Gain', 'Offset',
       'Response Channel', 'Command Channel', 'RMP'],
      dtype='object')


In [11]:
cols_to_select = ['ABF File', 'Data Type', 'Exp. Date', 'Cell #', 'Cell Layer', 'ZD status', 'RMP']
hcols_to_select = cols_to_select + ['PatientId']

In [12]:
dem19.loc[:, demog_cols].shape

(14, 12)

In [13]:
h19meta.loc[:, hcols_to_select].shape

(56, 8)

In [14]:
# merge demogs into metadata
h19meta = h19meta.loc[:, hcols_to_select].merge(dem19.loc[:, demog_cols], on='PatientId')
# merge in solutions info
h19meta = h19meta.merge(solutions_df, left_on='ABF File', right_on='file_id').drop('file_id', axis=1)

In [15]:
h18meta.shape

(22, 19)

In [16]:
dem18.loc[:, demog_cols].shape

(26, 12)

In [17]:
# this should merge essential metadata for h18 (should only really add solutions used for each recording)
h18meta = h18meta.loc[:, hcols_to_select].merge(dem18.loc[:, demog_cols], on='PatientId')
# merge in solutions info
h18meta = h18meta.merge(solutions_df, left_on='ABF File', right_on='file_id').drop('file_id', axis=1)

In [18]:
h18meta.rename(columns={'PatientId': 'subject_id', 
                      'Sex': 'sex',
                      'Cell Layer': 'layer_name',
                      'Age At OR ' : 'age',
                      'Years of Seizure history': 'seizure_duration_years', 
                      #'Diagnosis': 'diagnosis',
                      'Antiepileptic Drugs ': 'drugs',
                      'Exp. Date': 'expt_date'}, inplace=True)

In [19]:
h19meta.rename(columns={'PatientId': 'subject_id', 
                      'Sex': 'sex',
                      'Cell Layer': 'layer_name',
                      'Age At OR ' : 'age',
                      'Years of Seizure history': 'seizure_duration_years', 
                      #'Diagnosis': 'diagnosis',
                      'Antiepileptic Drugs ': 'drugs',
                      'Exp. Date': 'expt_date'}, inplace=True)

## Natcom metadata cleaning

In [20]:
nat_com_meta_url = 'https://raw.githubusercontent.com/stripathy/valiante_lab_abf_process/master/output_files/cell_final_raw_meta_df.csv'
nat_com_meta = pd.read_csv(nat_com_meta_url, index_col=0)
nat_com_meta['cell_id'] = nat_com_meta.cell_id.apply(lambda x: x.rstrip('.abf'))

In [21]:
nat_com_meta.columns

Index(['cell_id', 'expt_date', 'layer_name', 'cell_type', 'recorder_name',
       'full_path', 'abf_tag', 'abf_version', 'file_time', 'num_sweeps',
       'resp_chan', 'resp_gain', 'resp_offset', 'resp_sampling_rate',
       'rmp_error', 'rmp_val', 'stim_amp_vec', 'stim_chan', 'stim_duration',
       'stim_end_time', 'stim_gain', 'stim_name', 'stim_path',
       'stim_sampling_rate', 'stim_start_time', 'valid_stim'],
      dtype='object')

In [22]:
nat_com_meta.loc[:, ['cell_id', 'expt_date', 'layer_name', 'cell_type', 'rmp_val']]

Unnamed: 0,cell_id,expt_date,layer_name,cell_type,rmp_val
0,13n05011,2013-11-05,L5,Pyr,-54.937742
1,13n21007,2013-11-21,L5,Pyr,-59.158322
2,13d02004,2013-12-02,L5,Pyr,-59.069821
3,13d02022,2013-12-02,L5,Pyr,-64.428708
4,13d02049,2013-12-02,L5,Pyr,-57.000000
...,...,...,...,...,...
207,2020_01_27_0042,2020-01-27,L5,Pyr,-60.700000
210,2020_01_28_0029,2020-01-28,L5,Pyr,-68.000000
208,2020_01_28_0008,2020-01-28,L5,Pyr,-65.000000
209,2020_01_28_0017,2020-01-28,L5,Pyr,-55.000000


In [23]:
# extract demographics data from Shreejoys repo
demog_url = 'https://raw.githubusercontent.com/stripathy/valiante_ih/master/summary_tables/cells_w_demographic_data.csv'
demog_df = pd.read_csv(demog_url, index_col=0)
demog_df['cell_id'] = demog_df.cell_id.apply(lambda x: x.rstrip('.abf'))

In [24]:
demog_df.shape

(160, 45)

In [25]:
demog_df.head()

Unnamed: 0,cell_id,layer_name,cell_type,recorder_name,subject_id,acquisition_time,voltage_drift,has_morphology,tag_comments,resection_date,...,tau,rheo,apthr,peak_t,fast_trough_t,trough_t,res_center_freq,res_3dB_freq,res_sharpness,has_resonance
1,13n05011,L5,Pyr,Homeira,X2013.11.05,2013-11-05 17:06:16.448,0.158689,False,,2013-11-05,...,22.871723,50.000001,-36.022949,0.2275,0.2319,0.2625,,,,
2,13n21007,L5,Pyr,Homeira,X2013.11.21,2013-11-21 14:30:46.356,3.677365,False,,2013-11-21,...,11.569936,50.000001,-43.212891,0.1442,0.1462,0.1582,,,,
3,13d02004,L5,Pyr,Homeira,X2013.12.02,2013-12-02 15:47:42.278,2.288816,False,,2013-12-02,...,20.580639,50.000001,-52.404785,0.1777,0.1799,0.209,,,,
4,13d02022,L5,Pyr,Homeira,X2013.12.02,2013-12-02 17:09:07.433,1.049802,False,,2013-12-02,...,14.647232,150.000002,-49.554443,0.1725,0.1745,0.1744,,,,
5,13d02049,L5,Pyr,Homeira,X2013.12.02,2013-12-02 18:21:58.566,1.789062,False,gain50 resting -57mv,2013-12-02,...,21.951789,50.000001,-52.264404,0.1775,0.1798,0.201,,,,


In [26]:
demog_df.columns

Index(['cell_id', 'layer_name', 'cell_type', 'recorder_name', 'subject_id',
       'acquisition_time', 'voltage_drift', 'has_morphology', 'tag_comments',
       'resection_date', 'age', 'sex', 'seizure_duration', 'unique_subject',
       'diagnosis', 'resection_location', 'drugs', 'Diagnosis_long',
       'Resection_location_long', 'has_burst', 'rin', 'rmp', 'apamp', 'ahpamp',
       'aphw', 'apvel', 'sagamp', 'adratio', 'first_isi', 'avgisi', 'cvisi',
       'sag', 'fislope', 'latency', 'avg_rate', 'tau', 'rheo', 'apthr',
       'peak_t', 'fast_trough_t', 'trough_t', 'res_center_freq',
       'res_3dB_freq', 'res_sharpness', 'has_resonance'],
      dtype='object')

In [27]:
demog_df = demog_df.loc[:, ['cell_id', 'layer_name', 'cell_type', 'recorder_name', 'subject_id',
       'acquisition_time', 'voltage_drift', 'has_morphology', 'tag_comments',
       'resection_date', 'age', 'sex', 'seizure_duration', 'unique_subject',
       'diagnosis', 'resection_location', 'drugs', 'Diagnosis_long',
       'Resection_location_long']]

In [28]:
demog_df.shape

(160, 19)

In [29]:
natcom_merged = nat_com_meta.loc[:, ['cell_id', 'expt_date', 'rmp_val']].merge(demog_df, on='cell_id')

In [30]:
natcom_merged['Data Type'] = 'Human'
natcom_merged['Cell #'] = 1
natcom_merged['ZD status'] = 'n.a'

natcom_merged.rename(columns={'rmp_val': 'RMP', 
                              'seizure_duration': 'seizure_duration_years',
                              'cell_id': 'ABF File'}, inplace=True)

# adds cols for the natcoms data. solutions were regular aCSF and potassium gluconate 
natcom_merged['external_soln'] = 'aCSF'
natcom_merged['internal_soln'] = 'Potassium gluconate'

In [31]:
natcom_merged.shape

(160, 26)

In [32]:
cols_to_select = ['ABF File', 'Data Type', 'expt_date', 'layer_name', 'RMP', 'subject_id','sex', 
                  'age', 'seizure_duration_years', 'diagnosis', 'resection_location', 'drugs', 'Cell #', 
                  'ZD status', 'internal_soln', 'external_soln']

In [33]:
natcom_merged.loc[:, cols_to_select]

Unnamed: 0,ABF File,Data Type,expt_date,layer_name,RMP,subject_id,sex,age,seizure_duration_years,diagnosis,resection_location,drugs,Cell #,ZD status,internal_soln,external_soln
0,13n05011,Human,2013-11-05,L5,-54.937742,X2013.11.05,M,57,45,Epilepsy,Right-ATL,"LSC, CZP, CBZ",1,n.a,Potassium gluconate,aCSF
1,13n21007,Human,2013-11-21,L5,-59.158322,X2013.11.21,M,27,11,Epilepsy,Right-ATL,"LSC, LRZ, CLB",1,n.a,Potassium gluconate,aCSF
2,13d02004,Human,2013-12-02,L5,-59.069821,X2013.12.02,F,39,11,Tumor,Right-ATL,"LSC, LRZ, LEV",1,n.a,Potassium gluconate,aCSF
3,13d02022,Human,2013-12-02,L5,-64.428708,X2013.12.02,F,39,11,Tumor,Right-ATL,"LSC, LRZ, LEV",1,n.a,Potassium gluconate,aCSF
4,13d02049,Human,2013-12-02,L5,-57.000000,X2013.12.02,F,39,11,Tumor,Right-ATL,"LSC, LRZ, LEV",1,n.a,Potassium gluconate,aCSF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,2020_01_27_0042,Human,2020-01-27,L5,-60.700000,X2020.01.27,M,24,4,Epilepsy,Left-ATL,"LEV, LTG",1,n.a,Potassium gluconate,aCSF
156,2020_01_28_0029,Human,2020-01-28,L5,-68.000000,X2020.01.28,M,33,6,Epilepsy,Right-ATL,"LSc,PGB,PHN",1,n.a,Potassium gluconate,aCSF
157,2020_01_28_0008,Human,2020-01-28,L5,-65.000000,X2020.01.28,M,33,6,Epilepsy,Right-ATL,"LSc,PGB,PHN",1,n.a,Potassium gluconate,aCSF
158,2020_01_28_0017,Human,2020-01-28,L5,-55.000000,X2020.01.28,M,33,6,Epilepsy,Right-ATL,"LSc,PGB,PHN",1,n.a,Potassium gluconate,aCSF


In [34]:
h19meta.shape

(56, 21)

In [35]:
# there are 20 files overlapping from h19 and natcom
h19meta[h19meta['ABF File'].isin(natcom_merged['ABF File'])].shape

(20, 21)

In [36]:
# keep only h19 data that is not represented in natcoms data
h19meta = h19meta[~h19meta['ABF File'].isin(natcom_merged['ABF File'])]

In [37]:
h19meta.loc[:, cols_to_select].shape

(36, 16)

In [38]:
h18meta.loc[:, cols_to_select].shape

(18, 16)

In [39]:
human_meta = pd.concat([natcom_merged.loc[:, cols_to_select], h19meta.loc[:, cols_to_select], h18meta.loc[:, cols_to_select]])

In [40]:
layers = {'L5': 'L5', 'L23': 'L23', 'L2.3': 'L23', 'L3C': 'L3C', 'L3c': 'L3C', 'L2': 'L23', 'L3': 'L23', 'L23-Int': 'L23', 'L5-Int': 'L5'}
human_meta['aggregated_cell_layer'] = human_meta.layer_name.map(layers)

In [41]:
human_meta = pd.concat([human_meta, pd.get_dummies(human_meta.diagnosis, prefix='diagnosis', dtype=bool)], axis=1)#.drop('diagnosis', axis=1)

In [42]:
human_meta.sex = human_meta.sex.str.strip(" ")

# Process mouse meta

In [43]:
m19meta = pd.read_csv('./data/raw/Metadata/Mouse19_Metadata.csv', index_col=0)
m19meta = m19meta[m19meta.stimulus_type == 'longsquare']
m19meta['nwb'] = m19meta['Cell Name'].apply(lambda x: x + '.nwb')

m18meta = pd.read_csv('./data/raw/Metadata/Mouse18_Metadata.csv', index_col=0)
m18meta = m18meta[m18meta.stimulus_type == 'longsquare']
m18meta['nwb'] = m18meta['Cell Name'].apply(lambda x: x + '.nwb')

In [44]:
print(human_meta.shape)
print(m18meta.shape)
print(m19meta.shape)

(214, 19)
(5, 14)
(16, 14)


In [45]:
mcols_fix = {'Cell Layer': 'aggregated_cell_layer', 
             'Cell Name': 'ABF File', 
             'Exp. Date': 'expt_date'}

m18meta.rename(columns=mcols_fix, inplace=True)
m19meta.rename(columns=mcols_fix, inplace=True)

In [46]:
m18meta = m18meta.merge(solutions_df, left_on='ABF File', right_on='file_id', how='left').drop('file_id', axis=1)
m19meta = m19meta.merge(solutions_df, left_on='ABF File', right_on='file_id', how='left').drop('file_id', axis=1)

In [47]:
m18meta = m18meta.drop(['Stim Type', 'stimulus_type', 'Gain', 'Offset', 'Response Channel', 'Command Channel', 'nwb'], axis=1)
m19meta = m19meta.drop(['Stim Type', 'stimulus_type', 'Gain', 'Offset', 'Response Channel', 'Command Channel', 'nwb'], axis=1)

In [48]:
all_meta = pd.concat([human_meta, m18meta, m19meta])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  all_meta = pd.concat([human_meta, m18meta, m19meta])


# Add epileptogenic col and data about slice preparation solutions

# tissue resected from the following patients was identified as from an epileptogenic focus 
epileptogenic_ids_natcom = ['X2014.01.27', 'X2015.06.22', 'X2015.07.23']
epileptogenic_ids_2018 = ['1806', '1810', '1811', '1812']
epileptogenic_ids_2019 = ['1904', '1908']

epileptogenic_ids = epileptogenic_ids_natcom + epileptogenic_ids_2018 + epileptogenic_ids_2019
all_meta['is_epileptogenic'] = all_meta['ABF File'].isin(epileptogenic_ids)

In [49]:
# recordings w/ the following ids were identified as from an epileptogenic focus (from natcoms data)
epileptogenic_ids = ['20140127_600_1_0118', '20140127_600_1_0158', '15622000',
                     '15622005', '15622013', '15622015', '15622019', '15723013']

all_meta['is_epileptogenic'] = all_meta['ABF File'].isin(epileptogenic_ids)

In [50]:
# tissue resected from the following patients was identified as from an epileptogenic focus 
epileptogenic_subject_ids = [1806, 1810, 1811, 1812, 1904, 1908]
all_meta.loc[all_meta.subject_id.isin(epileptogenic_subject_ids), 'is_epileptogenic'] = True
all_meta[all_meta.subject_id.isin(epileptogenic_subject_ids)]

Unnamed: 0,ABF File,Cell #,Data Type,RMP,ZD status,age,aggregated_cell_layer,diagnosis,diagnosis_Epilepsy,diagnosis_Tumor,drugs,expt_date,external_soln,internal_soln,layer_name,resection_location,seizure_duration_years,sex,subject_id,is_epileptogenic
42,19320022,C5,Human,-75.9,n.a,26.0,L23,Epilepsy,True,False,"ESL, TMP, CLB",March_20_ 2019,Synaptic Blockers,Potassium gluconate,L23,Right-ATL,9,F,1908,True


In [51]:
# ids for recordings with morphologies on neuromorpho
morpho_ids = ['19122043', '19129004', '19320022', '2020_01_06_0090']
all_meta[all_meta['ABF File'].isin(morpho_ids)]

Unnamed: 0,ABF File,Cell #,Data Type,RMP,ZD status,age,aggregated_cell_layer,diagnosis,diagnosis_Epilepsy,diagnosis_Tumor,drugs,expt_date,external_soln,internal_soln,layer_name,resection_location,seizure_duration_years,sex,subject_id,is_epileptogenic
125,19129004,1,Human,-62.9,n.a,52.0,L3C,Epilepsy,True,False,"LTG, LEF",2019-01-29,aCSF,Potassium gluconate,L3c,Left-ATL,6,F,X2019.01.29,False
147,2020_01_06_0090,1,Human,-65.2,n.a,42.0,L3C,Epilepsy,True,False,"CBZ, GPN",2020-01-06,aCSF,Potassium gluconate,L3c,Right-ATL,3,F,X2020.01.06,False
42,19320022,C5,Human,-75.9,n.a,26.0,L23,Epilepsy,True,False,"ESL, TMP, CLB",March_20_ 2019,Synaptic Blockers,Potassium gluconate,L23,Right-ATL,9,F,1908,True


In [52]:
cells_w_morphology = ['19122003', # smaller L2 morphlogy, first on left
                       '2017_03_20_0026', # synaptic blockers?
                       '19320017', # small L2 cell, not currently highlighted in paper
                       '19122043', # cell currently labelled as L2 350 um
                       '19129004', # large L3 morphology
                       '19320022', # L3c cell, synaptic blockers
                       '19219004', # L3c cell, will be added to paper, synaptic blockers
                       '19219021', # L3c cell, will be added to paper, synaptic blockers
                       '19128061', # big L5 morphology
                       '19129015', # smaller L5 morphology
                       '2020_01_06_0048', # L3c cell
                       '2019_11_28_0010'] 

In [53]:
cells_w_neuromorphoID = ['19122003', # NMO_159995
                         '19122043', # NMO_159992
                         '19129004', # NMO_159994
                         #'19219021', #
                         '19128061', # NMO_159993
                         '19129015', # NMO_159989
                         '2020_01_06_0048', # NMO_159990
                         '2019_11_28_0010'] #NMO_159991
#'19320017', # apparently have this morphology but not uploaded
#'2017_03_20_0026', #
#'19320022', #

In [54]:
cells_to_upload = ['19320017',
                   '2017_03_20_0026',
                   '19320022',
                   '19219021']

In [55]:
import numpy as np

In [56]:
np.sort(all_meta[all_meta['ABF File'].isin(cells_w_neuromorphoID)].loc[:, 'ABF File'].values)

array(['19122003', '19128061', '19129004', '19129015', '2019_11_28_0010',
       '2020_01_06_0048'], dtype=object)

In [57]:
np.sort(all_meta[all_meta['ABF File'].isin(cells_w_morphology)].loc[:, 'ABF File'].values)

array(['19122003', '19128061', '19129004', '19129015', '19320017',
       '19320022', '2019_11_28_0010', '2020_01_06_0048'], dtype=object)

In [58]:
all_meta['has_morphology'] = all_meta['ABF File'].isin(cells_w_morphology)

In [59]:
neuromorpho_id_map = {
    '19122003': 'NMO_159995', 
    '19128061': 'NMO_159993',
    '19129015': 'NMO_159989',
    '19129004': 'NMO_159994',
    '19320017': '',
    '2020_01_06_0048': '',
    '19320022': ''
}

neuromorpho_url_map = {
    '19122003': '', 
    '19128061': '',
    '19129015': '',
    '19129004': '',
    '19320017': '',
    '2020_01_06_0048': '',
    '19320022': ''
}

In [60]:
all_meta[all_meta.has_morphology]['ABF File'].values

array(['19122003', '19128061', '19129015', '19129004', '19320017',
       '2020_01_06_0048', '2019_11_28_0010', '19320022'], dtype=object)

In [61]:
cutting_solutions = pd.read_csv('./data/raw/cutting_solution.csv')

In [62]:
cutting_solutions.recording_id = cutting_solutions.recording_id.str.rstrip('.nwb')

In [63]:
cutting_solutions

Unnamed: 0,recording_id,cutting_solution
0,19128006,Sucrose
1,19128040,Sucrose
2,19129043,Sucrose
3,19129058,Sucrose
4,19129072,Sucrose
...,...,...
136,19320024,Sucrose
137,19129047,Sucrose
138,2016_03_01_0000,Sucrose
139,15o08002,Sucrose


In [64]:
all_meta = all_meta.merge(cutting_solutions, left_on='ABF File', right_on='recording_id', how='left').drop('recording_id', axis=1)

In [65]:
all_meta['sex'] = all_meta.sex.str.strip(' ')

In [66]:
meta_output_path = Path('./data/processed/meta/')
meta_output_path.mkdir(exist_ok=True, parents=True)
human_meta.to_csv(meta_output_path / 'human_meta_cleaned.csv', index=None)
all_meta.to_csv(meta_output_path / 'interim-all_meta_cleaned.csv', index=None)