In [1]:
import os
import sqlite3
import pandas as pd

In [2]:
# Root folder of the HC-3 dataset, relative to the notebook's working directory.
dataset_directory = 'data/hc-3'

# Sub-folder of the dataset root that holds the documentation files.
docs_directory = os.path.join(
    dataset_directory,
    'docs',
)

#### Loading

In [3]:
# Load every table of the original HC-3 SQLite metadata database into
# `original_db`, a dict of DataFrames keyed by table name.
original_directory = os.path.join(docs_directory,'hc3-original-docs')
original_db_filepath = os.path.join(original_directory,'hc3.db')

# sqlite3.connect() silently creates an empty database when the file is missing,
# which would only surface later as a confusing KeyError; fail early instead.
if not os.path.isfile(original_db_filepath):
    raise FileNotFoundError('HC-3 database not found: %s' % original_db_filepath)

original_db_connection = sqlite3.connect(original_db_filepath)
original_db = {}

try:
    # sqlite_master lists every schema object (tables, indexes, ...).
    original_db_dataframe = pd.read_sql_query("SELECT * FROM sqlite_master", original_db_connection)

    # An index row carries the tbl_name of the table it indexes, so names can
    # repeat; read each table only once (first-seen order is preserved).
    for table_name in original_db_dataframe['tbl_name'].unique():
        # Quote the identifier so unusual table names cannot break the query.
        quoted_name = '"%s"' % table_name.replace('"', '""')
        df = pd.read_sql_query('SELECT * from %s' % quoted_name, original_db_connection)

        # Normalise the different spellings of "missing" to an empty string.
        df = df.fillna(value='')
        df = df.replace('""','')
        df = df.replace('None','')

        original_db[table_name] = df
finally:
    # Always release the connection, even if one of the reads fails.
    original_db_connection.close()

#### Binding

##### Behavior

In [4]:
# Replacement column names, assigned positionally, for the tables that take
# part in the behavior merge. Each list must match its table's column count.
renamed_columns = {
    'hc3_mpg': ['id_mpg','topdir','session'],
    'animal': ['id_animal','animal'],
    'hc3_session': ['id_session', 'topdir', 'session', 'session_size'],
    'video_files': ['topdir', 'session', 'video_file', 'video_size'],
    'map_cell_counts': ['topdir','mc_ncells'],
    'clu_cell_counts': ['topdir','nshanks','cc_ncells'],
}

for renamed_table, new_names in renamed_columns.items():
    original_db[renamed_table].columns = new_names

In [5]:
# Collapse every column from the fifth onward of the elepos table into a single
# dict-valued 'elepos' column. Keys are 1-based positions within those columns;
# empty-string entries are left out of the dict.
# NOTE(review): presumably the collapsed columns are per-electrode positions -- confirm against the schema.
elepos_table = original_db['elepos']
trailing_values = elepos_table.iloc[:,4:].values

elepos_table['elepos'] = [
    {position: entry for position, entry in enumerate(row, start=1) if entry != ''}
    for row in trailing_values
]

# Drop the now-redundant source columns: everything between the first four
# columns and the newly appended 'elepos' column.
collapsed_columns = elepos_table.columns[4:-1]
elepos_table.drop(columns=collapsed_columns, inplace=True)

In [6]:
# Left-join the behavior table with each auxiliary table in turn. The order of
# this plan matters: later joins use columns introduced by earlier ones.
behavior_merge_plan = [
    ('video_files', ['topdir','session']),
    ('hc3_session', ['topdir','session']),
    ('hc3_topdir', ['topdir']),
    ('animal', ['animal']),
    ('hc3_mpg', ['topdir','session']),
    ('map_cell_counts', ['topdir']),
    ('clu_cell_counts', ['topdir']),
    ('elepos', ['topdir','mergen','unit']),
]

df = original_db['beh'].copy()
for joined_table, join_keys in behavior_merge_plan:
    df = df.merge(original_db[joined_table], how='left', on=join_keys)

# Keep only the rows that found a match in hc3_session (id_session is NaN otherwise).
doc_behavior = df.dropna(subset=['id_session'])

##### Electrophysiology

In [7]:
# Map each region id to its label using the two-column region_name table.
# itertuples() yields plain (first, second) tuples per row; iterating `.iloc`
# directly only works through Python's implicit __getitem__ fallback and
# builds a full Series for every row.
region_dict = {
    region_key: region_label
    for region_key, region_label in original_db['region_name'].itertuples(index=False, name=None)
}

# Add a 'region' column by translating region_id; ids absent from region_dict become NaN.
original_db['region']['region'] = original_db['region']['region_id'].map(region_dict)

In [8]:
# Positional rename of the six iCell columns before the table is merged on 'id'.
original_db['iCell'].columns = [
    'id',
    'pyramidal',
    'interneuron',
    'nor_pyr_int',
    'pyramidal_ccg',
    'interneuron_ccg',
]

In [9]:
# Build the per-unit electrophysiology table: start from pmMap and inner-join
# the other per-unit tables on their shared 'id' column.
df = original_db['pmMap'].copy()

for unit_table in ['region_pm1943', 'region_elepos', 'region', 'clean', 'iCell']:
    df = df.merge(original_db[unit_table],on=['id'])

# Flag units listed in the to_remove table, whose three columns are read
# positionally as (fileid, ele, clu). The keys are collected once into a set so
# the frame is scanned a single time instead of once per to_remove row.
removal_keys = {
    (fileid, ele, clu)
    for fileid, ele, clu in original_db['to_remove'].itertuples(index=False, name=None)
}
is_flagged = pd.Series(
    [unit_key in removal_keys for unit_key in zip(df['FileID'], df['ele'], df['clu'])],
    index=df.index,
    dtype=bool,
)
# 1 = listed in to_remove, 0 = not listed. The flag is only recorded here;
# no rows are dropped based on it in this cell.
df['to_remove'] = is_flagged.astype('int64')

# Attach topdir/mergen from elepos by file id, then keep only units whose
# mergen also appears in the behavior table and that are marked good == 1.
df = df.merge(original_db['elepos'][['topdir','mergen','fileid']],how='left',left_on=['FileID'],right_on=['fileid'])
df = df[df['mergen'].isin(doc_behavior['mergen'])]
# .copy() detaches the result from the filtered parent frame, so it can be
# modified in place afterwards without chained-assignment warnings.
df = df[df['good'] == 1].copy()

doc_ephys = df

In [16]:
# Renumber the rows 0..n-1 in place (the previous index is discarded), so
# later positional and label-based access agree.
doc_ephys.index = pd.RangeIndex(len(doc_ephys))

In [25]:
# For every unit in doc_ephys, collect the animal, the session names and the
# behaviors of all doc_behavior rows that share the unit's 'mergen' value.

# Group doc_behavior once by mergen so each unit is a dictionary lookup instead
# of a fresh boolean scan over the whole behavior table.
sessions_by_mergen = {}
for mergen_key, mergen_rows in doc_behavior.groupby('mergen', sort=False):
    sessions_by_mergen[mergen_key] = (
        mergen_rows['animal'].unique()[0],   # first animal seen for this mergen
        mergen_rows['session'].values,
        mergen_rows['behavior'].values,
    )

doc_ephys_animals = []
doc_ephys_sessions = []
doc_ephys_behaviors = []
for mergen_key in doc_ephys['mergen']:
    # Raises KeyError if a unit's mergen has no behavior rows.
    # Units sharing a mergen share the same session/behavior arrays.
    animal, sessions, behaviors = sessions_by_mergen[mergen_key]

    doc_ephys_animals.append(animal)
    doc_ephys_sessions.append(sessions)
    doc_ephys_behaviors.append(behaviors)

doc_ephys['behaviors'] = doc_ephys_behaviors
doc_ephys['sessions'] = doc_ephys_sessions
doc_ephys['animal'] = doc_ephys_animals

In [26]:
# Persist both result frames with IPython's %store magic so they can be
# restored in another kernel/notebook with `%store -r`.
%store doc_behavior
%store doc_ephys

Stored 'doc_behavior' (DataFrame)
Stored 'doc_ephys' (DataFrame)
