In [3]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
    
from tqdm import tqdm
import os
import platform
print(platform.python_version())
import sys
import matplotlib.pyplot as plt
import numpy as np
import json
import pandas as pd
from collections import OrderedDict
from datetime import date
import zarr

from phathom import io
from bmtrap.util import *
from bmtrap.preprocessing import BMPreprocessing as BMPrep
bmp = BMPrep()

3.7.11


In [100]:
# Directory that holds the input count CSV; the converted CSV is written here too.
# NOTE(review): hard-coded absolute path — adjust per machine / dataset.
dpath = '/media/share5/mykim/Re-3L_R3'
# File-name stem: '<cc_prefix>.csv' is read and '<cc_prefix>_converted.csv' is written.
cc_prefix = 'count_cp_l1-7_all'

In [102]:
# Derive the input and output paths from the dataset directory and file stem,
# then read the per-region count table and preview it.
fname, fname_converted = (
    os.path.join(dpath, pattern % cc_prefix)
    for pattern in ('%s.csv', '%s_converted.csv')
)
df = pd.read_csv(fname)
df.head()

Unnamed: 0,id,region,count,area,density
0,2,Cerebrum,126116,8830913,14.281196
1,639,Brain stem,32390,3844511,8.424999
2,1014,Cerebellum,33104,1725083,19.189801
3,1102,cranial nerves,1109,395755,2.802239
4,1171,cerebellum related fiber tracts,2946,314232,9.375239


In [103]:
# Keep only the columns used by the conversion, in the order they should appear.
wanted_columns = ['id', 'count', 'density', 'region']
df = df.loc[:, wanted_columns]
df.head()

Unnamed: 0,id,count,density,region
0,2,126116,14.281196,Cerebrum
1,639,32390,8.424999,Brain stem
2,1014,33104,19.189801,Cerebellum
3,1102,1109,2.802239,cranial nerves
4,1171,2946,9.375239,cerebellum related fiber tracts


In [104]:
# Region hierarchy table: one row per region with its id, name, acronym,
# parent_structure_id and depth.
atlas_csv = '/media/share5/MYK/ATLAS/mouse/AllBrainRegions.csv'
align_df = pd.read_csv(atlas_csv)
align_df.head()

Unnamed: 0,id,name,acronym,parent_structure_id,depth
0,0,root,root,-1,0
1,1,Basic cell groups and regions,grey,0,1
2,2,Cerebrum,CH,1,2
3,3,Cerebral cortex,CTX,2,3
4,4,Cortical plate,CTXpl,3,4


In [105]:
# Inspect the hierarchy row(s) whose acronym is 'SCdg'.
is_scdg = align_df['acronym'] == 'SCdg'
a = align_df[is_scdg]
a

Unnamed: 0,id,name,acronym,parent_structure_id,depth
831,831,"Superior colliculus, motor related, deep gray ...",SCdg,830,6


### Start Converting
#### 1. Make a copy of the dataframe and rename its columns

In [106]:
# Work on an independent (deep) copy so the loaded table `df` is left untouched.
df_new = df.copy(deep=True)
df_new.head()

Unnamed: 0,id,count,density,region
0,2,126116,14.281196,Cerebrum
1,639,32390,8.424999,Brain stem
2,1014,33104,19.189801,Cerebellum
3,1102,1109,2.802239,cranial nerves
4,1171,2946,9.375239,cerebellum related fiber tracts


In [107]:
# Switch to the column names used in the converted table.
column_renames = {"count": "counts", "region": "name"}
df_new = df_new.rename(columns=column_renames)
df_new.head()

Unnamed: 0,id,counts,density,name
0,2,126116,14.281196,Cerebrum
1,639,32390,8.424999,Brain stem
2,1014,33104,19.189801,Cerebellum
3,1102,1109,2.802239,cranial nerves
4,1171,2946,9.375239,cerebellum related fiber tracts


#### 2. Get acronyms from names

In [108]:
# Region names in table order; each one is looked up in the hierarchy table.
name_list = df_new['name'].tolist()
def get_acronym(name):
    """Return the acronym of the first `align_df` row whose 'name' equals `name`.

    Reads the module-level `align_df` table. Raises IndexError when no row
    matches.
    """
    matching_acronyms = align_df.loc[align_df['name'] == name, 'acronym']
    return matching_acronyms.iloc[0]

# One acronym per region name, in the same order as name_list.
acronym_list = [get_acronym(region_name) for region_name in name_list]

In [109]:
# add to the dataframe
df_new['acronym'] = acronym_list
df_new.head()

Unnamed: 0,id,counts,density,name,acronym
0,2,126116,14.281196,Cerebrum,CH
1,639,32390,8.424999,Brain stem,BS
2,1014,33104,19.189801,Cerebellum,CB
3,1102,1109,2.802239,cranial nerves,cm
4,1171,2946,9.375239,cerebellum related fiber tracts,cbf


#### 3. Add columns for parent regions

In [110]:
# Create the seven ancestry columns (d7 down to d1) as empty strings;
# they are filled from each region's parent chain in a later cell.
for depth_col in ('d7', 'd6', 'd5', 'd4', 'd3', 'd2', 'd1'):
    df_new[depth_col] = ""
df_new.head()

Unnamed: 0,id,counts,density,name,acronym,d7,d6,d5,d4,d3,d2,d1
0,2,126116,14.281196,Cerebrum,CH,,,,,,,
1,639,32390,8.424999,Brain stem,BS,,,,,,,
2,1014,33104,19.189801,Cerebellum,CB,,,,,,,
3,1102,1109,2.802239,cranial nerves,cm,,,,,,,
4,1171,2946,9.375239,cerebellum related fiber tracts,cbf,,,,,,,


#### 4. Fill in the parent regions

In [111]:
# Region ids in table order; preview the first ten.
id_list = df_new['id'].tolist()
id_list[:10]

[2, 639, 1014, 1102, 1171, 1189, 1190, 1218, 1238, 1293]

In [112]:
def get_name_by_id(a_df, rid):
    """Return the 'name' of the first row of `a_df` whose 'id' equals `rid`.

    Raises IndexError when no row has that id.
    """
    matching_names = a_df.loc[a_df['id'] == rid, 'name']
    return matching_names.iloc[0]

def trace_back(a_df, rid, traces=None):
    """Collect the names of a region and its ancestors, walking up the hierarchy.

    Starting at the row of `a_df` whose 'id' equals `rid`, the row's 'name' is
    appended to `traces` and the walk continues with its 'parent_structure_id'.
    The walk stops after the first row whose parent id is -1, 0 or 1, so rows
    with those ids are only included when the walk starts on them.

    Parameters
    ----------
    a_df : pandas.DataFrame
        Table with 'id', 'name' and 'parent_structure_id' columns.
    rid : int
        Id of the row to start from.
    traces : list, optional
        List to append to; it is mutated in place. A fresh list is created
        when omitted (a shared mutable default would leak names between calls).

    Returns
    -------
    list
        `traces`, ordered from the starting region up to its top-most ancestor.

    Raises
    ------
    IndexError
        If an id reached during the walk has no row in `a_df`.
    ValueError
        If the parent links form a cycle.
    """
    if traces is None:
        traces = []

    visited = set()
    current = rid
    while True:
        # Guard against malformed tables whose parent links loop back.
        if current in visited:
            raise ValueError('cycle in parent_structure_id chain at id %r' % (current,))
        visited.add(current)

        item = a_df[a_df['id'] == current]
        if item.empty:
            raise IndexError('no row with id %r in the hierarchy table' % (current,))
        row = item.iloc[0]

        traces.append(row['name'])
        pid = row['parent_structure_id']
        #print('name: ', row['name'], ', pid:', pid)
        if pid in (-1, 0, 1):
            # Always hand the list back, also after walking several levels.
            return traces
        current = pid
        
def fill_all_depth(l, maxlen=7):
    """Left-pad `l` with copies of its first element up to `maxlen` items.

    A list that already has `maxlen` or more items comes back with the same
    contents (as a new list). An empty list raises IndexError.
    """
    first = l[0]
    missing = maxlen - len(l)
    return [first for _ in range(missing)] + l

In [113]:
# loop through rows
for index, row in df_new.iterrows():
    rid = row['id']
    rname = row['name']
    pl = []
    trace_back(align_df, rid, pl)
    pl2 = fill_all_depth(pl)
    
    df_new.at[index, 'd7'] = pl2[0]
    df_new.at[index, 'd6'] = pl2[1]
    df_new.at[index, 'd5'] = pl2[2]
    df_new.at[index, 'd4'] = pl2[3]
    df_new.at[index, 'd3'] = pl2[4]
    df_new.at[index, 'd2'] = pl2[5]
    df_new.at[index, 'd1'] = pl2[6]
    
df_filtered = df_new[df_new['density'] > 0.0] 
print(df_filtered.shape)
df_filtered.head()

(743, 12)


Unnamed: 0,id,counts,density,name,acronym,d7,d6,d5,d4,d3,d2,d1
0,2,126116,14.281196,Cerebrum,CH,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum
1,639,32390,8.424999,Brain stem,BS,Brain stem,Brain stem,Brain stem,Brain stem,Brain stem,Brain stem,Brain stem
2,1014,33104,19.189801,Cerebellum,CB,Cerebellum,Cerebellum,Cerebellum,Cerebellum,Cerebellum,Cerebellum,Cerebellum
3,1102,1109,2.802239,cranial nerves,cm,cranial nerves,cranial nerves,cranial nerves,cranial nerves,cranial nerves,cranial nerves,fiber tracts
4,1171,2946,9.375239,cerebellum related fiber tracts,cbf,cerebellum related fiber tracts,cerebellum related fiber tracts,cerebellum related fiber tracts,cerebellum related fiber tracts,cerebellum related fiber tracts,cerebellum related fiber tracts,fiber tracts


In [114]:
# sort and save
df_filtered_sorted = df_filtered.sort_values(by='id') 
df_filtered_sorted.to_csv(fname_converted, sep=',', index=False)
df_filtered_sorted.head()

Unnamed: 0,id,counts,density,name,acronym,d7,d6,d5,d4,d3,d2,d1
0,2,126116,14.281196,Cerebrum,CH,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum,Cerebrum
18,3,122100,17.219117,Cerebral cortex,CTX,Cerebral cortex,Cerebral cortex,Cerebral cortex,Cerebral cortex,Cerebral cortex,Cerebral cortex,Cerebrum
79,4,120779,17.746012,Cortical plate,CTXpl,Cortical plate,Cortical plate,Cortical plate,Cortical plate,Cortical plate,Cerebral cortex,Cerebrum
188,5,98602,24.982226,Isocortex,Isocortex,Isocortex,Isocortex,Isocortex,Isocortex,Cortical plate,Cerebral cortex,Cerebrum
318,6,129,4.146443,"Frontal pole, cerebral cortex",FRP,"Frontal pole, cerebral cortex","Frontal pole, cerebral cortex","Frontal pole, cerebral cortex",Isocortex,Cortical plate,Cerebral cortex,Cerebrum


In [115]:
# Show the path the converted table was written to.
# NOTE(review): the stored output of this cell shows a different directory and
# file stem than `dpath` / `cc_prefix` in the configuration cell would produce,
# so the saved outputs appear to come from an earlier run — restart the kernel
# and run all cells to confirm.
fname_converted

'/media/share7/Susumu_Tonegawa_datasets/LCT_datasets_batch_4_11-9-20/Tonegawa_R3-Re-3L_Autofluor-tdTomato/output_l1-7_all_converted.csv'