# 5.1 Generate Overview of Embryos
This notebook builds a downsampled, cell-type-by-developmental-stage view of the data. The cells from each embryo are collapsed into one averaged profile per cell type. All cells from a given embryo share a single developmental stage.

In [1]:
from clustergrammer2 import net
df = {}

>> clustergrammer2 backend version 0.4.2


In [2]:
import pandas as pd
from glob import glob
import os
from copy import deepcopy

In [3]:
def add_cats_from_meta(barcodes, df_meta, add_cat_list):
    '''
    Build labelled column tuples for a set of barcodes.

    For every barcode, the columns named in add_cat_list are looked up in
    df_meta (indexed by barcode) and each value is rendered as the string
    '<column name>: <value>'.

    Returns a list with one tuple per barcode, laid out as
    (barcode, '<cat 1>: <value>', '<cat 2>: <value>', ...), in the order of
    barcodes and add_cat_list.
    '''

    # metadata rows for the requested barcodes, restricted to the requested columns
    meta_values = df_meta.loc[barcodes][add_cat_list].values

    new_cols = []
    for barcode, row in zip(barcodes, meta_values):
        # prefix every value with the name of the column it came from
        labels = [str(title) + ': ' + str(value)
                  for title, value in zip(add_cat_list, list(row))]
        new_cols.append(tuple([barcode] + labels))

    return new_cols

### Cell Type Distributions

In [4]:
# Read the per-cell metadata of every embryo sample, keep each table for later
# concatenation, and pool the 'Main_cell_type' labels across all samples.
list_cell_types = []
meta_list = []
# sorted so the samples are always processed in the same order
new_samples = sorted(glob('../data/big_data/cao_2million-cell_2019_61-embryo_parquet_files_binder/*'))
for inst_sample in new_samples:

    df_meta = pd.read_parquet(inst_sample + '/meta_cell.parquet')
    meta_list.append(df_meta)

    # Series.get_values() was deprecated in pandas 0.25 and removed in 1.0;
    # tolist() yields the same labels on old and new pandas alike.
    list_cell_types.extend(df_meta['Main_cell_type'].tolist())

# total number of cells across all samples
print(len(list_cell_types))

1386587


In [5]:
# Tabulate how many cells carry each cell-type label; the labels are kept in
# order of decreasing frequency in all_cell_types.
ser_cell_types = pd.Series(list_cell_types)
cell_type_counts = ser_cell_types.value_counts()
all_cell_types = cell_type_counts.index.tolist()

print('there are', len(all_cell_types), 'cell types')
cell_type_counts

there are 38 cell types


Chondrocytes & osteoblasts       104698
Connective tissue progenitors     98964
Intermediate Mesoderm             89518
Jaw and tooth progenitors         82289
Early mesenchyme                  71949
Excitatory neurons                68567
Epithelial cells                  66209
Radial glia                       65428
Neural progenitor cells           58332
Postmitotic premature neurons     56033
Oligodendrocyte Progenitors       54606
Isthmic organizer cells           48498
Neural Tube                       45985
Inhibitory neurons                44658
Myocytes                          43197
Definitive erythroid lineage      34205
Chondroctye progenitors           33539
Inhibitory neuron progenitors     31214
Premature oligodendrocyte         29538
Limb mesenchyme                   26559
Sensory neurons                   26477
Endothelial cells                 26431
Stromal cells                     23259
Osteoblasts                       23223
Schwann cell precursor            23145


# Main Cell Types Times Developmental Stage

In [6]:
# Product of the 38 cell types counted above and 5 -- presumably the number of
# developmental stages, i.e. an upper bound on cell-type x stage combinations
# (TODO confirm the 5).
38 * 5

190

In [7]:
# Stack the per-sample metadata tables collected in meta_list into one frame
# and show its (rows, columns) shape.
df_meta_all = pd.concat(meta_list)
df_meta_all.shape

(1386587, 36)

In [8]:
# for inst_cell_type in all_cell_types:
#     print('\n')
#     inst_df = df_meta_all[df_meta_all['Main_cell_type'] == inst_cell_type]
#     print(inst_cell_type, inst_df.shape)
#     print(inst_df['development_stage'].value_counts())

In [10]:
# inst_df['Main_trajectory'].value_counts()

In [12]:
# df_gex.shape

In [13]:
# df_meta.shape

### Working on adding categories from metadata

In [15]:
# new_cols[0]

In [17]:
# ct_pop

In [None]:
%%time
# Collapse every embryo sample into one mean expression profile per cell type.
# Each sample contributes a frame (genes x cell types) whose columns are tuples
# carrying the cell type, developmental stage, embryo and cell count.
df_mean_list = []
for inst_sample_path in new_samples:
    df_gex = pd.read_parquet(inst_sample_path + '/gex.parquet')
    df_meta = pd.read_parquet(inst_sample_path + '/meta_cell.parquet')

    # The sample directory name is split on '-': field 1 is used as the embryo
    # number and field 2 as the developmental stage.
    # basename() copes with either path separator, unlike split('/').
    inst_sample = os.path.basename(inst_sample_path)
    name_fields = inst_sample.split('-')
    inst_embryo = 'embryo-' + name_fields[1]
    inst_dev = name_fields[2]

    # label every barcode (column of df_gex) with its cell type and stage
    new_cols = add_cats_from_meta(df_gex.columns.tolist(),
                                  df_meta,
                                  ['Main_cell_type', 'development_stage'])

    # number of cells per cell type in this sample, keyed by the bare cell-type
    # name; maxsplit=1 keeps names that themselves contain ': ' intact
    ct_pop = pd.Series([x[1] for x in new_cols]).value_counts()
    ct_pop.index = [x.split(': ', 1)[1] for x in ct_pop.index.tolist()]

    # relabel a copy so df_gex keeps its original barcode columns
    df_cat = df_gex.copy()
    df_cat.columns = new_cols

    # average the cells of each cell type (cells are rows after the transpose)
    df_mi = net.row_tuple_to_multiindex(df_cat.transpose())
    df_mean_ini = df_mi.groupby(level='Main_cell_type').mean().transpose()
    rows = df_mean_ini.index.tolist()
    cols = [(x + '_' + inst_sample.replace('embryo', 'e'),
             'Cell Type: ' + x,
             'Dev Stage: ' + inst_dev,
             'Embryo: ' + inst_embryo,
             'Num: ' + str(ct_pop[x])) for x in df_mean_ini.columns.tolist()]
    # DataFrame.get_values() was removed in pandas 1.0; .values works on every
    # version and matches its use in add_cats_from_meta
    mat = df_mean_ini.values
    df_mean = pd.DataFrame(index=rows, columns=cols, data=mat)
    print(df_cat.shape, df_mean.shape)

    df_mean_list.append(df_mean)

In [19]:
# Join the per-sample mean frames side by side (axis=1 aligns them on the row
# index) and show the resulting shape.
df_merge = pd.concat(df_mean_list, axis=1)
df_merge.shape

(5000, 74)

In [20]:
# Preview the first rows of the merged matrix.
df_merge.head()

Unnamed: 0,"(Cardiac muscle lineages_e-1-E9.5, Cell Type: Cardiac muscle lineages, Dev Stage: E9.5, Embryo: embryo-1, Num: 263)","(Cholinergic neurons_e-1-E9.5, Cell Type: Cholinergic neurons, Dev Stage: E9.5, Embryo: embryo-1, Num: 76)","(Chondroctye progenitors_e-1-E9.5, Cell Type: Chondroctye progenitors, Dev Stage: E9.5, Embryo: embryo-1, Num: 388)","(Chondrocytes & osteoblasts_e-1-E9.5, Cell Type: Chondrocytes & osteoblasts, Dev Stage: E9.5, Embryo: embryo-1, Num: 8)","(Connective tissue progenitors_e-1-E9.5, Cell Type: Connective tissue progenitors, Dev Stage: E9.5, Embryo: embryo-1, Num: 3)","(Definitive erythroid lineage_e-1-E9.5, Cell Type: Definitive erythroid lineage, Dev Stage: E9.5, Embryo: embryo-1, Num: 6)","(Early mesenchyme_e-1-E9.5, Cell Type: Early mesenchyme, Dev Stage: E9.5, Embryo: embryo-1, Num: 3623)","(Endothelial cells_e-1-E9.5, Cell Type: Endothelial cells, Dev Stage: E9.5, Embryo: embryo-1, Num: 585)","(Ependymal cell_e-1-E9.5, Cell Type: Ependymal cell, Dev Stage: E9.5, Embryo: embryo-1, Num: 41)","(Epithelial cells_e-1-E9.5, Cell Type: Epithelial cells, Dev Stage: E9.5, Embryo: embryo-1, Num: 1319)",...,"(Oligodendrocyte Progenitors_e-10-E11.5, Cell Type: Oligodendrocyte Progenitors, Dev Stage: E11.5, Embryo: embryo-10, Num: 1897)","(Osteoblasts_e-10-E11.5, Cell Type: Osteoblasts, Dev Stage: E11.5, Embryo: embryo-10, Num: 567)","(Postmitotic premature neurons_e-10-E11.5, Cell Type: Postmitotic premature neurons, Dev Stage: E11.5, Embryo: embryo-10, Num: 1842)","(Premature oligodendrocyte_e-10-E11.5, Cell Type: Premature oligodendrocyte, Dev Stage: E11.5, Embryo: embryo-10, Num: 768)","(Primitive erythroid lineage_e-10-E11.5, Cell Type: Primitive erythroid lineage, Dev Stage: E11.5, Embryo: embryo-10, Num: 374)","(Radial glia_e-10-E11.5, Cell Type: Radial glia, Dev Stage: E11.5, Embryo: embryo-10, Num: 2454)","(Schwann cell precursor_e-10-E11.5, Cell Type: Schwann cell precursor, Dev Stage: E11.5, Embryo: embryo-10, Num: 489)","(Sensory 
neurons_e-10-E11.5, Cell Type: Sensory neurons, Dev Stage: E11.5, Embryo: embryo-10, Num: 635)","(Stromal cells_e-10-E11.5, Cell Type: Stromal cells, Dev Stage: E11.5, Embryo: embryo-10, Num: 594)","(White blood cells_e-10-E11.5, Cell Type: White blood cells, Dev Stage: E11.5, Embryo: embryo-10, Num: 159)"
Gm42418,75.190114,11.723684,10.487113,5.875,7.0,41.666667,8.023185,23.37094,9.292683,12.514784,...,17.114391,107.201058,20.87025,16.928385,65.339572,14.048085,25.486708,19.377953,15.186869,45.955975
Gpc6,3.26616,2.552632,7.389175,11.25,5.0,0.333333,9.173061,0.586325,6.536585,4.319939,...,4.140749,5.289242,1.647666,5.645833,0.07754,3.08802,4.99591,3.702362,1.094276,1.27673
Dcc,1.022814,9.828947,0.533505,0.0,0.0,1.166667,0.412365,0.100855,1.585366,0.292646,...,2.811808,0.597884,17.382193,3.002604,0.088235,5.398533,2.621677,2.155906,0.26431,0.295597
mt-Rnr1,3.460076,1.447368,1.896907,1.375,2.666667,7.833333,1.359647,3.902564,1.634146,2.223654,...,6.488666,28.922399,7.149294,5.330729,19.31016,4.873268,6.99182,4.993701,5.271044,18.603774
mt-Rnr2,3.56654,1.263158,1.543814,1.125,1.0,12.166667,1.211151,3.704274,1.292683,1.756634,...,8.173432,30.368607,8.963084,6.56901,27.459893,5.899756,8.06544,6.32126,6.220539,25.213836


In [21]:
# Visualize the merged matrix with clustergrammer2. Each call acts on the
# shared `net` object imported at the top of the notebook.
net.load_df(df_merge)
# keep the 100 rows ranked highest by variance (rank_type='var')
net.filter_N_top(inst_rc='row', N_top=100, rank_type='var')
# z-score along the row axis
net.normalize(axis='row', norm_type='zscore')
net.widget()

ExampleWidget(network='{"row_nodes": [{"name": "Gm42418", "ini": 100, "clust": 1, "rank": 20, "rankvar": 33, "…

In [28]:
# Number of labelled column tuples currently held in `new_cols`.
len(new_cols)

15666

In [29]:
# Inspect the first labelled column tuple.
# NOTE(review): the saved output below lists a 'Main_trajectory' category, but
# the visible loop cell requests only 'Main_cell_type' and 'development_stage'
# -- the output looks stale; re-run to confirm.
new_cols[0]

('sci3-me-002.AGATCGGATGCGTTGGAGC',
 'Main_cell_type: Early mesenchyme',
 'Main_trajectory: Mesenchymal trajectory',
 'development_stage: 9.5')

In [30]:
# Shape of the expression matrix currently held in `df_gex`.
df_gex.shape

(5000, 15666)

In [31]:
# Shape of the metadata table currently held in `df_meta`.
df_meta.shape

(15666, 36)

In [38]:
# Tally the label stored at position 1 of each df_cat column tuple (the
# 'Main_cell_type: ...' entry) and keep the distinct labels, most frequent
# first, in cell_types.
ser_traj = pd.Series([col[1] for col in df_cat.columns.tolist()])
label_counts = ser_traj.value_counts()
cell_types = label_counts.index.tolist()
label_counts

Main_cell_type: Early mesenchyme                 3623
Main_cell_type: Neural Tube                      2448
Main_cell_type: Intermediate Mesoderm            1560
Main_cell_type: Epithelial cells                 1319
Main_cell_type: Isthmic organizer cells           925
Main_cell_type: Oligodendrocyte Progenitors       784
Main_cell_type: Radial glia                       696
Main_cell_type: Endothelial cells                 585
Main_cell_type: Stromal cells                     527
Main_cell_type: Primitive erythroid lineage       495
Main_cell_type: Neural progenitor cells           439
Main_cell_type: Chondroctye progenitors           388
Main_cell_type: Notochord cells                   370
Main_cell_type: Jaw and tooth progenitors         344
Main_cell_type: Schwann cell precursor            307
Main_cell_type: Cardiac muscle lineages           263
Main_cell_type: Sensory neurons                   142
Main_cell_type: Myocytes                           80
Main_cell_type: Cholinergic 

In [100]:
# for inst_cell_type in cell_types:
#     print(inst_cell_type)
#     ser_traj = pd.Series([x[3] for x in df_cat.columns.tolist() if x[1] == inst_cell_type])
#     print(len(ser_traj.value_counts().index.tolist()))

In [35]:
# Tally the developmental-stage label carried by each column of df_cat.
# The label is located by its 'development_stage: ' title rather than by a
# fixed tuple position: with the two categories requested in the loop cell the
# column tuples are (barcode, cell type, stage), so indexing with [3] raises
# IndexError.
dev_stage_title = 'development_stage: '
ser_traj = pd.Series([
    next(cat for cat in col[1:] if str(cat).startswith(dev_stage_title))
    for col in df_cat.columns.tolist()
])
ser_traj.value_counts()

development_stage: 9.5    15666
dtype: int64

In [82]:
# Shape of the relabelled expression matrix currently held in `df_cat`.
df_cat.shape

(5000, 15666)

In [87]:
# NOTE(review): `df_sig` is not assigned in any cell visible in this part of
# the notebook; on a fresh kernel this presumably raises NameError -- confirm
# where it is defined.
df_sig.shape

(248, 36)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Gm42418,Gpc6,Dcc,mt-Rnr1,mt-Rnr2,Hbb-y,Gpc3,Hba-x,Auts2,Slc8a1,...,Cfap20,Ackr3,Srfbp1,Dcp1b,Snrpd1,Zfp606,Pou6f2,Cc2d2a,Spg7,Psma7
Name,Main_cell_type,Main_trajectory,development_stage,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
sci3-me-002.AGATCGGATGCGTTGGAGC,Early mesenchyme,Mesenchymal trajectory,9.5,5,1,0,2,1,0,2,0,3,2,...,0,0,0,0,0,0,0,0,0,0
sci3-me-002.TAATACCAGTGCGTTGGAGC,Radial glia,Neural tube and notochord trajectory,9.5,4,16,1,2,0,1,1,0,1,8,...,0,0,0,0,0,0,1,0,0,0
sci3-me-002.AGATTCAACTGCGTTGGAGC,Oligodendrocyte Progenitors,Neural tube and notochord trajectory,9.5,21,10,2,4,4,0,4,0,3,1,...,0,0,0,0,0,0,0,0,1,0
sci3-me-002.GTCATCTGTAGAGCTATAA,Schwann cell precursor,Neural crest 2,9.5,7,15,10,1,2,0,0,0,11,0,...,0,0,0,0,0,0,0,0,0,0
sci3-me-002.TCAGTTGGTGCGTTGGAGC,Early mesenchyme,Mesenchymal trajectory,9.5,10,3,8,1,1,0,13,0,20,2,...,0,0,0,0,0,0,0,0,0,0


In [98]:
# Per-cell-type mean expression for whatever sample `df_cat` currently holds
# (the same two steps used inside the loop cell).
# NOTE: this rebinds `df_mean`, replacing the relabelled frame left by the loop.
df_mi = net.row_tuple_to_multiindex(df_cat.transpose())
df_mean = df_mi.groupby(level='Main_cell_type').mean().transpose()
df_mean.shape

(5000, 36)

In [99]:
# Preview the first rows of the per-cell-type mean matrix.
df_mean.head()

Main_cell_type,Cardiac muscle lineages,Cholinergic neurons,Chondroctye progenitors,Chondrocytes & osteoblasts,Connective tissue progenitors,Definitive erythroid lineage,Early mesenchyme,Endothelial cells,Ependymal cell,Epithelial cells,...,Oligodendrocyte Progenitors,Osteoblasts,Postmitotic premature neurons,Premature oligodendrocyte,Primitive erythroid lineage,Radial glia,Schwann cell precursor,Sensory neurons,Stromal cells,White blood cells
Gm42418,75.190114,11.723684,10.487113,5.875,7.0,41.666667,8.023185,23.37094,9.292683,12.514784,...,8.820153,195.490909,9.0,10.466667,21.424242,7.708333,9.374593,11.190141,7.633776,27.6
Gpc6,3.26616,2.552632,7.389175,11.25,5.0,0.333333,9.173061,0.586325,6.536585,4.319939,...,7.178571,3.309091,2.0,4.4,0.092929,5.512931,6.013029,5.366197,1.43833,2.0
Dcc,1.022814,9.828947,0.533505,0.0,0.0,1.166667,0.412365,0.100855,1.585366,0.292646,...,1.998724,0.218182,14.25,2.0,0.036364,8.114943,4.104235,2.605634,0.094877,0.0
mt-Rnr1,3.460076,1.447368,1.896907,1.375,2.666667,7.833333,1.359647,3.902564,1.634146,2.223654,...,1.540816,46.563636,1.583333,1.2,11.163636,1.314655,1.90228,1.584507,1.419355,9.866667
mt-Rnr2,3.56654,1.263158,1.543814,1.125,1.0,12.166667,1.211151,3.704274,1.292683,1.756634,...,1.350765,35.4,2.666667,1.2,17.705051,1.208333,1.586319,1.43662,1.371917,15.666667
