In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")
import numpy as np
import scanpy as sc
from anndata import read_h5ad
from anndata import AnnData
import scipy as sp
import scipy.stats
from gprofiler import GProfiler
import pickle
from adjustText import adjust_text
from matplotlib import gridspec
# Other specific functions 
from itertools import product
from statsmodels.stats.multitest import multipletests
import time
import os

import sys
sys.path.insert(1, '../')
import util

# autoreload
%load_ext autoreload
%autoreload 2
# logging
sc.logging.print_versions()

scanpy==1.4.4 anndata==0.7.1 umap==0.3.10 numpy==1.17.3 scipy==1.3.1 pandas==0.25.2 scikit-learn==0.21.3 statsmodels==0.11.0


In [2]:
# GLOBAL VARIABLES
DATA_PATH = '/n/groups/price/martin/tms_gene_data'
DGE_RES_PATH = DATA_PATH + '/DGE_result'
DGE_RES_PATH_OLD = DATA_PATH + '/DE_result_old'
ANNO_DATA_PATH = DATA_PATH + '/annotation_data'
RESULT_PATH = DATA_PATH + '/result_v1'

METHOD_LIST = ['facs', 'droplet']
DIC_METHOD_NAME = {'facs':'FACS', 'droplet':'droplet'}
CELLCATE_LIST = ['immune', 'stem cell/progenitor', 'stromal', 'endothelial', 'epithelial', 'parenchymal']

### Load data

In [3]:
# Load the data obs df: facs
temp_data = util.load_normalized_data(DATA_PATH, data_name='facs', total_ct_per_cell=1e4,
                                      flag_size_factor=False, flag_log1p=False)
gene_list_facs = list(temp_data.var_names)
gene_list_facs.sort()
temp_data.obs['n_genes'] = (temp_data.X>0).sum(axis=1)
df_obs_facs = temp_data.obs.copy()
df_obs_facs['analyte'] = ['%s.%s'%(x,y) for x,y in zip(df_obs_facs['tissue'],
                                                       df_obs_facs['cell_ontology_class'])]

# Load the data obs df: droplet
temp_data = util.load_normalized_data(DATA_PATH, data_name='droplet',
                                      flag_size_factor=False, flag_log1p=False)
gene_list_droplet = list(temp_data.var_names)
gene_list_droplet.sort()
temp_data.obs['n_genes'] = (temp_data.X>0).sum(axis=1)
df_obs_droplet = temp_data.obs.copy()
df_obs_droplet['analyte'] = ['%s.%s'%(x,y) for x,y in zip(df_obs_droplet['tissue'], 
                                                          df_obs_droplet['cell_ontology_class'])]

# Load the data obs df: bulk
temp_data = util.load_normalized_data_bulk(DATA_PATH, flag_size_factor=False, flag_log1p=False)
gene_list_bulk = list(temp_data.var_names)
gene_list_bulk.sort()
temp_data.obs['n_genes'] = (temp_data.X>0).sum(axis=1)
df_obs_bulk = temp_data.obs.copy()
df_obs_bulk['analyte'] = df_obs_bulk['tissue']

# dic for obs
dic_obs = {'facs':df_obs_facs, 'droplet':df_obs_droplet, 'bulk':df_obs_bulk}
dic_gene_list = {'facs':gene_list_facs, 'droplet':gene_list_droplet, 'bulk':gene_list_bulk}

# del temp results
del temp_data

Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.


### Load DGE results

In [4]:
# Load DGE results
df_info_facs,dic_dge_facs = util.load_DGE_res(DATA_PATH, dname='facs.tc',version='1e4')
df_info_droplet,dic_dge_droplet = util.load_DGE_res(DATA_PATH, dname='droplet.tc',version='1e4')

# Change analyte name
temp_list = list(dic_dge_facs.keys())
for analyte in temp_list:
    tissue,cell_type = analyte.split('.')
    cell_type = cell_type.replace('_', ' ')
    dic_dge_facs['%s.%s'%(tissue,cell_type)] = dic_dge_facs[analyte].copy()
    if '%s.%s'%(tissue,cell_type) != analyte: del dic_dge_facs[analyte]

temp_list = list(dic_dge_droplet.keys())
for analyte in temp_list:
    tissue,cell_type = analyte.split('.')
    cell_type = cell_type.replace('_', ' ')
    dic_dge_droplet['%s.%s'%(tissue,cell_type)] = dic_dge_droplet[analyte].copy()
    if '%s.%s'%(tissue,cell_type) != analyte:  del dic_dge_droplet[analyte]
        
# fixit: update bh_p (not sure if this is necessary)
dic_dge = {'facs':dic_dge_facs, 'droplet':dic_dge_droplet}

# Append tissue-level results
df_info_facs_tissue,dic_dge['facs.tissue'] = util.load_DGE_res(DATA_PATH, dname='facs.tissue', version='1e4')
df_info_droplet_tissue,dic_dge['droplet.tissue'] = util.load_DGE_res(DATA_PATH, dname='droplet.tissue',
                                                                     version='1e4')
df_info_bulk_tissue,dic_dge['bulk.tissue'] = util.load_DGE_res(DATA_PATH, dname='bulk.tissue', version='1e4')

In [5]:
# dic_analysis_list and dic_fdr_threshold

# analysis list: facs
min_cell_number = 100
ind_select = (df_info_facs['n_cell_young']>min_cell_number) & (df_info_facs['n_cell_old']>min_cell_number)
analysis_list_facs = list(df_info_facs.index[ind_select])

# analysis list: droplet
min_cell_number = 500
ind_select = (df_info_droplet['n_cell_young']>min_cell_number) & (df_info_droplet['n_cell_old']>min_cell_number)
analysis_list_droplet = list(df_info_droplet.index[ind_select])

dic_analysis_list = {'facs':analysis_list_facs, 'droplet':analysis_list_droplet}
for method in METHOD_LIST:
    print('%s, n_tc=%d'%(method, len(dic_analysis_list[method])))

# thresholds parameters
coef_threshold = 0.005
dic_fdr_threshold = {'facs':0.01, 'droplet':0.01, 'bulk':0.1}

facs, n_tc=76
droplet, n_tc=26


In [6]:
# Structured DGE results
dic_H_p = {}
dic_H_fdr = {}
dic_coef = {}
dic_coef_z = {}
dic_coef_se = {}
dic_coef_p = {}
dic_coef_fdr = {}

for method in METHOD_LIST:
    
    dic_H_p[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_H_fdr[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_z[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_se[method] = pd.DataFrame(index = dic_gene_list[method])
    
    dic_coef_p[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_fdr[method] = pd.DataFrame(index = dic_gene_list[method])
    
    for analyte in dic_analysis_list[method]:
        
        dic_H_p[method][analyte] = dic_dge[method][analyte]['age.H_p']
        dic_H_fdr[method][analyte] = dic_dge[method][analyte]['age.H_fdr']
        dic_coef[method][analyte] = dic_dge[method][analyte]['age.logFC']
        dic_coef_z[method][analyte] = dic_dge[method][analyte]['age.logFC_z']
        dic_coef_se[method][analyte] = dic_dge[method][analyte]['age.logFC']/\
                                        dic_dge[method][analyte]['age.logFC_z']
        
        temp_v = dic_dge[method][analyte]['age.logFC_z']
        temp_gene_list = list(dic_dge[method][analyte].index)
        temp_v_p = (1-sp.stats.norm.cdf(np.absolute(temp_v)))*2
        temp_v_p[np.isnan(temp_v_p)] = 1
        temp_v_fdr = multipletests(temp_v_p, method='fdr_bh')[1]
        
        dic_coef_p[method].loc[temp_gene_list, analyte] = temp_v_p
        dic_coef_fdr[method].loc[temp_gene_list, analyte] = temp_v_fdr
        
    # na values
    dic_H_p[method] = dic_H_p[method].fillna(1)
    dic_H_fdr[method] = dic_H_fdr[method].fillna(1)
    dic_coef[method] = dic_coef[method].fillna(0)    
    dic_coef_z[method] = dic_coef_z[method].fillna(0)    
    dic_coef_se[method] = dic_coef_se[method].fillna(1e6)
    
    dic_coef_p[method] = dic_coef_p[method].fillna(1)
    dic_coef_fdr[method] = dic_coef_fdr[method].fillna(1)

  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = (x >= _b) & cond0


In [7]:
# Structured DGE results: append tissue-level results 
for method in METHOD_LIST+['bulk']:
    tissue_list = list(dic_dge['%s.tissue'%method])
    tissue_list.sort()
    keyname = '%s.tissue'%method
        
    dic_H_p[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_H_fdr[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_z[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_se[keyname] = pd.DataFrame(index = dic_gene_list[method])

    dic_coef_p[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_coef_fdr[keyname] = pd.DataFrame(index = dic_gene_list[method])

    for analyte in tissue_list:

        dic_H_p[keyname][analyte] = dic_dge[keyname][analyte]['age.H_p']
        dic_H_fdr[keyname][analyte] = dic_dge[keyname][analyte]['age.H_fdr']
        dic_coef[keyname][analyte] = dic_dge[keyname][analyte]['age.logFC']
        dic_coef_z[keyname][analyte] = dic_dge[keyname][analyte]['age.logFC_z']
        dic_coef_se[keyname][analyte] = dic_dge[keyname][analyte]['age.logFC']/\
                                        dic_dge[keyname][analyte]['age.logFC_z']

        temp_v = dic_dge[keyname][analyte]['age.logFC_z']
        temp_gene_list = list(dic_dge[keyname][analyte].index)
        temp_v_p = (1-sp.stats.norm.cdf(np.absolute(temp_v)))*2
        temp_v_p[np.isnan(temp_v_p)] = 1
        temp_v_fdr = multipletests(temp_v_p, method='fdr_bh')[1]

        dic_coef_p[keyname].loc[temp_gene_list, analyte] = temp_v_p
        dic_coef_fdr[keyname].loc[temp_gene_list, analyte] = temp_v_fdr

    # na values
    dic_H_p[keyname] = dic_H_p[keyname].fillna(1)
    dic_H_fdr[keyname] = dic_H_fdr[keyname].fillna(1)
    dic_coef[keyname] = dic_coef[keyname].fillna(0)    
    dic_coef_z[keyname] = dic_coef_z[keyname].fillna(0)    
    dic_coef_se[keyname] = dic_coef_se[keyname].fillna(1e6)

    dic_coef_p[keyname] = dic_coef_p[keyname].fillna(1)
    dic_coef_fdr[keyname] = dic_coef_fdr[keyname].fillna(1)

### Load annotations

In [8]:
# df_cell_category
df_cell_category = pd.read_csv(ANNO_DATA_PATH + '/cell_ontology_class_functional_annotation.073020.tsv',
                               header=0, index_col=None, sep='\t')
df_cell_category = df_cell_category.fillna('')

df_cell_category['analyte'] = ['%s.%s'%(x,y) for x,y in zip(df_cell_category['tissue'],
                                                            df_cell_category['cell_ontology_class'])]
df_cell_category.index = df_cell_category['analyte']

df_cell_category = df_cell_category[['cell category', 'turnover_mouse', 'binary_lifespan']]

In [9]:
# Analyte annotation
dic_anno = {x:pd.DataFrame(index=dic_analysis_list[x])
            for x in METHOD_LIST}

for method in METHOD_LIST:
    # tissue and cell_ontology_class
    dic_anno[method]['tissue'] = [x.split('.')[0] for x in dic_anno[method].index]
    dic_anno[method]['cell_ontology_class'] = [x.split('.')[1] for x in dic_anno[method].index]
    
    # n_cell
    dic_anno[method]['n_cell'] = [((dic_obs[method]['tissue']==x.split('.')[0]) & 
                                   (dic_obs[method]['cell_ontology_class']==x.split('.')[1])).sum() 
                                  for x in dic_anno[method].index]
    
    # n_celltype in the tissue
    temp_dic = {x:(dic_anno[method]['tissue']==x).sum() for x in set(dic_anno[method]['tissue'])}
    dic_anno[method]['n_celltype'] = [temp_dic[x] for x in dic_anno[method]['tissue']]
    
    # n_rej
    dic_anno[method]['n_rej'] = [np.sum((dic_H_fdr[method][x]<dic_fdr_threshold[method]) &
                                        (np.absolute(dic_coef[method][x])>coef_threshold))
                                 for x in dic_anno[method].index]
    
    dic_anno[method]['n_rej.up'] = [np.sum((dic_H_fdr[method][x]<dic_fdr_threshold[method]) &
                                           (dic_coef[method][x]>coef_threshold))
                                    for x in dic_anno[method].index]
    
    dic_anno[method]['n_rej.down'] = [np.sum((dic_H_fdr[method][x]<dic_fdr_threshold[method]) &
                                             (dic_coef[method][x]<-coef_threshold))
                                      for x in dic_anno[method].index]
    
    
    dic_anno[method] = dic_anno[method].join(df_cell_category)

### Gene partition

In [10]:
# Global aging genes
dic_gene_anno = {}
for method in METHOD_LIST:

    dic_gene_anno[method] = pd.DataFrame(index = dic_gene_list[method])
    dic_gene_anno[method]['prop_sig'] = ((dic_H_fdr[method]<dic_fdr_threshold[method]) & 
                                         (np.absolute(dic_coef[method])>coef_threshold)).mean(axis=1)
    dic_gene_anno[method]['prop_upreg'] = (dic_coef[method]>coef_threshold).sum(axis=1) /\
                                            (np.absolute(dic_coef[method])>coef_threshold).sum(axis=1) 
    
    dic_gene_anno[method]['median_fc'] = dic_coef[method].median(axis=1)
    dic_gene_anno[method]['median_fdr'] = dic_H_fdr[method].median(axis=1)
    
    # Proportion, weighted by # of tissue-cell types
    v_w = 1 / dic_anno[method].loc[dic_H_fdr[method].columns, 'n_celltype'].values
    
    temp_v = ((dic_H_fdr[method]<dic_fdr_threshold[method]) & (np.absolute(dic_coef[method])>coef_threshold))
    dic_gene_anno[method]['prop_sig_w'] = np.average(temp_v, axis=1, weights=v_w)
    
    temp_v1 = np.average(dic_coef[method]>coef_threshold, axis=1, weights=v_w)
    temp_v2 = np.average(np.absolute(dic_coef[method])>coef_threshold, axis=1, weights=v_w)
    dic_gene_anno[method]['prop_upreg_w'] = temp_v1/temp_v2.clip(min=1e-3)
    
    # Add global aging gene label 
    dic_gene_anno[method]['global'] = dic_gene_anno[method]['prop_sig_w']>0.5
    
    dic_gene_anno[method]['global.dir'] = ''
    ind_select = (dic_gene_anno[method]['global']) & (dic_gene_anno[method]['prop_upreg_w']>0.8)
    dic_gene_anno[method].loc[ind_select, 'global.dir'] = 'up'
    ind_select = (dic_gene_anno[method]['global']) & (dic_gene_anno[method]['prop_upreg_w']<0.2)
    dic_gene_anno[method].loc[ind_select, 'global.dir'] = 'down'
    ind_select = (dic_gene_anno[method]['global']) & (dic_gene_anno[method]['global.dir']=='')
    dic_gene_anno[method].loc[ind_select, 'global.dir'] = 'other'
    
    print('%-20s GAG_total=%-5d GAG_80up=%-5d GAG_80down=%-5d GAG_other=%-5d'
          %(method, dic_gene_anno[method]['global'].sum(),
            (dic_gene_anno[method]['global.dir']=='up').sum(),
            (dic_gene_anno[method]['global.dir']=='down').sum(),
            (dic_gene_anno[method]['global.dir']=='other').sum()))

# Append tissue-level results
for method in METHOD_LIST+['bulk']:
    
    keyname='%s.tissue'%method

    dic_gene_anno[keyname] = pd.DataFrame(index = dic_gene_list[method])
    dic_gene_anno[keyname]['prop_sig'] = ((dic_H_fdr[keyname]<dic_fdr_threshold[method]) & 
                                         (np.absolute(dic_coef[keyname])>coef_threshold)).mean(axis=1)
    dic_gene_anno[keyname]['prop_upreg'] = (dic_coef[keyname]>coef_threshold).sum(axis=1) /\
                                            (np.absolute(dic_coef[keyname])>coef_threshold).sum(axis=1) 
    
    dic_gene_anno[keyname]['median_fc'] = dic_coef[keyname].median(axis=1)
    dic_gene_anno[keyname]['median_fdr'] = dic_H_fdr[keyname].median(axis=1)
    
    # Proportion, weighted by # of tissue-cell types
    dic_gene_anno[keyname]['prop_sig_w'] = dic_gene_anno[keyname]['prop_sig']
    dic_gene_anno[keyname]['prop_upreg_w'] = dic_gene_anno[keyname]['prop_upreg']
    
    # Add global aging gene label 
    dic_gene_anno[keyname]['global'] = dic_gene_anno[keyname]['prop_sig_w']>0.8
    
    dic_gene_anno[keyname]['global.dir'] = ''
    ind_select = (dic_gene_anno[keyname]['global']) & (dic_gene_anno[keyname]['prop_upreg_w']>0.8)
    dic_gene_anno[keyname].loc[ind_select, 'global.dir'] = 'up'
    ind_select = (dic_gene_anno[keyname]['global']) & (dic_gene_anno[keyname]['prop_upreg_w']<0.2)
    dic_gene_anno[keyname].loc[ind_select, 'global.dir'] = 'down'
    ind_select = (dic_gene_anno[keyname]['global']) & (dic_gene_anno[keyname]['global.dir']=='')
    dic_gene_anno[keyname].loc[ind_select, 'global.dir'] = 'other'
    
    print('%-20s GAG_total=%-5d GAG_80up=%-5d GAG_80down=%-5d GAG_other=%-5d'
          %(keyname, dic_gene_anno[keyname]['global'].sum(),
            (dic_gene_anno[keyname]['global.dir']=='up').sum(),
            (dic_gene_anno[keyname]['global.dir']=='down').sum(),
            (dic_gene_anno[keyname]['global.dir']=='other').sum()))
#     break

facs                 GAG_total=330   GAG_80up=93    GAG_80down=190   GAG_other=47   
droplet              GAG_total=59    GAG_80up=6     GAG_80down=4     GAG_other=49   
facs.tissue          GAG_total=147   GAG_80up=59    GAG_80down=74    GAG_other=14   
droplet.tissue       GAG_total=48    GAG_80up=1     GAG_80down=2     GAG_other=45   
bulk.tissue          GAG_total=0     GAG_80up=0     GAG_80down=0     GAG_other=0    


In [11]:
start_time = time.time()

temp_dic_comparison = {}

for method in METHOD_LIST:
    # Functional category
    for cate in CELLCATE_LIST:
        tar_list = [x for x in dic_analysis_list[method] 
                    if cate in dic_anno[method].loc[x,'cell category']]
        ref_list = [x for x in dic_analysis_list[method] if x not in tar_list]
        temp_dic_comparison['%s:spec_func:%s'%(method,cate)] = (tar_list,ref_list)
    # Tissue
    for tissue in set(dic_anno[method]['tissue']):
        tar_list = [x for x in dic_analysis_list[method] if dic_anno[method].loc[x,'tissue']==tissue]
        ref_list = [x for x in dic_analysis_list[method] if x not in tar_list]
        temp_dic_comparison['%s:spec_tissue:%s'%(method,tissue)] = (tar_list,ref_list)
    # Cell type
    for celltype in set(dic_anno[method]['cell_ontology_class']):
        tar_list = [x for x in dic_analysis_list[method] 
                    if dic_anno[method].loc[x,'cell_ontology_class']==celltype]
        ref_list = [x for x in dic_analysis_list[method] if x not in tar_list]
        temp_dic_comparison['%s:spec_celltype:%s'%(method,celltype)] = (tar_list,ref_list)
    # Tissue-cell type
    for analyte in dic_analysis_list[method]:
        tar_list = [analyte]
        ref_list = [x for x in dic_analysis_list[method] if x not in tar_list]
        temp_dic_comparison['%s:spec_tissue_celltype:%s'%(method,analyte)] = (tar_list,ref_list)
        
# Run meta analysis
for term in temp_dic_comparison.keys():
    
    method = term.split(':')[0]
    comparison_name = term.replace('%s:'%method,'')
    tar_list,ref_list = temp_dic_comparison[term]
    
    temp_df_mean_tar = dic_coef[method][tar_list].copy()
    temp_df_se_tar = dic_coef_se[method][tar_list].copy()
    temp_df_mean_ref = dic_coef[method][ref_list].copy()
    temp_df_se_ref = dic_coef_se[method][ref_list].copy()

    # Meta comparison
    v_p = []
    mean1 = []
    se1 = []
    mean2 = []
    se2 = []
    for i_gene,gene in enumerate(dic_gene_list[method]):

        mean_tar,se_tar = util.meta_analysis(temp_df_mean_tar.iloc[i_gene, :].values,
                                             temp_df_se_tar.iloc[i_gene, :].values)
        mean_ref,se_ref = util.meta_analysis(temp_df_mean_ref.iloc[i_gene, :].values,
                                             temp_df_se_ref.iloc[i_gene, :].values)
        
        mean1.append(mean_tar)
        se1.append(se_tar)
        mean2.append(mean_ref)
        se2.append(se_ref)
        v_p.append(util.get_p_two_point(mean_tar, se_tar, mean2=mean_ref, se2=se_ref))

    dic_gene_anno[method]['%s.mean'%comparison_name] = mean1
    dic_gene_anno[method]['%s.se'%comparison_name] = se1
    dic_gene_anno[method]['%s.mean_ref'%comparison_name] = mean2
    dic_gene_anno[method]['%s.se_ref'%comparison_name] = se2
    dic_gene_anno[method]['%s.p_dif'%comparison_name] = v_p
    dic_gene_anno[method]['%s.fdr_dif'%comparison_name] = multipletests(v_p, method='fdr_bh')[1]
    
    print('# %-30s time=%0.1fs'%(term, time.time()-start_time))

  tau2 = np.maximum(0, (Q-df) / (vwts.sum() - vwts.dot(vwts) / vwts.sum()))


# facs:spec_func:immune          time=83.1s
# facs:spec_func:stem cell/progenitor time=167.7s
# facs:spec_func:stromal         time=254.1s
# facs:spec_func:endothelial     time=341.0s
# facs:spec_func:epithelial      time=425.0s
# facs:spec_func:parenchymal     time=510.5s
# facs:spec_tissue:Spleen        time=573.1s
# facs:spec_tissue:Skin          time=627.2s
# facs:spec_tissue:Large_Intestine time=681.6s
# facs:spec_tissue:Brain_Non-Myeloid time=737.2s
# facs:spec_tissue:Tongue        time=800.8s


  summ = wt.dot(d) / wt.sum()
  varsum = np.sum(wt*wt*(variances+tau2)) / (np.sum(wt)**2)
  tau2 = np.maximum(0, (Q-df) / (vwts.sum() - vwts.dot(vwts) / vwts.sum()))
  reject = pvals_sorted <= ecdffactor*alpha


# facs:spec_tissue:Brain_Myeloid time=879.9s
# facs:spec_tissue:Trachea       time=958.4s
# facs:spec_tissue:Heart         time=1033.5s
# facs:spec_tissue:BAT           time=1088.5s
# facs:spec_tissue:Thymus        time=1144.1s
# facs:spec_tissue:Pancreas      time=1199.9s
# facs:spec_tissue:Bladder       time=1275.2s
# facs:spec_tissue:MAT           time=1341.0s
# facs:spec_tissue:GAT           time=1367.0s
# facs:spec_tissue:SCAT          time=1393.2s
# facs:spec_tissue:Kidney        time=1420.9s
# facs:spec_tissue:Aorta         time=1445.9s
# facs:spec_tissue:Diaphragm     time=1472.0s
# facs:spec_tissue:Liver         time=1498.8s
# facs:spec_tissue:Mammary_Gland time=1524.9s
# facs:spec_tissue:Marrow        time=1551.3s
# facs:spec_tissue:Limb_Muscle   time=1578.2s
# facs:spec_tissue:Lung          time=1605.1s
# facs:spec_celltype:adventitial cell time=1631.3s
# facs:spec_celltype:bronchial smooth muscle cell time=1657.2s
# facs:spec_celltype:granulocyte time=1683.1s
# facs:spec_ce

# facs:spec_tissue_celltype:SCAT.mesenchymal stem cell of adipose time=6582.7s
# facs:spec_tissue_celltype:SCAT.myeloid cell time=6634.5s
# facs:spec_tissue_celltype:Skin.basal cell of epidermis time=6686.4s
# facs:spec_tissue_celltype:Skin.bulge keratinocyte time=6738.1s
# facs:spec_tissue_celltype:Skin.epidermal cell time=6790.0s
# facs:spec_tissue_celltype:Spleen.B cell time=6821.8s
# facs:spec_tissue_celltype:Spleen.CD4-positive, alpha-beta T cell time=6846.8s
# facs:spec_tissue_celltype:Spleen.CD8-positive, alpha-beta T cell time=6871.8s
# facs:spec_tissue_celltype:Thymus.DN4 thymocyte time=6896.8s
# facs:spec_tissue_celltype:Thymus.thymocyte time=6922.0s
# facs:spec_tissue_celltype:Tongue.basal cell of epidermis time=6946.9s
# facs:spec_tissue_celltype:Tongue.keratinocyte time=6972.0s
# facs:spec_tissue_celltype:Trachea.endothelial cell time=6997.0s
# facs:spec_tissue_celltype:Trachea.fibroblast time=7022.0s
# facs:spec_tissue_celltype:Trachea.macrophage time=7046.9s
# droplet:sp

In [12]:
# Write results 
for method in METHOD_LIST:
    
    # Write full result
    temp_df = dic_gene_anno[method].copy()
    temp_df['gene'] = temp_df.index 
    temp_df = temp_df[['gene'] + [x for x in temp_df.columns if x !='gene']]
    temp_df.to_csv(DATA_PATH+'/result_v1/tms_gene_table/gene_stats_%s.081420.gz'%method,
                   sep='\t', header=True, index=False, compression='gzip')

for method in METHOD_LIST+['bulk']:
    # Write tissue-level results
    temp_df = dic_gene_anno['%s.tissue'%method].copy()
    temp_df['gene'] = temp_df.index 
    temp_df = temp_df[['gene'] + [x for x in temp_df.columns if x !='gene']]
    temp_df.to_csv(DATA_PATH+'/result_v1/tms_gene_table/gene_stats_%s_tissue.081420.gz'%method,
                   sep='\t', header=True, index=False, compression='gzip')