In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
from statsmodels.stats import multitest

In [2]:
# Load the per-pathway test results; the first CSV column becomes the row
# index, and rows are ordered by Gene and then Tissue for the steps below.
df = (
    pd.read_csv('kegg/dream_test.csv', index_col=0)
    .sort_values(by=['Gene', 'Tissue'])
)

In [3]:
# Benjamini-Hochberg correction applied separately within every
# (CellType, Tissue) group, so each group is its own family of tests.
# `assign` returns a new frame with the FDR column instead of writing into
# the object yielded by groupby, which avoids SettingWithCopyWarning.
fdr_frames = []
for _, item in df.groupby(['CellType', 'Tissue']):
    # multipletests returns (reject, pvals_corrected, ...); keep the adjusted p-values.
    fdr = multitest.multipletests(item['P.Value'].values, method='fdr_bh')[1]
    fdr_frames.append(item.assign(FDR=fdr))
# Groups are concatenated in groupby order (sorted by CellType, then Tissue).
df_res = pd.concat(fdr_frames)

In [4]:
# Number of rows with FDR < 0.05 in each (CellType, Tissue) group;
# groups without any such row do not appear in the result.
df_res.loc[df_res['FDR'] < 0.05].groupby(by=['CellType', 'Tissue']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Gene,logFC,P.Value,z.std,FDR
CellType,Tissue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ast2,PFC,1,1,1,1,1
Ast2,choroidpleus,1,1,1,1,1
Ep,PFC,7,7,7,7,7
Epi,PFC,2,2,2,2,2
Ex,choroidpleus,1,1,1,1,1
In,choroidpleus,2,2,2,2,2
In,medulla,1,1,1,1,1
Mes,PFC,1,1,1,1,1
Mic,PFC,16,16,16,16,16
Mo,PFC,1,1,1,1,1


In [5]:
# Preview the first five rows of the FDR-annotated table.
df_res.head(n=5)

Unnamed: 0,Gene,Tissue,CellType,logFC,P.Value,z.std,FDR
81,KEGG_ABC_TRANSPORTERS,PFC,Ast1,-0.009595,0.488271,-0.693062,0.995943
57,KEGG_ACUTE_MYELOID_LEUKEMIA,PFC,Ast1,-0.013438,0.363249,-0.909192,0.995943
150,KEGG_ADHERENS_JUNCTION,PFC,Ast1,-0.002784,0.876802,-0.155025,0.995943
5,KEGG_ADIPOCYTOKINE_SIGNALING_PATHWAY,PFC,Ast1,-0.023944,0.061123,-1.872608,0.995943
110,KEGG_ALANINE_ASPARTATE_AND_GLUTAMATE_METABOLISM,PFC,Ast1,0.009962,0.637672,0.470956,0.995943


In [6]:
# Write the FDR-annotated table to disk; index=False leaves the row index
# (the labels read from the first CSV column at load time) out of the file.
df_res.to_csv(path_or_buf='../table/Table_S3_KEGG_test.csv', index=False)

In [8]:
# Tissue x CellType tables counting rows that pass FDR < 0.05, split by the
# sign of logFC (df_up: logFC > 0, df_dn: logFC < 0).
id2ct = sorted(set(df['CellType']))
id2tissue = sorted(set(df['Tissue']))

# Start from all-zero tables so combinations with no data stay at 0.
df_up = pd.DataFrame(0, index=id2tissue, columns=id2ct)
df_dn = df_up.copy()

for (cell_type, tissue), group in df.groupby(['CellType', 'Tissue']):
    # Benjamini-Hochberg adjusted p-values computed within this group only.
    adj_p = multitest.multipletests(group['P.Value'].values, method='fdr_bh')[1]
    is_significant = adj_p < 0.05
    log_fc = group['logFC']
    df_up.loc[tissue, cell_type] = np.sum(is_significant & (log_fc > 0))
    df_dn.loc[tissue, cell_type] = np.sum(is_significant & (log_fc < 0))

In [9]:
# Element-wise total of positive- and negative-logFC significant counts.
df_up.add(df_dn)

Unnamed: 0,Ast1,Ast2,End,Ep,Epi,Ex,In,LM,Mes,Mic,Mo,Oli,Opc,Peri1,Peri2
PFC,0,1,0,7,2,0,0,0,1,16,1,0,0,0,0
choroidpleus,0,1,0,0,0,1,2,0,0,0,0,0,4,0,0
medulla,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [6]:
# The 16 smallest-P.Value rows for CellType 'Mic' in Tissue 'PFC'.
# NOTE(review): 16 presumably mirrors the number of FDR < 0.05 rows reported
# for Mic/PFC in the count table above -- confirm if the input data changes.
(
    df.loc[(df['Tissue'] == 'PFC') & (df['CellType'] == 'Mic')]
    .sort_values(by='P.Value')
    .head(16)
)

Unnamed: 0,Gene,Tissue,CellType,logFC,P.Value,z.std
5023,KEGG_FC_GAMMA_R_MEDIATED_PHAGOCYTOSIS,PFC,Mic,0.068116,2.5e-05,4.216556
5024,KEGG_PRIMARY_IMMUNODEFICIENCY,PFC,Mic,0.095417,0.00011,3.868188
5025,KEGG_VEGF_SIGNALING_PATHWAY,PFC,Mic,0.038755,0.000129,3.827614
5026,KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY,PFC,Mic,0.051493,0.000133,3.820312
5027,KEGG_FC_EPSILON_RI_SIGNALING_PATHWAY,PFC,Mic,0.061614,0.000224,3.689806
5028,KEGG_UBIQUITIN_MEDIATED_PROTEOLYSIS,PFC,Mic,0.032053,0.000332,3.588993
5029,KEGG_CHEMOKINE_SIGNALING_PATHWAY,PFC,Mic,0.031664,0.000519,3.470645
5030,KEGG_LYSINE_DEGRADATION,PFC,Mic,0.027297,0.000965,3.300508
5031,KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY,PFC,Mic,0.029544,0.001041,3.279125
5032,KEGG_ALPHA_LINOLENIC_ACID_METABOLISM,PFC,Mic,0.028562,0.001237,3.230179
