In [1]:
import numpy as np
import pandas as pd
import glob

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
pd.set_option('display.max_rows', 10)

In [4]:
# Import Barres celltype enrichment raw data
raw_df = pd.read_excel('../data/barreslab_rnaseq.xlsx',sheet_name='Raw Data',index_col='Gene symbol')

In [6]:
# Define each gene's expression level in oligodendrocyte as the average of three cell types
raw_df['Oligodendrocyte']=raw_df.loc[:,'Oligodendrocyte Precursor Cell':'Myelinating Oligodendrocytes'].mean(axis=1)

In [7]:
# Clean up table
raw_df.drop(['Description','Oligodendrocyte Precursor Cell','Newly Formed Oligodendrocyte','Myelinating Oligodendrocytes'],axis=1,inplace=True)

In [8]:
raw_df

Unnamed: 0_level_0,Astrocytes,Neuron,Microglia,Endothelial Cells,Oligodendrocyte
Gene symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0610005C13Rik,0.150958,0.100000,0.100000,0.100000,0.100000
0610007C21Rik,44.891260,34.185493,89.145033,63.561109,61.071109
0610007L01Rik,11.015039,12.028727,18.462419,13.521373,11.161263
0610007P08Rik,7.144110,2.020408,0.446900,4.693264,4.455001
0610007P14Rik,48.935666,24.370482,10.411304,10.569456,62.335636
...,...,...,...,...,...
Zyg11a,0.100000,0.100000,0.100000,0.100000,0.100000
Zyg11b,7.196140,14.748427,2.513958,7.883384,5.880200
Zyx,12.443373,12.732356,74.185894,35.416643,17.037024
Zzef1,9.346480,4.842996,2.433903,4.443834,4.460314


# Gene Ontology

In [21]:
# import GO of all protein-coding genes
go_raw_df = pd.read_csv('/Users/xulinhe/OneDrive/Herculano_Lab/GO_mouse_MGI_Oct_13_2019/all_Ensembl98_protein_coding_gene.princeton_GO_mapper.tsv',usecols=['TERM','ANNOTATED_GENES'],index_col='TERM',sep='\t')

In [22]:
transposed_go_df = go_raw_df.ANNOTATED_GENES.str.split(',',expand=True).T

In [24]:
go_dfs = []
for i, column in enumerate(transposed_go_df.columns):
    data = {'Gene':transposed_go_df[column]}
    df = pd.DataFrame(data)
    df = df.dropna()
    a_list = df.Gene.values.tolist()
    a_list = [x.strip(' ') for x in a_list]
    a_list_data = {'Gene':a_list}
    df = pd.DataFrame(a_list_data)
    df.insert(1,column,True)
    df.set_index('Gene',inplace=True)
    go_dfs.append(df)

In [25]:
go_terms = []
for go_df in go_dfs:
    go_term = go_df.columns[0]
    go_terms.append(go_term)
    #work_df= pd.merge(work_df, go_df, how='left', left_on='Gene symbol', right_on='Gene')
    raw_df = pd.merge(raw_df, go_df, how='left', left_index=True, right_index=True)
    raw_df.replace({go_term: {np.nan:False}},inplace=True)

In [27]:
size_go_df = pd.read_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Benchmark_Genes/GO-regulation_of_cell_size.txt',sep='\t')

In [28]:
size_go_df['regulation of cell size']=True

In [30]:
size_go_df.drop(size_go_df.columns[1:-1],axis=1,inplace=True)
size_go_df.drop_duplicates(inplace=True)
#size_go_df.drop(size_go_df.columns[0:-1],axis=1,inplace=True)
#df.drop(df.columns[[0, 4, 2]], axis = 1, inplace = True)

In [31]:
size_go_df.reset_index(drop=True,inplace=True)

In [32]:
size_go_df.rename(columns={'MGI Gene/Marker ID':'Gene symbol'}, inplace=True)

In [33]:
size_go_df.set_index('Gene symbol',inplace=True)

In [34]:
raw_df= pd.merge(raw_df, size_go_df, how='left', left_index=True, right_index=True)

In [35]:
raw_df.replace({'regulation of cell size': {np.nan:False}},inplace=True)

# Ensembl 98 mouse dN/dS (averaged across 92 species)

In [10]:
# dN/dS of all protein coding genes
dnds_df = pd.read_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/92species-to-mouse.ensembl98.avg_dnds.tsv',sep='\t',index_col='Gene name')

In [40]:
dnds_arr = dnds_df['avg_dNdS'].dropna().values

In [41]:
raw_df = pd.merge(raw_df,dnds_df,left_index=True, right_index=True, how='inner')

In [44]:
a_df = raw_df.loc[raw_df['Astrocytes']>4*raw_df[['Neuron','Microglia','Endothelial Cells','Oligodendrocyte']].mean(axis=1)].copy(deep=True)
n_df = raw_df.loc[raw_df['Neuron']>4*raw_df[['Astrocytes','Microglia','Endothelial Cells','Oligodendrocyte']].mean(axis=1)].copy(deep=True)
m_df = raw_df.loc[raw_df['Microglia']>4*raw_df[['Astrocytes','Neuron','Endothelial Cells','Oligodendrocyte']].mean(axis=1)].copy(deep=True)
e_df = raw_df.loc[raw_df['Endothelial Cells']>4*raw_df[['Astrocytes','Microglia','Neuron','Oligodendrocyte']].mean(axis=1)].copy(deep=True)
o_df = raw_df.loc[raw_df['Oligodendrocyte']>4*raw_df[['Astrocytes','Microglia','Endothelial Cells','Neuron']].mean(axis=1)].copy(deep=True)

In [46]:
o_df.to_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_16_2020.oligodendrocyte_gene_list.txt',columns=[],header=False,index=True)
e_df.to_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_16_2020.endothelia_gene_list.txt',columns=[],header=False,index=True)
m_df.to_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_16_2020.microglia_gene_list.txt',columns=[],header=False,index=True)
n_df.to_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_16_2020.neuron_gene_list.txt',columns=[],header=False,index=True)
a_df.to_csv('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_16_2020.astrocyte_gene_list.txt',columns=[],header=False,index=True)

In [48]:
a_df = a_df.drop(['Astrocytes','Neuron','Microglia','Endothelial Cells','Oligodendrocyte'],axis=1)
n_df = n_df.drop(['Astrocytes','Neuron','Microglia','Endothelial Cells','Oligodendrocyte'],axis=1)
m_df = m_df.drop(['Astrocytes','Neuron','Microglia','Endothelial Cells','Oligodendrocyte'],axis=1)
e_df = e_df.drop(['Astrocytes','Neuron','Microglia','Endothelial Cells','Oligodendrocyte'],axis=1)
o_df = o_df.drop(['Astrocytes','Neuron','Microglia','Endothelial Cells','Oligodendrocyte'],axis=1)

In [49]:
a_df['Astrocyte-specific']='In Astrocyte'
n_df['Neuron-specific']='In Neuron'
m_df['Microglia-specific']='In Microglia'
e_df['Endothelia-specific']='In Endothelia'
o_df['Oligodendrocyte-specific']='In Oligodendrocyte'

In [50]:
gene_info_df = pd.concat([a_df,n_df,m_df,e_df,o_df],sort=False)

In [51]:
gene_info_df['Astrocyte-specific'].fillna(value='Not IN Astrocyte',inplace=True)
gene_info_df['Neuron-specific'].fillna(value='Not IN Neuron',inplace=True)
gene_info_df['Microglia-specific'].fillna(value='Not IN Microglia',inplace=True)
gene_info_df['Endothelia-specific'].fillna(value='Not IN Endothelia',inplace=True)
gene_info_df['Oligodendrocyte-specific'].fillna(value='Not IN Oligodendrocyte',inplace=True)

In [52]:
# Setting the threshold of average dN/dS for a gene to be considered high-dN/dS or low-dN/dS
high_cutoff = gene_info_df['avg_dNdS'].quantile(0.75)
low_cutoff = gene_info_df['avg_dNdS'].quantile(0.25)
high_cutoff
low_cutoff

0.19135320683079418

0.055883309714075666

In [54]:
high_dNdS_df = gene_info_df.loc[(gene_info_df['avg_dNdS'] >= high_cutoff)].copy(deep=True)
low_dNdS_df = gene_info_df.loc[(gene_info_df['avg_dNdS'] <= low_cutoff)].copy(deep=True)
gene_info_df['low_dN/dS'] = 'other genes'
gene_info_df.loc[low_dNdS_df.index,'low_dN/dS'] = '< cutoff'
gene_info_df['high_dN/dS'] = 'other genes'
gene_info_df.loc[high_dNdS_df.index,'high_dN/dS'] = '> cutoff'

In [55]:
med = gene_info_df['avg_dNdS'].median(axis=0)

In [57]:
# I also need a list of GO terms later for contingency analysis
go_list = raw_df.columns[6:-8]

In [68]:
go_list=go_list.to_list()

In [69]:
go_list.append('regulation of cell size')

# Graphs: Celltype-specific Genes vs. Benchmarks

In [59]:
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
import seaborn as sns
import matplotlib.ticker as mtick

In [60]:
matplotlib.rcParams['figure.dpi']= 300

### Redo Contingency Analysis for each GO with the new Ensembl 98 data

In [61]:
font = {'size':8}
matplotlib.rc('font', **font)

In [62]:
def crosstable (df,Celltype,GO): # GO can be a GO term or low_dN/dS or high_dN/dS for categorical analysis with dN/dS score 
    table = pd.crosstab(df.loc[:,Celltype],df.loc[:,GO])
    odds_ratio = stats.fisher_exact(table)[0]
    chi2stats = stats.chi2_contingency(table)
    col_sum = table.sum(axis=0)
    col_sum.name = 'Total'
    table = table.append(col_sum)
    table['Total'] = table.sum(axis=1)
    # This step can actually be accomplished by stating margins=True in pd.crosstab()
    return(table,chi2stats,odds_ratio)

In [73]:
def tab_plot(fig,df,GO):
    # fig is the figure that this table plot will be draw in
    # N is the serial number of the table. If it's the first table, then input 1
    # df is the dataframe with celltype info columns and GO info columns (here 'gene_info_df')
    # dNdS_cat is the column of category of dN/dS level, so high_dN/dS or low_dN/dS
    
    n_gene_GO = df[df[GO]==True].shape[0]
    
    n=5 # the number of lines ocupied by distribution histogram 
    
    ##### Astrocyte #####
    
    ### low dN/dS ###
    
    # Generating the contingency table for astrocyte and low dN/dS as a dataframe 
    table_A_low, stat_A_low, odds_A_low= crosstable(df[df[GO]==True],'Astrocyte-specific','low_dN/dS')
    # Statistics for Astrocyte and low dN/dS
    tot_low = table_A_low.iloc[2,0] #Total number of genes afiliated with this GO with avg dN/dS lower than 1st quantile
    tot_not_low = table_A_low.iloc[2,1] #Total number of genes afiliated with this GO with avg dN/dS higher than 1st quantile
    tot_A = table_A_low.iloc[0,2] #Total number of astrocyte-specific genes afiliated with this GO
    tot_not_A = table_A_low.iloc[1,2] #Total number of genes afiliated with this GO that are not astrocyte-specific
    # The expected (fitted) value for each cell in the contingency table
    expected_A_low_arr = np.outer(table_A_low.loc[:,'Total'],table_A_low.loc['Total',:])/table_A_low.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_A_low.iloc[0,0]='{0} ({1:.2f})'.format(table_A_low.iloc[0,0],expected_A_low_arr[0,0])
    table_A_low.iloc[0,1]='{0} ({1:.2f})'.format(table_A_low.iloc[0,1],expected_A_low_arr[0,1])
    table_A_low.iloc[1,0]='{0} ({1:.2f})'.format(table_A_low.iloc[1,0],expected_A_low_arr[1,0])
    table_A_low.iloc[1,1]='{0} ({1:.2f})'.format(table_A_low.iloc[1,1],expected_A_low_arr[1,1])
    # Set axes for contingency table against astrocyte
    tabA_low_plot = fig.add_subplot(grid[n+0,0])
    tabA_low_plot.axis('tight')
    tabA_low_plot.axis('off')
    stringA_low = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_A_low[0],stat_A_low[1],odds_A_low)
    
    ### High dN/dS ### 
    
    # Generating the contingency table for astrocyte and high dN/dS as a dataframe 
    table_A_high, stat_A_high, odds_A_high= crosstable(df[df[GO]==True],'Astrocyte-specific','high_dN/dS')
    # Statistics for Astrocyte and high dN/dS
    tot_high = table_A_high.iloc[2,0]
    tot_not_high = table_A_high.iloc[2,1]
    # The expected (fitted) value for each cell in the contingency table
    expected_A_high_arr = np.outer(table_A_high.loc[:,'Total'],table_A_high.loc['Total',:])/table_A_high.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_A_high.iloc[0,0]='{0} ({1:.2f})'.format(table_A_high.iloc[0,0],expected_A_high_arr[0,0])
    table_A_high.iloc[0,1]='{0} ({1:.2f})'.format(table_A_high.iloc[0,1],expected_A_high_arr[0,1])
    table_A_high.iloc[1,0]='{0} ({1:.2f})'.format(table_A_high.iloc[1,0],expected_A_high_arr[1,0])
    table_A_high.iloc[1,1]='{0} ({1:.2f})'.format(table_A_high.iloc[1,1],expected_A_high_arr[1,1])
    # Set axes for contingency table against astrocyte
    tabA_high_plot = fig.add_subplot(grid[n+0,1])
    tabA_high_plot.axis('tight')
    tabA_high_plot.axis('off')
    stringA_high = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_A_high[0],stat_A_high[1],odds_A_high)
    
    ##### Microglia #####
    
    ### low dN/dS ###
    
    # Generating the contingency table for microglia and low dN/dS as a dataframe 
    table_M_low, stat_M_low, odds_M_low= crosstable(df[df[GO]==True],'Microglia-specific','low_dN/dS')
    # Statistics for Microglia and low dN/dS
    tot_M = table_M_low.iloc[0,2] #Total number of microglia-specific genes afiliated with this GO
    tot_not_M = table_M_low.iloc[1,2] #Total number of genes afiliated with this GO that are not microglia-specific
    # The expected (fitted) value for each cell in the contingency table
    expected_M_low_arr = np.outer(table_M_low.loc[:,'Total'],table_M_low.loc['Total',:])/table_M_low.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_M_low.iloc[0,0]='{0} ({1:.2f})'.format(table_M_low.iloc[0,0],expected_M_low_arr[0,0])
    table_M_low.iloc[0,1]='{0} ({1:.2f})'.format(table_M_low.iloc[0,1],expected_M_low_arr[0,1])
    table_M_low.iloc[1,0]='{0} ({1:.2f})'.format(table_M_low.iloc[1,0],expected_M_low_arr[1,0])
    table_M_low.iloc[1,1]='{0} ({1:.2f})'.format(table_M_low.iloc[1,1],expected_M_low_arr[1,1])
    # Set axes for contingency table against microglia
    tabM_low_plot = fig.add_subplot(grid[n+2,0])
    tabM_low_plot.axis('tight')
    tabM_low_plot.axis('off')
    stringM_low = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_M_low[0],stat_M_low[1],odds_M_low)
    
    ### High dN/dS ###
    
    # Generating the contingency table for microglia and high dN/dS as a dataframe 
    table_M_high, stat_M_high, odds_M_high= crosstable(df[df[GO]==True],'Microglia-specific','high_dN/dS')
    # Statistics for Microglia and high dN/dS
    tot_high = table_M_high.iloc[2,0]
    tot_not_high = table_M_high.iloc[2,1]
    # The expected (fitted) value for each cell in the contingency table
    expected_M_high_arr = np.outer(table_M_high.loc[:,'Total'],table_M_high.loc['Total',:])/table_M_high.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_M_high.iloc[0,0]='{0} ({1:.2f})'.format(table_M_high.iloc[0,0],expected_M_high_arr[0,0])
    table_M_high.iloc[0,1]='{0} ({1:.2f})'.format(table_M_high.iloc[0,1],expected_M_high_arr[0,1])
    table_M_high.iloc[1,0]='{0} ({1:.2f})'.format(table_M_high.iloc[1,0],expected_M_high_arr[1,0])
    table_M_high.iloc[1,1]='{0} ({1:.2f})'.format(table_M_high.iloc[1,1],expected_M_high_arr[1,1])
    # Set axes for contingency table against microglia
    tabM_high_plot = fig.add_subplot(grid[n+2,1])
    tabM_high_plot.axis('tight')
    tabM_high_plot.axis('off')
    stringM_high = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_M_high[0],stat_M_high[1],odds_M_high)
    
    ##### Oligodendrocyte #####
    
    ### low dN/dS ###
    
    # Generating the contingency table for oligodendrocyte and low dN/dS as a dataframe 
    table_O_low, stat_O_low, odds_O_low= crosstable(df[df[GO]==True],'Oligodendrocyte-specific','low_dN/dS')
    # Statistics for Oligodendrocyte and low dN/dS
    tot_O = table_O_low.iloc[0,2] #Total number of oligodendrocyte-specific genes afiliated with this GO
    tot_not_O = table_O_low.iloc[1,2] #Total number of genes afiliated with this GO that are not oligodendrocyte-specific
    # The expected (fitted) value for each cell in the contingency table
    expected_O_low_arr = np.outer(table_O_low.loc[:,'Total'],table_O_low.loc['Total',:])/table_O_low.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_O_low.iloc[0,0]='{0} ({1:.2f})'.format(table_O_low.iloc[0,0],expected_O_low_arr[0,0])
    table_O_low.iloc[0,1]='{0} ({1:.2f})'.format(table_O_low.iloc[0,1],expected_O_low_arr[0,1])
    table_O_low.iloc[1,0]='{0} ({1:.2f})'.format(table_O_low.iloc[1,0],expected_O_low_arr[1,0])
    table_O_low.iloc[1,1]='{0} ({1:.2f})'.format(table_O_low.iloc[1,1],expected_O_low_arr[1,1])
    # Set axes for contingency table against oligodendrocyte
    tabO_low_plot = fig.add_subplot(grid[n+4,0])
    tabO_low_plot.axis('tight')
    tabO_low_plot.axis('off')
    stringO_low = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_O_low[0],stat_O_low[1],odds_O_low)
    
    ### High dN/dS ###
    
    # Generating the contingency table for oligodendrocyte and high dN/dS as a dataframe 
    table_O_high, stat_O_high, odds_O_high= crosstable(df[df[GO]==True],'Oligodendrocyte-specific','high_dN/dS')
    # Statistics for Oligodendrocyte and high dN/dS
    tot_high = table_O_high.iloc[2,0]
    tot_not_high = table_O_high.iloc[2,1]
    # The expected (fitted) value for each cell in the contingency table
    expected_O_high_arr = np.outer(table_O_high.loc[:,'Total'],table_O_high.loc['Total',:])/table_O_high.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_O_high.iloc[0,0]='{0} ({1:.2f})'.format(table_O_high.iloc[0,0],expected_O_high_arr[0,0])
    table_O_high.iloc[0,1]='{0} ({1:.2f})'.format(table_O_high.iloc[0,1],expected_O_high_arr[0,1])
    table_O_high.iloc[1,0]='{0} ({1:.2f})'.format(table_O_high.iloc[1,0],expected_O_high_arr[1,0])
    table_O_high.iloc[1,1]='{0} ({1:.2f})'.format(table_O_high.iloc[1,1],expected_O_high_arr[1,1])
    # Set axes for contingency table against oligodendrocyte
    tabO_high_plot = fig.add_subplot(grid[n+4,1])
    tabO_high_plot.axis('tight')
    tabO_high_plot.axis('off')
    stringO_high = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_O_high[0],stat_O_high[1],odds_O_high)
    
        ##### Neuron #####
    
    ### low dN/dS ###
    
    # Generating the contingency table for neuron and low dN/dS as a dataframe 
    table_N_low, stat_N_low, odds_N_low= crosstable(df[df[GO]==True],'Neuron-specific','low_dN/dS')    
    # Statistics for Neuron and low dN/dS
    tot_low = table_N_low.iloc[2,0] #Total number of genes afiliated with this GO with avg dN/dS lower than 1st quantile
    tot_not_low = table_N_low.iloc[2,1] #Total number of genes afiliated with this GO with avg dN/dS higher than 1st quantile
    tot_N = table_N_low.iloc[0,2] #Total number of neuron-specific genes afiliated with this GO
    tot_not_N = table_N_low.iloc[1,2] #Total number of genes afiliated with this GO that are not neuron-specific
    # The expected (fitted) value for each cell in the contingency table
    expected_N_low_arr = np.outer(table_N_low.loc[:,'Total'],table_N_low.loc['Total',:])/table_N_low.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_N_low.iloc[0,0]='{0} ({1:.2f})'.format(table_N_low.iloc[0,0],expected_N_low_arr[0,0])
    table_N_low.iloc[0,1]='{0} ({1:.2f})'.format(table_N_low.iloc[0,1],expected_N_low_arr[0,1])
    table_N_low.iloc[1,0]='{0} ({1:.2f})'.format(table_N_low.iloc[1,0],expected_N_low_arr[1,0])
    table_N_low.iloc[1,1]='{0} ({1:.2f})'.format(table_N_low.iloc[1,1],expected_N_low_arr[1,1])
    # Set axes for contingency table against neuron
    tabN_low_plot = fig.add_subplot(grid[n+6,0])
    tabN_low_plot.axis('tight')
    tabN_low_plot.axis('off')
    stringN_low = 'n = {0:d} genes belong to {1:s}'.format(n_gene_GO,GO)
    stringN_low = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_N_low[0],stat_N_low[1], odds_N_low)
    
    ### High dN/dS ### 
    
    # Generating the contingency table for neuron and high dN/dS as a dataframe 
    table_N_high, stat_N_high, odds_N_high= crosstable(df[df[GO]==True],'Neuron-specific','high_dN/dS')
    # Statistics for Neuron and high dN/dS
    tot_high = table_N_high.iloc[2,0]
    tot_not_high = table_N_high.iloc[2,1]
    # The expected (fitted) value for each cell in the contingency table
    expected_N_high_arr = np.outer(table_N_high.loc[:,'Total'],table_N_high.loc['Total',:])/table_N_high.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_N_high.iloc[0,0]='{0} ({1:.2f})'.format(table_N_high.iloc[0,0],expected_N_high_arr[0,0])
    table_N_high.iloc[0,1]='{0} ({1:.2f})'.format(table_N_high.iloc[0,1],expected_N_high_arr[0,1])
    table_N_high.iloc[1,0]='{0} ({1:.2f})'.format(table_N_high.iloc[1,0],expected_N_high_arr[1,0])
    table_N_high.iloc[1,1]='{0} ({1:.2f})'.format(table_N_high.iloc[1,1],expected_N_high_arr[1,1])
    # Set axes for contingency table against neuron
    tabN_high_plot = fig.add_subplot(grid[n+6,1])
    tabN_high_plot.axis('tight')
    tabN_high_plot.axis('off')
    stringN_high = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_N_high[0],stat_N_high[1],odds_N_high)
    
    ##### Endothelia #####
    
    ### low dN/dS ###
    
    # Generating the contingency table for endothelia and low dN/dS as a dataframe 
    table_E_low, stat_E_low, odds_E_low= crosstable(df[df[GO]==True],'Endothelia-specific','low_dN/dS')
    # Statistics for Endothelia and low dN/dS
    tot_E = table_E_low.iloc[0,2] #Total number of endothelia-specific genes afiliated with this GO
    tot_not_E = table_E_low.iloc[1,2] #Total number of genes afiliated with this GO that are not endothelia-specific
    # The expected (fitted) value for each cell in the contingency table
    expected_E_low_arr = np.outer(table_E_low.loc[:,'Total'],table_E_low.loc['Total',:])/table_E_low.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_E_low.iloc[0,0]='{0} ({1:.2f})'.format(table_E_low.iloc[0,0],expected_E_low_arr[0,0])
    table_E_low.iloc[0,1]='{0} ({1:.2f})'.format(table_E_low.iloc[0,1],expected_E_low_arr[0,1])
    table_E_low.iloc[1,0]='{0} ({1:.2f})'.format(table_E_low.iloc[1,0],expected_E_low_arr[1,0])
    table_E_low.iloc[1,1]='{0} ({1:.2f})'.format(table_E_low.iloc[1,1],expected_E_low_arr[1,1])
    # Set axes for contingency table against endothelia
    tabE_low_plot = fig.add_subplot(grid[n+8,0])
    tabE_low_plot.axis('tight')
    tabE_low_plot.axis('off')
    stringE_low = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_E_low[0],stat_E_low[1],odds_E_low)
    
    ### High dN/dS ###
    
    # Generating the contingency table for endothelia and high dN/dS as a dataframe 
    table_E_high, stat_E_high, odds_E_high= crosstable(df[df[GO]==True],'Endothelia-specific','high_dN/dS')
    # Statistics for Endothelia and high dN/dS
    tot_high = table_E_high.iloc[2,0]
    tot_not_high = table_E_high.iloc[2,1]
    # The expected (fitted) value for each cell in the contingency table
    expected_E_high_arr = np.outer(table_E_high.loc[:,'Total'],table_E_high.loc['Total',:])/table_E_high.loc['Total','Total'] 
    # Put the expected value into the table with the observed value 
    table_E_high.iloc[0,0]='{0} ({1:.2f})'.format(table_E_high.iloc[0,0],expected_E_high_arr[0,0])
    table_E_high.iloc[0,1]='{0} ({1:.2f})'.format(table_E_high.iloc[0,1],expected_E_high_arr[0,1])
    table_E_high.iloc[1,0]='{0} ({1:.2f})'.format(table_E_high.iloc[1,0],expected_E_high_arr[1,0])
    table_E_high.iloc[1,1]='{0} ({1:.2f})'.format(table_E_high.iloc[1,1],expected_E_high_arr[1,1])
    # Set axes for contingency table against endothelia
    tabE_high_plot = fig.add_subplot(grid[n+8,1])
    tabE_high_plot.axis('tight')
    tabE_high_plot.axis('off')
    stringE_high = 'chi2: {0:.3f}, p: {1:.10f}, OR: {2:.5f}'.format(stat_E_high[0],stat_E_high[1],odds_E_high)
    
    
    ########## Drawing Contingency Tables ##########
    
    # Drawing the contingency table against neuron-low onto figure 
    cell_text = []
    for row in range(len(table_N_low)):
        cell_text.append(table_N_low.iloc[row])
    tabN_low = tabN_low_plot.table(cellText=cell_text, rowLabels=table_N_low.index,colLabels=table_N_low.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabN_low.auto_set_font_size(False)
    tabN_low.set_fontsize(7)
    # Drawing the contingency table against neuron-high onto figure
    cell_text = []
    for row in range(len(table_N_high)):
        cell_text.append(table_N_high.iloc[row])
    tabN_high = tabN_high_plot.table(cellText=cell_text, rowLabels=table_N_high.index,colLabels=table_N_high.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabN_high.auto_set_font_size(False)
    tabN_high.set_fontsize(7)
    
    # Drawing the contingency table against endothelia-low onto figure 
    cell_text = []
    for row in range(len(table_E_low)):
        cell_text.append(table_E_low.iloc[row])
    tabE_low = tabE_low_plot.table(cellText=cell_text, rowLabels=table_E_low.index,colLabels=table_E_low.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabE_low.auto_set_font_size(False)
    tabE_low.set_fontsize(7)
    # Drawing the contingency table against endothelia-high onto figure
    cell_text = []
    for row in range(len(table_E_high)):
        cell_text.append(table_E_high.iloc[row])
    tabE_high = tabE_high_plot.table(cellText=cell_text, rowLabels=table_E_high.index,colLabels=table_E_high.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabE_high.auto_set_font_size(False)
    tabE_high.set_fontsize(7)
    
    # Drawing the contingency table against astrocyte-low onto figure 
    cell_text = []
    for row in range(len(table_A_low)):
        cell_text.append(table_A_low.iloc[row])
    tabA_low = tabA_low_plot.table(cellText=cell_text, rowLabels=table_A_low.index,colLabels=table_A_low.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabA_low.auto_set_font_size(False)
    tabA_low.set_fontsize(7)
    # Drawing the contingency table against astrocyte-high onto figure
    cell_text = []
    for row in range(len(table_A_high)):
        cell_text.append(table_A_high.iloc[row])
    tabA_high = tabA_high_plot.table(cellText=cell_text, rowLabels=table_A_high.index,colLabels=table_A_high.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabA_high.auto_set_font_size(False)
    tabA_high.set_fontsize(7)
    
    # Drawing the contingency table against microglia-low onto figure 
    cell_text = []
    for row in range(len(table_M_low)):
        cell_text.append(table_M_low.iloc[row])
    tabM_low = tabM_low_plot.table(cellText=cell_text, rowLabels=table_M_low.index,colLabels=table_M_low.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabM_low.auto_set_font_size(False)
    tabM_low.set_fontsize(7)
    # Drawing the contingency table against microglia-high onto figure
    cell_text = []
    for row in range(len(table_M_high)):
        cell_text.append(table_M_high.iloc[row])
    tabM_high = tabM_high_plot.table(cellText=cell_text, rowLabels=table_M_high.index,colLabels=table_M_high.columns, colWidths=[0.35, 0.35, 0.15], loc='center')
    tabM_high.auto_set_font_size(False)
    tabM_high.set_fontsize(7)
    
    # Drawing the contingency table against oligodendrocyte-low onto figure 
    cell_text = []
    for row in range(len(table_O_low)):
        cell_text.append(table_O_low.iloc[row])
    tabO_low = tabO_low_plot.table(cellText=cell_text, rowLabels=table_O_low.index,colLabels=table_O_low.columns, colWidths=[0.28, 0.28, 0.11], loc='center')
    tabO_low.auto_set_font_size(False)
    tabO_low.set_fontsize(7)
    # Drawing the contingency table against oligodendrocyte-high onto figure
    cell_text = []
    for row in range(len(table_O_high)):
        cell_text.append(table_O_high.iloc[row])
    tabO_high = tabO_high_plot.table(cellText=cell_text, rowLabels=table_O_high.index,colLabels=table_O_high.columns, colWidths=[0.28, 0.28, 0.11], loc='center')
    tabO_high.auto_set_font_size(False)
    tabO_high.set_fontsize(7)
    
    ########## Writing statistics of each contingency table under the table ##########
    
    # Set axes for chi-square statistics for Neuron
    low_Nchi2_plot = fig.add_subplot(grid[n+7,0])
    low_Nchi2_plot.axis('tight')
    low_Nchi2_plot.axis('off')
    high_Nchi2_plot = fig.add_subplot(grid[n+7,1])
    high_Nchi2_plot.axis('tight')
    high_Nchi2_plot.axis('off')
    
    # Set axes for chi-square statistics for Endothilial cells
    low_Echi2_plot = fig.add_subplot(grid[n+9,0])
    low_Echi2_plot.axis('tight')
    low_Echi2_plot.axis('off')
    high_Echi2_plot = fig.add_subplot(grid[n+9,1])
    high_Echi2_plot.axis('tight')
    high_Echi2_plot.axis('off')
    
    # Set axes for chi-square statistics for Astrocyte
    low_Achi2_plot = fig.add_subplot(grid[n+1,0])
    low_Achi2_plot.axis('tight')
    low_Achi2_plot.axis('off')
    high_Achi2_plot = fig.add_subplot(grid[n+1,1])
    high_Achi2_plot.axis('tight')
    high_Achi2_plot.axis('off')
    
    # Set axes for chi-square statistics for Endothilial cells
    low_Mchi2_plot = fig.add_subplot(grid[n+3,0])
    low_Mchi2_plot.axis('tight')
    low_Mchi2_plot.axis('off')
    high_Mchi2_plot = fig.add_subplot(grid[n+3,1])
    high_Mchi2_plot.axis('tight')
    high_Mchi2_plot.axis('off')
    
    # Set axes for chi-square statistics for Oligodendrocytel cells
    low_Ochi2_plot = fig.add_subplot(grid[n+5,0])
    low_Ochi2_plot.axis('tight')
    low_Ochi2_plot.axis('off')
    high_Ochi2_plot = fig.add_subplot(grid[n+5,1])
    high_Ochi2_plot.axis('tight')
    high_Ochi2_plot.axis('off')
    
    low_Achi2_plot.text(0.25, 0.6, stringA_low,
        horizontalalignment='center',
        verticalalignment='center',
        transform=low_Achi2_plot.transAxes)
    high_Achi2_plot.text(0.25, 0.6, stringA_high,
        horizontalalignment='center',
        verticalalignment='center',
        transform=high_Achi2_plot.transAxes)
    
    low_Mchi2_plot.text(0.25, 0.6, stringM_low,
        horizontalalignment='center',
        verticalalignment='center',
        transform=low_Mchi2_plot.transAxes)
    high_Mchi2_plot.text(0.25, 0.6, stringM_high,
        horizontalalignment='center',
        verticalalignment='center',
        transform=high_Mchi2_plot.transAxes)
    
    low_Ochi2_plot.text(0.25, 0.6, stringO_low,
        horizontalalignment='center',
        verticalalignment='center',
        transform=low_Ochi2_plot.transAxes)
    high_Ochi2_plot.text(0.25, 0.6, stringO_high,
        horizontalalignment='center',
        verticalalignment='center',
        transform=high_Ochi2_plot.transAxes)
    
    low_Nchi2_plot.text(0.25, 0.6, stringN_low,
        horizontalalignment='center',
        verticalalignment='center',
        transform=low_Nchi2_plot.transAxes)
    high_Nchi2_plot.text(0.25, 0.6, stringN_high,
        horizontalalignment='center',
        verticalalignment='center',
        transform=high_Nchi2_plot.transAxes)
    
    low_Echi2_plot.text(0.25, 0.6, stringE_low,
        horizontalalignment='center',
        verticalalignment='center',
        transform=low_Echi2_plot.transAxes)
    high_Echi2_plot.text(0.25, 0.6, stringE_high,
        horizontalalignment='center',
        verticalalignment='center',
        transform=high_Echi2_plot.transAxes)
    
    tab_plot.Nchi2_num=stat_N_low[0]
    tab_plot.Np_num=stat_N_low[1]

In [117]:
# create a excel table of contingency analysis stats results for each GO
table_df=pd.DataFrame(columns=['GO','num_genes',
                            'n_med','low_n_chi2','low_n_p',
                            'low_n_OR','low_n_fisher_p',
                            'high_n_chi2','high_n_p',
                            'high_n_OR','high_n_fisher_p',
                            'e_med','low_e_chi2','low_e_p',
                            'low_e_OR','low_e_fisher_p',
                            'high_e_chi2','high_e_p',
                            'high_e_OR','high_e_fisher_p',
                            'o_med','low_o_chi2','low_o_p',
                            'low_o_OR','low_o_fisher_p',
                            'high_o_chi2','high_o_p',
                            'high_o_OR','high_o_fisher_p',
                            'a_med','low_a_chi2','low_a_p',
                            'low_a_OR','low_a_fisher_p',
                            'high_a_chi2','high_a_p',
                            'high_a_OR','high_a_fisher_p',
                            'm_med','low_m_chi2','low_m_p',
                            'low_m_OR','low_m_fisher_p',
                            'high_m_chi2','high_m_p',
                            'high_m_OR','high_m_fisher_p'])
for GO in go_list:
    df=gene_info_df.loc[gene_info_df[GO] == True].copy(deep=True)
    num_genes = df.shape[0]
    try:
        #Neuron
        n_med=df[df['Neuron-specific']=='In Neuron']['avg_dNdS'].median()
        #lower than threshold 
        low_n_table=pd.crosstab(df.loc[:,'Neuron-specific'],df.loc[:,'low_dN/dS'])
        (low_n_OR,low_n_fisher_p)=stats.fisher_exact(low_n_table)
        (low_n_chi2,low_n_p,low_n_dof,low_n_expected_table)=stats.chi2_contingency(low_n_table)
        #higher than threshold
        high_n_table=pd.crosstab(df.loc[:,'Neuron-specific'],df.loc[:,'high_dN/dS'])
        (high_n_OR,high_n_fisher_p)=stats.fisher_exact(high_n_table)
        (high_n_chi2,high_n_p,high_n_dof,high_n_expected_table)=stats.chi2_contingency(high_n_table)
        
        #Endothelia
        e_med=df[df['Endothelia-specific']=='In Endothelia']['avg_dNdS'].median()
        #lower than threshold 
        low_e_table=pd.crosstab(df.loc[:,'Endothelia-specific'],df.loc[:,'low_dN/dS'])
        (low_e_OR,low_e_fisher_p)=stats.fisher_exact(low_e_table)
        (low_e_chi2,low_e_p,low_e_dof,low_e_expected_table)=stats.chi2_contingency(low_e_table)
        #higher than threshold
        high_e_table=pd.crosstab(df.loc[:,'Endothelia-specific'],df.loc[:,'high_dN/dS'])
        (high_e_OR,high_e_fisher_p)=stats.fisher_exact(high_e_table)
        (high_e_chi2,high_e_p,high_e_dof,high_e_expected_table)=stats.chi2_contingency(high_e_table)
        
        #Oligodendrocyte
        o_med=df[df['Oligodendrocyte-specific']=='In Oligodendrocyte']['avg_dNdS'].median()
        #lower than threshold 
        low_o_table=pd.crosstab(df.loc[:,'Oligodendrocyte-specific'],df.loc[:,'low_dN/dS'])
        (low_o_OR,low_o_fisher_p)=stats.fisher_exact(low_o_table)
        (low_o_chi2,low_o_p,low_o_dof,low_o_expected_table)=stats.chi2_contingency(low_o_table)
        #higher than threshold
        high_o_table=pd.crosstab(df.loc[:,'Oligodendrocyte-specific'],df.loc[:,'high_dN/dS'])
        (high_o_OR,high_o_fisher_p)=stats.fisher_exact(high_o_table)
        (high_o_chi2,high_o_p,high_o_dof,high_o_expected_table)=stats.chi2_contingency(high_o_table)
        
        #Astrocyte
        a_med=df[df['Astrocyte-specific']=='In Astrocyte']['avg_dNdS'].median()
        #lower than threshold 
        low_a_table=pd.crosstab(df.loc[:,'Astrocyte-specific'],df.loc[:,'low_dN/dS'])
        (low_a_OR,low_a_fisher_p)=stats.fisher_exact(low_a_table)
        (low_a_chi2,low_a_p,low_a_dof,low_a_expected_table)=stats.chi2_contingency(low_a_table)
        #higher than threshold
        high_a_table=pd.crosstab(df.loc[:,'Astrocyte-specific'],df.loc[:,'high_dN/dS'])
        (high_a_OR,high_a_fisher_p)=stats.fisher_exact(high_a_table)
        (high_a_chi2,high_a_p,high_a_dof,high_a_expected_table)=stats.chi2_contingency(high_a_table)
        
        #Microglia
        m_med=df[df['Microglia-specific']=='In Microglia']['avg_dNdS'].median()
        #lower than threshold 
        low_m_table=pd.crosstab(df.loc[:,'Microglia-specific'],df.loc[:,'low_dN/dS'])
        (low_m_OR,low_m_fisher_p)=stats.fisher_exact(low_m_table)
        (low_m_chi2,low_m_p,low_m_dof,low_m_expected_table)=stats.chi2_contingency(low_m_table)
        #higher than threshold
        high_m_table=pd.crosstab(df.loc[:,'Microglia-specific'],df.loc[:,'high_dN/dS'])
        (high_m_OR,high_m_fisher_p)=stats.fisher_exact(high_m_table)
        (high_m_chi2,high_m_p,high_m_dof,high_m_expected_table)=stats.chi2_contingency(high_m_table)
        
        table_df=table_df.append({'GO':GO,'num_genes':num_genes,
                                  'n_med':n_med,'low_n_chi2':low_n_chi2,'low_n_p':low_n_p,
                                  'low_n_OR':low_n_OR,'low_n_fisher_p':low_n_fisher_p,
                                  'high_n_chi2':high_n_chi2,'high_n_p':high_n_p,
                                  'high_n_OR':high_n_OR,'high_n_fisher_p':high_n_fisher_p,
                                  'e_med':e_med,'low_e_chi2':low_e_chi2,'low_e_p':low_e_p,
                                  'low_e_OR':low_e_OR,'low_e_fisher_p':low_e_fisher_p,
                                  'high_e_chi2':high_e_chi2,'high_e_p':high_e_p,
                                  'high_e_OR':high_e_OR,'high_e_fisher_p':high_e_fisher_p,
                                  'o_med':o_med,'low_o_chi2':low_o_chi2,'low_o_p':low_o_p,
                                  'low_o_OR':low_o_OR,'low_o_fisher_p':low_o_fisher_p,
                                  'high_o_chi2':high_o_chi2,'high_o_p':high_o_p,
                                  'high_o_OR':high_o_OR,'high_o_fisher_p':high_o_fisher_p,
                                  'a_med':a_med,'low_a_chi2':low_a_chi2,'low_a_p':low_a_p,
                                  'low_a_OR':low_a_OR,'low_a_fisher_p':low_a_fisher_p,
                                  'high_a_chi2':high_a_chi2,'high_a_p':high_a_p,
                                  'high_a_OR':high_a_OR,'high_a_fisher_p':high_a_fisher_p,
                                  'm_med':m_med,'low_m_chi2':low_m_chi2,'low_m_p':low_m_p,
                                  'low_m_OR':low_m_OR,'low_m_fisher_p':low_m_fisher_p,
                                  'high_m_chi2':high_m_chi2,'high_m_p':high_m_p,
                                  'high_m_OR':high_m_OR,'high_m_fisher_p':high_m_fisher_p},ignore_index=True)
    except ValueError:
        pass
table_df

Unnamed: 0,GO,num_genes,n_med,low_n_chi2,low_n_p,low_n_OR,low_n_fisher_p,high_n_chi2,high_n_p,high_n_OR,...,high_a_fisher_p,m_med,low_m_chi2,low_m_p,low_m_OR,low_m_fisher_p,high_m_chi2,high_m_p,high_m_OR,high_m_fisher_p
0,anatomical structure development,1869,0.063210,47.597100,5.234585e-12,2.119090,9.044195e-12,30.798977,2.861887e-08,0.424984,...,0.151994,0.123924,31.725829,1.775452e-08,0.465951,5.659953e-09,66.614376,3.301695e-16,2.764076,4.534195e-15
1,cellular nitrogen compound metabolic process,1125,0.066106,27.176686,1.856844e-07,2.160191,2.565345e-07,13.725623,2.115486e-04,0.465263,...,0.577807,0.123924,20.743359,5.251339e-06,0.453216,2.693546e-06,14.997500,1.076537e-04,1.881017,1.368559e-04
2,biosynthetic process,1215,0.064167,24.268278,8.380800e-07,2.049371,1.208392e-06,10.906164,9.584479e-04,0.504907,...,0.036100,0.121065,14.266388,1.586731e-04,0.547072,1.144259e-04,16.513254,4.831119e-05,1.896961,5.831116e-05
3,transport,1418,0.061445,78.406864,8.386028e-19,2.922507,2.109942e-18,63.032457,2.033282e-15,0.223607,...,0.313113,0.131164,36.424246,1.587165e-09,0.406776,3.436219e-10,50.998890,9.241822e-13,2.687332,3.474776e-12
4,cell differentiation,1407,0.062169,35.819677,2.164507e-09,2.108750,2.815874e-09,22.556113,2.040941e-06,0.427589,...,0.794031,0.121836,30.594086,3.180618e-08,0.433082,9.463105e-09,62.924732,2.147586e-15,3.069347,2.701717e-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,cytoskeleton-dependent intracellular transport,39,0.078257,1.053241,3.047619e-01,3.000000,2.552356e-01,0.602206,4.377376e-01,0.250000,...,1.000000,0.149288,0.280893,5.961166e-01,0.000000,5.557501e-01,0.000410,9.838518e-01,2.583333,4.361649e-01
58,protein folding,24,0.079997,0.259304,6.105989e-01,1.250000,1.000000e+00,0.630252,4.272629e-01,0.238095,...,0.249012,0.226065,0.259304,6.105989e-01,0.000000,5.296443e-01,1.235294,2.663799e-01,4.333333,1.670481e-01
59,vacuolar transport,22,0.082688,0.071837,7.886810e-01,0.800000,1.000000e+00,0.367586,5.443233e-01,1.083333,...,1.000000,0.057516,0.009860,9.209023e-01,1.600000,1.000000e+00,0.072652,7.875136e-01,1.875000,1.000000e+00
60,plasma membrane organization,44,0.053500,2.234921,1.349236e-01,3.750000,1.317389e-01,0.676923,4.106483e-01,0.000000,...,0.267158,0.121203,1.766249,1.838470e-01,0.000000,1.547337e-01,1.282591,2.574178e-01,5.833333,1.299954e-01


In [118]:
table_df.to_excel('GO_contingency_stats.xlsx')

In [75]:
df = gene_info_df
for GO in go_list:
    GO_df = df.loc[df[GO] == True][['Neuron-specific','Endothelia-specific','Astrocyte-specific','Microglia-specific','Oligodendrocyte-specific','avg_dNdS']].copy(deep=True)

    GO_df['Neuron-specific']=GO_df['Neuron-specific'].map({'In Neuron':1, 'NOT In Neuron':np.nan})
    GO_df['Endothelia-specific']=GO_df['Endothelia-specific'].map({'In Endothelia':1, 'NOT In Endothelia':np.nan})
    GO_df['Astrocyte-specific']=GO_df['Astrocyte-specific'].map({'In Astrocyte':1, 'NOT In Astrocyte':np.nan})
    GO_df['Microglia-specific']=GO_df['Microglia-specific'].map({'In Microglia':1, 'NOT In Microglia':np.nan})
    GO_df['Oligodendrocyte-specific']=GO_df['Oligodendrocyte-specific'].map({'In Oligodendrocyte':1, 'NOT In Oligodendrocyte':np.nan})

    GO_df['Neuron-specific_dN/dS'] = GO_df['avg_dNdS']*GO_df['Neuron-specific']
    GO_df['Endothelia-specific_dN/dS'] = GO_df['avg_dNdS']*GO_df['Endothelia-specific']
    GO_df['Astrocyte-specific_dN/dS'] = GO_df['avg_dNdS']*GO_df['Astrocyte-specific']
    GO_df['Microglia-specific_dN/dS'] = GO_df['avg_dNdS']*GO_df['Microglia-specific']
    GO_df['Oligodendrocyte-specific_dN/dS'] = GO_df['avg_dNdS']*GO_df['Oligodendrocyte-specific']

    GO_df.drop(GO_df.columns[0:5],axis=1,inplace=True)

    GO_avg = GO_df['avg_dNdS'].mean(axis=0)
    GO_med = GO_df['avg_dNdS'].median(axis=0)

    # get numpy array of each cell type's dN/dS scores without NaN
    N_arr = GO_df['Neuron-specific_dN/dS'].dropna().values
    E_arr = GO_df['Endothelia-specific_dN/dS'].dropna().values
    A_arr = GO_df['Astrocyte-specific_dN/dS'].dropna().values
    M_arr = GO_df['Microglia-specific_dN/dS'].dropna().values
    O_arr = GO_df['Oligodendrocyte-specific_dN/dS'].dropna().values

    # Creating a figure 
    fig = plt.figure(figsize=(7.5,10)) # Size of a letter size paper
    fig.suptitle('Five Celltypes Distribution of Average dN/dS Scores of Genes Related to'+"\n"+GO, fontsize=14)

    # Setting subplot space
    grid = plt.GridSpec(nrows=15,ncols=2)
    grid.update(wspace=0.3, hspace=0.3)

    # The subplot for distribution histogram 
    distr_plot = fig.add_subplot(grid[0:4,:])
    #distr_hist = dist_GO_df.plot(ax=distr_plot, kind='hist',alpha=0.3,bins=np.logspace(np.log10(0.001),np.log10(10), 100), logx=True, color=['royalblue','orange','lightgreen'])

    # Set up the bins for log scale x-axis, and get the centers
    bins=np.logspace(np.log10(0.001),np.log10(10), 100) # pizza
    bins_cntr = (bins[1:] + bins[:-1]) / 2

    # Distribution Histograms of the three cell types
    countsN, bin_edgesN, ignoredN = distr_plot.hist(N_arr, bins, histtype='stepfilled', alpha=0.2, 
                                               color='tab:red')
    countsE, bin_edgesE, ignoredE = distr_plot.hist(E_arr, bins, histtype='stepfilled', alpha=0.2, 
                                               color='tab:orange')
    countsA, bin_edgesA, ignoredA = distr_plot.hist(A_arr, bins, histtype='stepfilled', alpha=0.2, 
                                               color='lime')
    countsM, bin_edgesM, ignoredM = distr_plot.hist(M_arr, bins, histtype='stepfilled', alpha=0.2, 
                                               color='deepskyblue')
    countsO, bin_edgesO, ignoredO = distr_plot.hist(O_arr, bins, histtype='stepfilled', alpha=0.2, 
                                               color='aquamarine')

    
    # Log-normal Curve for Neuron
    try:
        # calculate area of histograms (area under PDF should be 1)
        area_histN = ((bin_edgesN[1:] - bin_edgesN[:-1]) * countsN).sum()
        # Fitting log normal to cell type specific dN/dS of this GO
        #shapeN, locN, scaleN = stats.lognorm.fit(N_arr, floc=0)
        shapeN, locN, scaleN = stats.lognorm.fit(N_arr)
        # pdf-values using cdf 
        N_fit_log_cntr_ = stats.lognorm.cdf(bins, shapeN, loc=locN, scale=scaleN)
        N_fit_log_cntr = np.diff(N_fit_log_cntr_)
        # plot fitted and scaled PDFs into histogram
        distr_plot.plot(bins_cntr, N_fit_log_cntr * countsN.sum(),color='tab:red',ls='-', 
                        label='Neuron(med={0:.3f})'.format(np.median(N_arr)), linewidth=2)
    except ValueError:
        pass

    # Log-normal Curve for Endothelial cells
    try:
        area_histE = ((bin_edgesE[1:] - bin_edgesE[:-1]) * countsE).sum()
        #shapeE, locE, scaleE = stats.lognorm.fit(E_arr, floc=0)
        shapeE, locE, scaleE = stats.lognorm.fit(E_arr)
        E_fit_log_cntr_ = stats.lognorm.cdf(bins, shapeE, loc=locE, scale=scaleE)
        E_fit_log_cntr = np.diff(E_fit_log_cntr_)
        distr_plot.plot(bins_cntr, E_fit_log_cntr * countsE.sum(),color='tab:orange',ls='-', 
                        label='Endothelia(med={0:.3f})'.format(np.median(E_arr)), linewidth=2)
    except ValueError:
         pass

    # Log-normal Curve for Microglia Cells
    try: 
        area_histM = ((bin_edgesM[1:] - bin_edgesM[:-1]) * countsM).sum()
        #shapeM, locM, scaleM = stats.lognorm.fit(M_arr, floc=0)
        shapeM, locM, scaleM = stats.lognorm.fit(M_arr)
        M_fit_log_cntr_ = stats.lognorm.cdf(bins, shapeM, loc=locM, scale=scaleM)
        M_fit_log_cntr = np.diff(M_fit_log_cntr_)
        distr_plot.plot(bins_cntr, M_fit_log_cntr * countsM.sum(),color='deepskyblue',ls='-', 
                        label='Microglia(med={0:.3f})'.format(np.median(M_arr)), linewidth=2)
    except ValueError:
        pass
    
    # Log-normal Curve for Oligodendrocyte Cells
    try: 
        area_histO = ((bin_edgesO[1:] - bin_edgesO[:-1]) * countsO).sum()
        #shapeO, locO, scaleO = stats.lognorm.fit(O_arr, floc=0)
        shapeO, locO, scaleO = stats.lognorm.fit(O_arr)
        O_fit_log_cntr_ = stats.lognorm.cdf(bins, shapeO, loc=locO, scale=scaleO)
        O_fit_log_cntr = np.diff(O_fit_log_cntr_)
        distr_plot.plot(bins_cntr, O_fit_log_cntr * countsO.sum(),color='aquamarine',ls='-', 
                        label='Oligodendrocyte(med={0:.3f})'.format(np.median(O_arr)), linewidth=2)
    except ValueError:
        pass
    
    # Log-normal Curve for Astrocyte Cells
    try: 
        area_histA = ((bin_edgesA[1:] - bin_edgesA[:-1]) * countsA).sum()
        #shapeA, locA, scaleA = stats.lognorm.fit(A_arr, floc=0)
        shapeA, locA, scaleA = stats.lognorm.fit(A_arr)
        A_fit_log_cntr_ = stats.lognorm.cdf(bins, shapeA, loc=locA, scale=scaleA)
        A_fit_log_cntr = np.diff(A_fit_log_cntr_)
        distr_plot.plot(bins_cntr, A_fit_log_cntr * countsA.sum(),color='lime',ls='-', 
                        label='Astrocyte(med={0:.3f})'.format(np.median(A_arr)), linewidth=2)
    except ValueError:
        pass

    # # If I want to include a shadow of all genes' distribution 
    # #dist_df.plot(ax=distr_plot,kind='hist',alpha=0.2,bins=bins, logx=True, color='beige')

    distr_plot.axvline(med, color='black', linestyle='-', label='All Brain Genes Median')
    distr_plot.axvline(high_cutoff, color='silver', linestyle='--', label='Quantile Cuttoffs')
    distr_plot.axvline(low_cutoff, color='silver', linestyle='--')
    distr_plot.axvline(GO_med, color='chocolate', linestyle='-.', label='Median of this GO')

    distr_plot.set_xlabel(xlabel='dN/dS')
    distr_plot.set_ylabel(ylabel='number of genes')
    distr_plot.set_xscale('log')
    distr_plot.legend(loc='best')

    # The contingency tables and the statisitcs tables 
    try:
        tab_plot(fig,df,GO)
#         Nchi2_num=tab_plot.Nchi2_num
#         Nchi2_num=f"{Nchi2_num:.3f}"
        Np_num=tab_plot.Np_num
        Np_num=f"{Np_num:.5f}"
    except IndexError:
        pass
    except ValueError:
        pass
    
    plt.subplots_adjust(right=0.98)
    fig.savefig('/Users/xulinhe/OneDrive/Herculano_Lab/Zhang_et_al_2014/Feb_17_2020_Ensembl98_contingency_analysis/'+Np_num+' '+GO+' contingency analysis.pdf')
    plt.close()

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nanatomical structure development')

[<matplotlib.lines.Line2D at 0x1c2d763bd0>]

[<matplotlib.lines.Line2D at 0x1c2d76d690>]

[<matplotlib.lines.Line2D at 0x1c2d6f5ad0>]

[<matplotlib.lines.Line2D at 0x1c2d784990>]

[<matplotlib.lines.Line2D at 0x1c2d7840d0>]

<matplotlib.lines.Line2D at 0x1c2d7746d0>

<matplotlib.lines.Line2D at 0x1c2d774d50>

<matplotlib.lines.Line2D at 0x1c2d774790>

<matplotlib.lines.Line2D at 0x1c2d901810>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2d6f7b90>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncellular nitrogen compound metabolic process')

[<matplotlib.lines.Line2D at 0x1c2e437e10>]

[<matplotlib.lines.Line2D at 0x1c2e48d9d0>]

[<matplotlib.lines.Line2D at 0x1c2e437a50>]

[<matplotlib.lines.Line2D at 0x1c2e48db50>]

[<matplotlib.lines.Line2D at 0x1c2e48d7d0>]

<matplotlib.lines.Line2D at 0x1c2e421f90>

<matplotlib.lines.Line2D at 0x1c2e435690>

<matplotlib.lines.Line2D at 0x1c2e435c50>

<matplotlib.lines.Line2D at 0x1c2e435d90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2e435490>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nbiosynthetic process')

[<matplotlib.lines.Line2D at 0x1c2f21b8d0>]

[<matplotlib.lines.Line2D at 0x1c2f234d50>]

[<matplotlib.lines.Line2D at 0x1c2f234bd0>]

[<matplotlib.lines.Line2D at 0x1c2e48d310>]

[<matplotlib.lines.Line2D at 0x1c2f212990>]

<matplotlib.lines.Line2D at 0x1c2f212850>

<matplotlib.lines.Line2D at 0x1c2f214ad0>

<matplotlib.lines.Line2D at 0x1c2f214f50>

<matplotlib.lines.Line2D at 0x1c2f21d410>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2f2147d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ntransport')

[<matplotlib.lines.Line2D at 0x1c2fc17bd0>]

[<matplotlib.lines.Line2D at 0x1c2fc0ad90>]

[<matplotlib.lines.Line2D at 0x1c2fd30cd0>]

[<matplotlib.lines.Line2D at 0x1c2fc35990>]

[<matplotlib.lines.Line2D at 0x1c2fc35c50>]

<matplotlib.lines.Line2D at 0x1c2f9e82d0>

<matplotlib.lines.Line2D at 0x1c2fc358d0>

<matplotlib.lines.Line2D at 0x1c2fd05e50>

<matplotlib.lines.Line2D at 0x1c2fd0b990>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2fd05bd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell differentiation')

[<matplotlib.lines.Line2D at 0x1c3099ec50>]

[<matplotlib.lines.Line2D at 0x1c309af350>]

[<matplotlib.lines.Line2D at 0x1c309cce10>]

[<matplotlib.lines.Line2D at 0x1c309cccd0>]

[<matplotlib.lines.Line2D at 0x1c309ccc10>]

<matplotlib.lines.Line2D at 0x1c30a7ccd0>

<matplotlib.lines.Line2D at 0x1c30a7cdd0>

<matplotlib.lines.Line2D at 0x1c30a7e890>

<matplotlib.lines.Line2D at 0x1c30a7ee10>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c30a7e2d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nresponse to stress')

[<matplotlib.lines.Line2D at 0x1c317a1e10>]

[<matplotlib.lines.Line2D at 0x1c318109d0>]

[<matplotlib.lines.Line2D at 0x1c31810f50>]

[<matplotlib.lines.Line2D at 0x1c3179c590>]

[<matplotlib.lines.Line2D at 0x1c3179ca90>]

<matplotlib.lines.Line2D at 0x1c3179cd10>

<matplotlib.lines.Line2D at 0x1c31810a90>

<matplotlib.lines.Line2D at 0x1c31810a50>

<matplotlib.lines.Line2D at 0x1c317a0d10>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c317a0750>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncellular protein modification process')

[<matplotlib.lines.Line2D at 0x1c32498750>]

[<matplotlib.lines.Line2D at 0x1c31810310>]

[<matplotlib.lines.Line2D at 0x1c324a3b10>]

[<matplotlib.lines.Line2D at 0x1c324a3a90>]

[<matplotlib.lines.Line2D at 0x1c324b4d50>]

<matplotlib.lines.Line2D at 0x1c324d8310>

<matplotlib.lines.Line2D at 0x1c324d8a10>

<matplotlib.lines.Line2D at 0x1c324d88d0>

<matplotlib.lines.Line2D at 0x1c324d8fd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c324b8110>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncellular component assembly')

[<matplotlib.lines.Line2D at 0x1c324bc990>]

[<matplotlib.lines.Line2D at 0x1c33275a50>]

[<matplotlib.lines.Line2D at 0x1c33207b10>]

[<matplotlib.lines.Line2D at 0x1c3323a690>]

[<matplotlib.lines.Line2D at 0x1c33207fd0>]

<matplotlib.lines.Line2D at 0x1c33247750>

<matplotlib.lines.Line2D at 0x1c33247950>

<matplotlib.lines.Line2D at 0x1c332b6190>

<matplotlib.lines.Line2D at 0x1c332b68d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3323d510>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nimmune system process')

[<matplotlib.lines.Line2D at 0x1c3481fbd0>]

[<matplotlib.lines.Line2D at 0x1c34837f90>]

[<matplotlib.lines.Line2D at 0x1c34818750>]

[<matplotlib.lines.Line2D at 0x1c34818950>]

[<matplotlib.lines.Line2D at 0x1c34818c10>]

<matplotlib.lines.Line2D at 0x1c34818e10>

<matplotlib.lines.Line2D at 0x1c34829d50>

<matplotlib.lines.Line2D at 0x1c34831450>

<matplotlib.lines.Line2D at 0x1c348312d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c347f2590>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncatabolic process')

[<matplotlib.lines.Line2D at 0x1c34837950>]

[<matplotlib.lines.Line2D at 0x1c35e34cd0>]

[<matplotlib.lines.Line2D at 0x1c35dff750>]

[<matplotlib.lines.Line2D at 0x1c35dffa10>]

[<matplotlib.lines.Line2D at 0x1c35dffd10>]

<matplotlib.lines.Line2D at 0x1c35e09810>

<matplotlib.lines.Line2D at 0x1c35e09dd0>

<matplotlib.lines.Line2D at 0x1c35e0b350>

<matplotlib.lines.Line2D at 0x1c35e0b290>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c35dffc50>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nnervous system process')

[<matplotlib.lines.Line2D at 0x1c37617d90>]

[<matplotlib.lines.Line2D at 0x1c376967d0>]

[<matplotlib.lines.Line2D at 0x1c365f8c50>]

[<matplotlib.lines.Line2D at 0x1c37696150>]

[<matplotlib.lines.Line2D at 0x1c37696850>]

<matplotlib.lines.Line2D at 0x1c37738f10>

<matplotlib.lines.Line2D at 0x1c37738d10>

<matplotlib.lines.Line2D at 0x1c37755a90>

<matplotlib.lines.Line2D at 0x1c37755990>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c377553d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell death')

[<matplotlib.lines.Line2D at 0x1c39fd6ad0>]

[<matplotlib.lines.Line2D at 0x1c39feee10>]

[<matplotlib.lines.Line2D at 0x1c3a02ec90>]

[<matplotlib.lines.Line2D at 0x1c3a0454d0>]

[<matplotlib.lines.Line2D at 0x1c3a045410>]

<matplotlib.lines.Line2D at 0x1c3a030750>

<matplotlib.lines.Line2D at 0x1c3a03a1d0>

<matplotlib.lines.Line2D at 0x1c3a03a750>

<matplotlib.lines.Line2D at 0x1c3a03acd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3a02ef10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell proliferation')

[<matplotlib.lines.Line2D at 0x1c3c589f50>]

[<matplotlib.lines.Line2D at 0x1c3c5a3c10>]

[<matplotlib.lines.Line2D at 0x1c3c5a3f10>]

[<matplotlib.lines.Line2D at 0x1c3c63dfd0>]

[<matplotlib.lines.Line2D at 0x1c3c63df90>]

<matplotlib.lines.Line2D at 0x1c3c58c910>

<matplotlib.lines.Line2D at 0x1c3c58cf10>

<matplotlib.lines.Line2D at 0x1c3c598590>

<matplotlib.lines.Line2D at 0x1c3c58ced0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3c579dd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nhomeostatic process')

[<matplotlib.lines.Line2D at 0x1c3db7cf50>]

[<matplotlib.lines.Line2D at 0x1c3db6da10>]

[<matplotlib.lines.Line2D at 0x1c3db93910>]

[<matplotlib.lines.Line2D at 0x1c3db93050>]

[<matplotlib.lines.Line2D at 0x1c3db93d50>]

<matplotlib.lines.Line2D at 0x1c3dbc3c50>

<matplotlib.lines.Line2D at 0x1c3dbc38d0>

<matplotlib.lines.Line2D at 0x1c3dbb2950>

<matplotlib.lines.Line2D at 0x1c3dbb2850>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3dbb2fd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nlocomotion')

[<matplotlib.lines.Line2D at 0x1c3e31ec90>]

[<matplotlib.lines.Line2D at 0x1c3e2b3ad0>]

[<matplotlib.lines.Line2D at 0x1c3e2b3a50>]

[<matplotlib.lines.Line2D at 0x1c3e29a790>]

[<matplotlib.lines.Line2D at 0x1c3e2c0f50>]

<matplotlib.lines.Line2D at 0x1c3e2c0f10>

<matplotlib.lines.Line2D at 0x1c3e2e4a50>

<matplotlib.lines.Line2D at 0x1c3d503a50>

<matplotlib.lines.Line2D at 0x1c3e2fcd90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3e2a1750>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nsmall molecule metabolic process')

[<matplotlib.lines.Line2D at 0x1c2b137710>]

[<matplotlib.lines.Line2D at 0x1c2acb61d0>]

[<matplotlib.lines.Line2D at 0x1c2a39f8d0>]

[<matplotlib.lines.Line2D at 0x1c2a3bb6d0>]

[<matplotlib.lines.Line2D at 0x1c2a3bb190>]

<matplotlib.lines.Line2D at 0x1c2a3b6f10>

<matplotlib.lines.Line2D at 0x1c2a3b6d50>

<matplotlib.lines.Line2D at 0x1c2a1b5490>

<matplotlib.lines.Line2D at 0x1c2a1b5c90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2a1b5150>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nprotein-containing complex assembly')

[<matplotlib.lines.Line2D at 0x1c2609c650>]

[<matplotlib.lines.Line2D at 0x1c26112510>]

[<matplotlib.lines.Line2D at 0x1c2569a990>]

[<matplotlib.lines.Line2D at 0x1c2569ae50>]

[<matplotlib.lines.Line2D at 0x1c2552c990>]

<matplotlib.lines.Line2D at 0x1c25474310>

<matplotlib.lines.Line2D at 0x1c25474c50>

<matplotlib.lines.Line2D at 0x1c25474cd0>

<matplotlib.lines.Line2D at 0x1c2561fe50>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c254742d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell cycle')

[<matplotlib.lines.Line2D at 0x1c39afc090>]

[<matplotlib.lines.Line2D at 0x1c38ce4d10>]

[<matplotlib.lines.Line2D at 0x1c3b073fd0>]

[<matplotlib.lines.Line2D at 0x1c384f59d0>]

[<matplotlib.lines.Line2D at 0x1c384f5350>]

<matplotlib.lines.Line2D at 0x1c385ff510>

<matplotlib.lines.Line2D at 0x1c385ffbd0>

<matplotlib.lines.Line2D at 0x1c37ec37d0>

<matplotlib.lines.Line2D at 0x1c37ec39d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c37ec32d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell motility')

[<matplotlib.lines.Line2D at 0x1c30659f10>]

[<matplotlib.lines.Line2D at 0x1c2ef20bd0>]

[<matplotlib.lines.Line2D at 0x1c35955050>]

[<matplotlib.lines.Line2D at 0x1c2ff2ab50>]

[<matplotlib.lines.Line2D at 0x1c2f7fc0d0>]

<matplotlib.lines.Line2D at 0x1c2a06ddd0>

<matplotlib.lines.Line2D at 0x1c2a06d790>

<matplotlib.lines.Line2D at 0x1c2a06d4d0>

<matplotlib.lines.Line2D at 0x1c2cd935d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3b8c1ad0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell-cell signaling')

[<matplotlib.lines.Line2D at 0x1c36f80bd0>]

[<matplotlib.lines.Line2D at 0x1c2ccc1d90>]

[<matplotlib.lines.Line2D at 0x1c36f7bed0>]

[<matplotlib.lines.Line2D at 0x1c36fa8a90>]

[<matplotlib.lines.Line2D at 0x1c36fa8bd0>]

<matplotlib.lines.Line2D at 0x1c36f7d790>

<matplotlib.lines.Line2D at 0x1c36fa8b90>

<matplotlib.lines.Line2D at 0x1c36f8f2d0>

<matplotlib.lines.Line2D at 0x1c36f8f250>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c36fa8f50>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nreproduction')

[<matplotlib.lines.Line2D at 0x1c3c153f90>]

[<matplotlib.lines.Line2D at 0x1c3c153990>]

[<matplotlib.lines.Line2D at 0x1c3c173a90>]

[<matplotlib.lines.Line2D at 0x1c3c173150>]

[<matplotlib.lines.Line2D at 0x1c3c1737d0>]

<matplotlib.lines.Line2D at 0x1c3c151ed0>

<matplotlib.lines.Line2D at 0x1c3c171590>

<matplotlib.lines.Line2D at 0x1c3c171b90>

<matplotlib.lines.Line2D at 0x1c3c171c90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2bee8090>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nvesicle-mediated transport')

[<matplotlib.lines.Line2D at 0x1c3c173250>]

[<matplotlib.lines.Line2D at 0x1c2bf4e7d0>]

[<matplotlib.lines.Line2D at 0x1c26029f50>]

[<matplotlib.lines.Line2D at 0x1c26029510>]

[<matplotlib.lines.Line2D at 0x1c3aa10e50>]

<matplotlib.lines.Line2D at 0x1c3aa10a90>

<matplotlib.lines.Line2D at 0x1c3aa09b50>

<matplotlib.lines.Line2D at 0x1c3aa097d0>

<matplotlib.lines.Line2D at 0x1c3aa2e6d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3aa09590>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncytoskeleton organization')

[<matplotlib.lines.Line2D at 0x1c25fb1c50>]

[<matplotlib.lines.Line2D at 0x1c256a5a10>]

[<matplotlib.lines.Line2D at 0x1c256a56d0>]

[<matplotlib.lines.Line2D at 0x1c25692f50>]

[<matplotlib.lines.Line2D at 0x1c25692e10>]

<matplotlib.lines.Line2D at 0x1c25fae510>

<matplotlib.lines.Line2D at 0x1c25faefd0>

<matplotlib.lines.Line2D at 0x1c256b5550>

<matplotlib.lines.Line2D at 0x1c256b5bd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c25faed90>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell adhesion')

[<matplotlib.lines.Line2D at 0x1c358eb710>]

[<matplotlib.lines.Line2D at 0x1c358c4c50>]

[<matplotlib.lines.Line2D at 0x1c33bfd790>]

[<matplotlib.lines.Line2D at 0x1c33bfd090>]

[<matplotlib.lines.Line2D at 0x1c33c31c10>]

<matplotlib.lines.Line2D at 0x1c33c038d0>

<matplotlib.lines.Line2D at 0x1c33c03e50>

<matplotlib.lines.Line2D at 0x1c33c19410>

<matplotlib.lines.Line2D at 0x1c33c19a50>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c33bfda10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ntransmembrane transport')

[<matplotlib.lines.Line2D at 0x1c20f69d90>]

[<matplotlib.lines.Line2D at 0x1c20f33490>]

[<matplotlib.lines.Line2D at 0x1c3bb5da90>]

[<matplotlib.lines.Line2D at 0x1c21850610>]

[<matplotlib.lines.Line2D at 0x1c3bb5ddd0>]

<matplotlib.lines.Line2D at 0x1c3bb5ded0>

<matplotlib.lines.Line2D at 0x1c2183b550>

<matplotlib.lines.Line2D at 0x1c2183bb90>

<matplotlib.lines.Line2D at 0x1c2183b510>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c21850310>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nlipid metabolic process')

[<matplotlib.lines.Line2D at 0x1c27513d10>]

[<matplotlib.lines.Line2D at 0x1c2752c6d0>]

[<matplotlib.lines.Line2D at 0x1c280722d0>]

[<matplotlib.lines.Line2D at 0x1c3c3c5fd0>]

[<matplotlib.lines.Line2D at 0x1c3c3c5e90>]

<matplotlib.lines.Line2D at 0x1c27525a90>

<matplotlib.lines.Line2D at 0x1c27525290>

<matplotlib.lines.Line2D at 0x1c275335d0>

<matplotlib.lines.Line2D at 0x1c27533bd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2752cc10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nembryo development')

[<matplotlib.lines.Line2D at 0x1c2c142d10>]

[<matplotlib.lines.Line2D at 0x1c380efa90>]

[<matplotlib.lines.Line2D at 0x1c380e8c50>]

[<matplotlib.lines.Line2D at 0x1c380e8e50>]

[<matplotlib.lines.Line2D at 0x1c380e8c90>]

<matplotlib.lines.Line2D at 0x1c380ccd10>

<matplotlib.lines.Line2D at 0x1c3a884450>

<matplotlib.lines.Line2D at 0x1c3a884950>

<matplotlib.lines.Line2D at 0x1c3a884f50>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3a884250>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nanatomical structure formation involved in morphogenesis')

[<matplotlib.lines.Line2D at 0x1c2b9d6dd0>]

[<matplotlib.lines.Line2D at 0x1c2abdc590>]

[<matplotlib.lines.Line2D at 0x1c2b647090>]

[<matplotlib.lines.Line2D at 0x1c2abbe950>]

[<matplotlib.lines.Line2D at 0x1c2abbefd0>]

<matplotlib.lines.Line2D at 0x1c2abc6790>

<matplotlib.lines.Line2D at 0x1c2abc6450>

<matplotlib.lines.Line2D at 0x1c2abbeed0>

<matplotlib.lines.Line2D at 0x1c2abc0a90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2abc6dd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nchromosome organization')

[<matplotlib.lines.Line2D at 0x1c3ba9d790>]

[<matplotlib.lines.Line2D at 0x1c3a503510>]

[<matplotlib.lines.Line2D at 0x1c3ba8ebd0>]

[<matplotlib.lines.Line2D at 0x1c3a51d850>]

[<matplotlib.lines.Line2D at 0x1c3a51ddd0>]

<matplotlib.lines.Line2D at 0x1c38817a90>

<matplotlib.lines.Line2D at 0x1c38817f50>

<matplotlib.lines.Line2D at 0x1c38821490>

<matplotlib.lines.Line2D at 0x1c38821990>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c38817ed0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell morphogenesis')

[<matplotlib.lines.Line2D at 0x1c3a503ad0>]

[<matplotlib.lines.Line2D at 0x1c35c94990>]

[<matplotlib.lines.Line2D at 0x1c35c94f10>]

[<matplotlib.lines.Line2D at 0x1c35c94f50>]

[<matplotlib.lines.Line2D at 0x1c35c94f90>]

<matplotlib.lines.Line2D at 0x1c35cadfd0>

<matplotlib.lines.Line2D at 0x1c35cad750>

<matplotlib.lines.Line2D at 0x1c3c32bd50>

<matplotlib.lines.Line2D at 0x1c3c32b710>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c35cad4d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ngrowth')

[<matplotlib.lines.Line2D at 0x1c3b77add0>]

[<matplotlib.lines.Line2D at 0x1c3b785850>]

[<matplotlib.lines.Line2D at 0x1c3b7978d0>]

[<matplotlib.lines.Line2D at 0x1c3b796690>]

[<matplotlib.lines.Line2D at 0x1c3b796310>]

<matplotlib.lines.Line2D at 0x1c38e6aa10>

<matplotlib.lines.Line2D at 0x1c38e6ac10>

<matplotlib.lines.Line2D at 0x1c38e6f7d0>

<matplotlib.lines.Line2D at 0x1c38e6fdd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3b785d90>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nDNA metabolic process')

[<matplotlib.lines.Line2D at 0x1c3b4cdc50>]

[<matplotlib.lines.Line2D at 0x1c3b4c7a90>]

[<matplotlib.lines.Line2D at 0x1c3b4dba10>]

[<matplotlib.lines.Line2D at 0x1c3b4d1b10>]

[<matplotlib.lines.Line2D at 0x1c3b4d1190>]

<matplotlib.lines.Line2D at 0x1c3b4e9950>

<matplotlib.lines.Line2D at 0x1c3b4e9f10>

<matplotlib.lines.Line2D at 0x1c2a0db390>

<matplotlib.lines.Line2D at 0x1c2a0db110>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3b4c7c10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nmitotic cell cycle')

[<matplotlib.lines.Line2D at 0x1c3b4c7c90>]

[<matplotlib.lines.Line2D at 0x1c296c77d0>]

[<matplotlib.lines.Line2D at 0x1c2526e5d0>]

[<matplotlib.lines.Line2D at 0x1c2526eb10>]

[<matplotlib.lines.Line2D at 0x1c2526ed10>]

<matplotlib.lines.Line2D at 0x1c239ded90>

<matplotlib.lines.Line2D at 0x1c252565d0>

<matplotlib.lines.Line2D at 0x1c252741d0>

<matplotlib.lines.Line2D at 0x1c252401d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c25256a90>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nmembrane organization')

[<matplotlib.lines.Line2D at 0x1c3a36ab90>]

[<matplotlib.lines.Line2D at 0x1c3a373f90>]

[<matplotlib.lines.Line2D at 0x1c3a373610>]

[<matplotlib.lines.Line2D at 0x1c3a512e90>]

[<matplotlib.lines.Line2D at 0x1c3a373690>]

<matplotlib.lines.Line2D at 0x1c3c49c590>

<matplotlib.lines.Line2D at 0x1c3c49ccd0>

<matplotlib.lines.Line2D at 0x1c3c483250>

<matplotlib.lines.Line2D at 0x1c3c483b10>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3c4882d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell division')

[<matplotlib.lines.Line2D at 0x1c26c949d0>]

[<matplotlib.lines.Line2D at 0x1c27be5610>]

[<matplotlib.lines.Line2D at 0x1c26c7aad0>]

[<matplotlib.lines.Line2D at 0x1c24d87f10>]

[<matplotlib.lines.Line2D at 0x1c24d870d0>]

<matplotlib.lines.Line2D at 0x1c27bc0a50>

<matplotlib.lines.Line2D at 0x1c27bda290>

<matplotlib.lines.Line2D at 0x1c27bda9d0>

<matplotlib.lines.Line2D at 0x1c27bda8d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c26c9f350>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ntranslation')

[<matplotlib.lines.Line2D at 0x1c2b71dc10>]

[<matplotlib.lines.Line2D at 0x1c2b71b690>]

[<matplotlib.lines.Line2D at 0x1c2b71b9d0>]

[<matplotlib.lines.Line2D at 0x1c2b74fa10>]

[<matplotlib.lines.Line2D at 0x1c2b74f150>]

<matplotlib.lines.Line2D at 0x1c2b6fea10>

<matplotlib.lines.Line2D at 0x1c2b6fee10>

<matplotlib.lines.Line2D at 0x1c2b7062d0>

<matplotlib.lines.Line2D at 0x1c2b706a90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2b6fee50>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncarbohydrate metabolic process')

[<matplotlib.lines.Line2D at 0x1c30272c50>]

[<matplotlib.lines.Line2D at 0x1c2385de90>]

[<matplotlib.lines.Line2D at 0x1c30d9b690>]

[<matplotlib.lines.Line2D at 0x1c2385ded0>]

[<matplotlib.lines.Line2D at 0x1c30d9b850>]

<matplotlib.lines.Line2D at 0x1c23845990>

<matplotlib.lines.Line2D at 0x1c238441d0>

<matplotlib.lines.Line2D at 0x1c23844910>

<matplotlib.lines.Line2D at 0x1c23844690>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c238741d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncofactor metabolic process')

[<matplotlib.lines.Line2D at 0x1c237945d0>]

[<matplotlib.lines.Line2D at 0x1c23f2f510>]

[<matplotlib.lines.Line2D at 0x1c32a50b50>]

[<matplotlib.lines.Line2D at 0x1c32a50090>]

[<matplotlib.lines.Line2D at 0x1c32a50bd0>]

<matplotlib.lines.Line2D at 0x1c23f10b10>

<matplotlib.lines.Line2D at 0x1c23f2c1d0>

<matplotlib.lines.Line2D at 0x1c23f2c8d0>

<matplotlib.lines.Line2D at 0x1c23f2c7d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c23f2ced0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncirculatory system process')

[<matplotlib.lines.Line2D at 0x1c2cc82c10>]

[<matplotlib.lines.Line2D at 0x1c378caa50>]

[<matplotlib.lines.Line2D at 0x1c378cad10>]

[<matplotlib.lines.Line2D at 0x1c378bd890>]

[<matplotlib.lines.Line2D at 0x1c378bdd10>]

<matplotlib.lines.Line2D at 0x1c3a171c10>

<matplotlib.lines.Line2D at 0x1c3a171bd0>

<matplotlib.lines.Line2D at 0x1c3a143910>

<matplotlib.lines.Line2D at 0x1c3a143dd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3a143250>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nmitochondrion organization')

[<matplotlib.lines.Line2D at 0x1c29462d10>]

[<matplotlib.lines.Line2D at 0x1c2945eb50>]

[<matplotlib.lines.Line2D at 0x1c376e8810>]

[<matplotlib.lines.Line2D at 0x1c36a9af10>]

[<matplotlib.lines.Line2D at 0x1c36a9a250>]

<matplotlib.lines.Line2D at 0x1c29471b90>

<matplotlib.lines.Line2D at 0x1c29471f90>

<matplotlib.lines.Line2D at 0x1c29471390>

<matplotlib.lines.Line2D at 0x1c29477d10>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2945ecd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nmRNA processing')

[<matplotlib.lines.Line2D at 0x1c283b0310>]

[<matplotlib.lines.Line2D at 0x1c283b0bd0>]

[<matplotlib.lines.Line2D at 0x1c28391cd0>]

[<matplotlib.lines.Line2D at 0x1c28391590>]

[<matplotlib.lines.Line2D at 0x1c2a5cfd50>]

<matplotlib.lines.Line2D at 0x1c2a5df290>

<matplotlib.lines.Line2D at 0x1c2a5df990>

<matplotlib.lines.Line2D at 0x1c2a5dfb90>

<matplotlib.lines.Line2D at 0x1c2a5cf990>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2a5df950>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nnucleobase-containing compound catabolic process')

[<matplotlib.lines.Line2D at 0x1c39954750>]

[<matplotlib.lines.Line2D at 0x1c2b1d2110>]

[<matplotlib.lines.Line2D at 0x1c39975a10>]

[<matplotlib.lines.Line2D at 0x1c39975c10>]

[<matplotlib.lines.Line2D at 0x1c266c7d50>]

<matplotlib.lines.Line2D at 0x1c266cb110>

<matplotlib.lines.Line2D at 0x1c266cb610>

<matplotlib.lines.Line2D at 0x1c266cbe10>

<matplotlib.lines.Line2D at 0x1c39949390>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c266e8050>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nautophagy')

[<matplotlib.lines.Line2D at 0x1c2d07bcd0>]

[<matplotlib.lines.Line2D at 0x1c2d0ab750>]

[<matplotlib.lines.Line2D at 0x1c272d6750>]

[<matplotlib.lines.Line2D at 0x1c2ef7bed0>]

[<matplotlib.lines.Line2D at 0x1c2ef7bf10>]

<matplotlib.lines.Line2D at 0x1c2d09db50>

<matplotlib.lines.Line2D at 0x1c2d0a3250>

<matplotlib.lines.Line2D at 0x1c2d0a3990>

<matplotlib.lines.Line2D at 0x1c2d0a30d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c24b4aad0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ngeneration of precursor metabolites and energy')

[<matplotlib.lines.Line2D at 0x1c2674c810>]

[<matplotlib.lines.Line2D at 0x1c2d0abe50>]

[<matplotlib.lines.Line2D at 0x1c26758a90>]

[<matplotlib.lines.Line2D at 0x1c26751cd0>]

[<matplotlib.lines.Line2D at 0x1c26771d90>]

<matplotlib.lines.Line2D at 0x1c2e90f110>

<matplotlib.lines.Line2D at 0x1c2e90f650>

<matplotlib.lines.Line2D at 0x1c2e90fe50>

<matplotlib.lines.Line2D at 0x1c2e907210>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2e8f90d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nsymbiont process')

[<matplotlib.lines.Line2D at 0x1c26751590>]

[<matplotlib.lines.Line2D at 0x1c36b20290>]

[<matplotlib.lines.Line2D at 0x1c2f5c4c50>]

[<matplotlib.lines.Line2D at 0x1c2f5c4cd0>]

[<matplotlib.lines.Line2D at 0x1c2776eed0>]

<matplotlib.lines.Line2D at 0x1c2776e9d0>

<matplotlib.lines.Line2D at 0x1c2774c790>

<matplotlib.lines.Line2D at 0x1c2f5ec250>

<matplotlib.lines.Line2D at 0x1c2f5ec090>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c27753490>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nchromosome segregation')

[<matplotlib.lines.Line2D at 0x1c26990bd0>]

[<matplotlib.lines.Line2D at 0x1c31aa17d0>]

[<matplotlib.lines.Line2D at 0x1c269a0e10>]

[<matplotlib.lines.Line2D at 0x1c269a0290>]

[<matplotlib.lines.Line2D at 0x1c26979e50>]

<matplotlib.lines.Line2D at 0x1c26979ad0>

<matplotlib.lines.Line2D at 0x1c269965d0>

<matplotlib.lines.Line2D at 0x1c26979dd0>

<matplotlib.lines.Line2D at 0x1c269ac790>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c269a08d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ndevelopmental maturation')

[<matplotlib.lines.Line2D at 0x1c23127dd0>]

[<matplotlib.lines.Line2D at 0x1c23914bd0>]

[<matplotlib.lines.Line2D at 0x1c23924e50>]

[<matplotlib.lines.Line2D at 0x1c239240d0>]

[<matplotlib.lines.Line2D at 0x1c23903410>]

<matplotlib.lines.Line2D at 0x1c23903d10>

<matplotlib.lines.Line2D at 0x1c23903f10>

<matplotlib.lines.Line2D at 0x1c37071590>

<matplotlib.lines.Line2D at 0x1c23903ed0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c37071410>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nprotein maturation')

[<matplotlib.lines.Line2D at 0x1c239241d0>]

[<matplotlib.lines.Line2D at 0x1c251c5610>]

[<matplotlib.lines.Line2D at 0x1c251c5f90>]

[<matplotlib.lines.Line2D at 0x1c251e8590>]

[<matplotlib.lines.Line2D at 0x1c251e84d0>]

<matplotlib.lines.Line2D at 0x1c251c50d0>

<matplotlib.lines.Line2D at 0x1c251e8950>

<matplotlib.lines.Line2D at 0x1c251c2310>

<matplotlib.lines.Line2D at 0x1c251e6810>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c251e6ad0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nnucleocytoplasmic transport')

[<matplotlib.lines.Line2D at 0x1c224b3e50>]

[<matplotlib.lines.Line2D at 0x1c224b3a90>]

[<matplotlib.lines.Line2D at 0x1c224afad0>]

[<matplotlib.lines.Line2D at 0x1c224c4790>]

[<matplotlib.lines.Line2D at 0x1c224c4590>]

<matplotlib.lines.Line2D at 0x1c224c4ed0>

<matplotlib.lines.Line2D at 0x1c224c4690>

<matplotlib.lines.Line2D at 0x1c224b8e10>

<matplotlib.lines.Line2D at 0x1c224b8c10>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c224b8610>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nribosome biogenesis')

[<matplotlib.lines.Line2D at 0x1c28f77e90>]

[<matplotlib.lines.Line2D at 0x1c28f55c90>]

[<matplotlib.lines.Line2D at 0x1c293c9f10>]

  muhat = tmp.mean()
  ret = ret.dtype.type(ret / rcount)
  mu2hat = tmp.var()
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


[<matplotlib.lines.Line2D at 0x1c293c9fd0>]

<matplotlib.lines.Line2D at 0x1c250a0a50>

<matplotlib.lines.Line2D at 0x1c250a0bd0>

<matplotlib.lines.Line2D at 0x1c2508f510>

<matplotlib.lines.Line2D at 0x1c2508fc90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c293c9190>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nsulfur compound metabolic process')

[<matplotlib.lines.Line2D at 0x1c293c9210>]

[<matplotlib.lines.Line2D at 0x1c23125850>]

[<matplotlib.lines.Line2D at 0x1c266b2d90>]

[<matplotlib.lines.Line2D at 0x1c266b2190>]

[<matplotlib.lines.Line2D at 0x1c2a231d90>]

<matplotlib.lines.Line2D at 0x1c2aef0f10>

<matplotlib.lines.Line2D at 0x1c2aef0f50>

<matplotlib.lines.Line2D at 0x1c2aec4190>

<matplotlib.lines.Line2D at 0x1c2aec4710>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c26696d10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nextracellular matrix organization')

[<matplotlib.lines.Line2D at 0x1c27e233d0>]

[<matplotlib.lines.Line2D at 0x1c357e8d50>]

[<matplotlib.lines.Line2D at 0x1c25f3ed50>]

[<matplotlib.lines.Line2D at 0x1c25f3ecd0>]

[<matplotlib.lines.Line2D at 0x1c2bf08b50>]

<matplotlib.lines.Line2D at 0x1c2bf08f50>

<matplotlib.lines.Line2D at 0x1c2bf13390>

<matplotlib.lines.Line2D at 0x1c2bf08d50>

<matplotlib.lines.Line2D at 0x1c2bf08dd0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c25f3e690>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncell junction organization')

[<matplotlib.lines.Line2D at 0x1c25f3e710>]

[<matplotlib.lines.Line2D at 0x1c243fa810>]

[<matplotlib.lines.Line2D at 0x1c25a27650>]

[<matplotlib.lines.Line2D at 0x1c243fac90>]

[<matplotlib.lines.Line2D at 0x1c25a27690>]

<matplotlib.lines.Line2D at 0x1c25a27f10>

<matplotlib.lines.Line2D at 0x1c25a02750>

<matplotlib.lines.Line2D at 0x1c25a02c10>

<matplotlib.lines.Line2D at 0x1c25a0e090>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c25a029d0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nprotein targeting')

[<matplotlib.lines.Line2D at 0x1c243fa790>]

[<matplotlib.lines.Line2D at 0x1c2f4dce50>]

[<matplotlib.lines.Line2D at 0x1c2b23bd50>]

[<matplotlib.lines.Line2D at 0x1c2b23bf90>]

[<matplotlib.lines.Line2D at 0x1c340e7f10>]

<matplotlib.lines.Line2D at 0x1c340e7b10>

<matplotlib.lines.Line2D at 0x1c340e7fd0>

<matplotlib.lines.Line2D at 0x1c2b243210>

<matplotlib.lines.Line2D at 0x1c2b25fc90>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c340e4450>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nmitotic nuclear division')

[<matplotlib.lines.Line2D at 0x1c3c554c10>]

[<matplotlib.lines.Line2D at 0x1c355e8a10>]

[<matplotlib.lines.Line2D at 0x1c355f6c50>]

[<matplotlib.lines.Line2D at 0x1c355f6d50>]

[<matplotlib.lines.Line2D at 0x1c355cac50>]

<matplotlib.lines.Line2D at 0x1c3929ec10>

<matplotlib.lines.Line2D at 0x1c3929eb90>

<matplotlib.lines.Line2D at 0x1c392ac910>

<matplotlib.lines.Line2D at 0x1c392ac390>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c355c9cd0>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncellular amino acid metabolic process')

[<matplotlib.lines.Line2D at 0x1c2c0e8bd0>]

[<matplotlib.lines.Line2D at 0x1c2828fa50>]

[<matplotlib.lines.Line2D at 0x1c2c0f5ed0>]

[<matplotlib.lines.Line2D at 0x1c2c0f5290>]

[<matplotlib.lines.Line2D at 0x1c2c0c2450>]

<matplotlib.lines.Line2D at 0x1c2c0cb790>

<matplotlib.lines.Line2D at 0x1c2c0cbad0>

<matplotlib.lines.Line2D at 0x1c2c0cbcd0>

<matplotlib.lines.Line2D at 0x1c2c0d1890>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2c0cbb10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nribonucleoprotein complex assembly')

[<matplotlib.lines.Line2D at 0x1c2c0f5950>]

[<matplotlib.lines.Line2D at 0x1c2ff21f10>]

[<matplotlib.lines.Line2D at 0x1c2ff29dd0>]

[<matplotlib.lines.Line2D at 0x1c2ff29d50>]

[<matplotlib.lines.Line2D at 0x1c2bc9cfd0>]

<matplotlib.lines.Line2D at 0x1c2bc9c3d0>

<matplotlib.lines.Line2D at 0x1c2bc9c610>

<matplotlib.lines.Line2D at 0x1c2bca32d0>

<matplotlib.lines.Line2D at 0x1c2bcb5ad0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2bcb5a50>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\naging')

[<matplotlib.lines.Line2D at 0x1c3148d790>]

[<matplotlib.lines.Line2D at 0x1c314b7310>]

[<matplotlib.lines.Line2D at 0x1c2d5c4290>]

[<matplotlib.lines.Line2D at 0x1c2d5c4310>]

[<matplotlib.lines.Line2D at 0x1c2d5c4ed0>]

<matplotlib.lines.Line2D at 0x1c2d5c4f90>

<matplotlib.lines.Line2D at 0x1c314b7bd0>

<matplotlib.lines.Line2D at 0x1c2d5da950>

<matplotlib.lines.Line2D at 0x1c2d5f4390>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c2d5f6150>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ncytoskeleton-dependent intracellular transport')

[<matplotlib.lines.Line2D at 0x1c2fb09cd0>]

[<matplotlib.lines.Line2D at 0x1c368bf1d0>]

[<matplotlib.lines.Line2D at 0x1c3a64bc90>]

[<matplotlib.lines.Line2D at 0x1c3a64b050>]

[<matplotlib.lines.Line2D at 0x1c3a64b8d0>]

<matplotlib.lines.Line2D at 0x1c3a654d10>

<matplotlib.lines.Line2D at 0x1c3a654e10>

<matplotlib.lines.Line2D at 0x1c3a642a10>

<matplotlib.lines.Line2D at 0x1c3a642910>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3a642350>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\ntRNA metabolic process')

[<matplotlib.lines.Line2D at 0x1c244e7d50>]

[<matplotlib.lines.Line2D at 0x1c244f38d0>]

[<matplotlib.lines.Line2D at 0x1c244efad0>]

  muhat = tmp.mean()
  ret = ret.dtype.type(ret / rcount)
  mu2hat = tmp.var()
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


[<matplotlib.lines.Line2D at 0x1c244f5e90>]

<matplotlib.lines.Line2D at 0x1c361a87d0>

<matplotlib.lines.Line2D at 0x1c361a8f10>

<matplotlib.lines.Line2D at 0x1c2eaa2450>

<matplotlib.lines.Line2D at 0x1c3617b650>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c244f3d10>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nprotein folding')

[<matplotlib.lines.Line2D at 0x1c244f3d90>]

[<matplotlib.lines.Line2D at 0x1c3befcb90>]

[<matplotlib.lines.Line2D at 0x1c3befd950>]

[<matplotlib.lines.Line2D at 0x1c3befd090>]

[<matplotlib.lines.Line2D at 0x1c3befdf50>]

<matplotlib.lines.Line2D at 0x1c3bf0cd90>

<matplotlib.lines.Line2D at 0x1c3bf0ced0>

<matplotlib.lines.Line2D at 0x1c3bf24b90>

<matplotlib.lines.Line2D at 0x1c3bf24550>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c3617b790>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nvacuolar transport')

[<matplotlib.lines.Line2D at 0x1c21da29d0>]

[<matplotlib.lines.Line2D at 0x1c21db0590>]

[<matplotlib.lines.Line2D at 0x1c21d98b50>]

[<matplotlib.lines.Line2D at 0x1c21e7da10>]

[<matplotlib.lines.Line2D at 0x1c21e7d050>]

<matplotlib.lines.Line2D at 0x1c21dea850>

<matplotlib.lines.Line2D at 0x1c21deae50>

<matplotlib.lines.Line2D at 0x1c21dea890>

<matplotlib.lines.Line2D at 0x1c21dea9d0>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c21dea210>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nplasma membrane organization')

[<matplotlib.lines.Line2D at 0x1c238357d0>]

[<matplotlib.lines.Line2D at 0x1c236e7fd0>]

[<matplotlib.lines.Line2D at 0x1c23745ad0>]

[<matplotlib.lines.Line2D at 0x1c237450d0>]

[<matplotlib.lines.Line2D at 0x1c23751b50>]

<matplotlib.lines.Line2D at 0x1c2380a090>

<matplotlib.lines.Line2D at 0x1c2380a7d0>

<matplotlib.lines.Line2D at 0x1c2380add0>

<matplotlib.lines.Line2D at 0x1c2380f150>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c237f8110>

Text(0.5, 0.98, 'Five Celltypes Distribution of Average dN/dS Scores of Genes Related to\nregulation of cell size')

[<matplotlib.lines.Line2D at 0x1c27288dd0>]

[<matplotlib.lines.Line2D at 0x1c272558d0>]

[<matplotlib.lines.Line2D at 0x1c272a7890>]

[<matplotlib.lines.Line2D at 0x1c273023d0>]

[<matplotlib.lines.Line2D at 0x1c27302fd0>]

<matplotlib.lines.Line2D at 0x1c272ada90>

<matplotlib.lines.Line2D at 0x1c272ada50>

<matplotlib.lines.Line2D at 0x1c272b5790>

<matplotlib.lines.Line2D at 0x1c272b5650>

Text(0.5, 0, 'dN/dS')

Text(0, 0.5, 'number of genes')

<matplotlib.legend.Legend at 0x1c27302e50>