Previously, in mouse.celltype-specific-genes.ipynb, I identified neuron-, glia-, endothelia-, microglia-, astrocyte-, and oligodendrocyte-specific genes based on the Barres mouse expression data. However, not all of them are confirmed protein-coding genes, nor do they necessarily all have confident orthologs in other species from which a dN/dS score can be computed. In human.all_genes.ipynb I generated dN/dS scores for all mouse protein-coding genes that have a 1-to-1 ortholog in at least one other mammalian species, based on the Ensembl 98 database. Here I merge those genes with my celltype-specific gene lists to obtain the celltype-specific protein-coding genes that have a dN/dS ratio against mouse for at least one species.

In [1]:
import numpy as np
import pandas as pd
import glob

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one (the IPython default is "last_expr").
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Cap DataFrame previews at 10 rows so cell output stays short.
pd.options.display.max_rows = 10

In [4]:
# Pairwise dN/dS ratios of the other mammalian species against mouse.
# The second column of the file becomes the index (presumably the gene name,
# since it is later joined against the gene lists -- confirm).
# NOTE(review): the notebook text says 93 species, the file name says 92 --
# confirm which count is meant.
dnds_df = pd.read_csv(
    '../results/Ensembl98_mouse/mouse.92_species_dNdS.all_genes.tsv',
    index_col=1,
    sep='\t',
)

In [5]:
# Per-gene summary statistics of dN/dS across the species; the 'mean' column
# is the one used further down. Indexed by the file's second column, like
# dnds_df.
stats_df = pd.read_csv(
    '../results/Ensembl98_mouse/mouse.dNdS_stats.all_genes.tsv',
    index_col=1,
    sep='\t',
)

In [37]:
import os  # imported here so this cell does not depend on edits to the import cell

# Directory holding the celltype-specific gene lists and the suffix that
# follows the cell-type label in each file name.
GENE_LIST_DIR = '../results/mouse.celltype-specific_genes/all_barres/'
GENE_LIST_SUFFIX = '.list.txt'

# Root directory for everything this cell writes.
DNDS_OUT_DIR = '../results/mouse.celltype-specific_genes/protein-coding_w_dNdS/'

# Output sub-directory -> dnds_df column holding that species' pairwise
# dN/dS against the mouse reference genome.
SPECIES_DNDS_COLUMNS = {
    'human': 'hsapiens_dNdS',
    'rat': 'rnorvegicus_dNdS',
    'opposum': 'mdomestica_dNdS',
    'megabat': 'pvampyrus_dNdS',
    'tasmaniandevil': 'sharrisii_dNdS',
    'pig': 'sscrofa_dNdS',
    'cat': 'fcatus_dNdS',
}


def celltype_from_path(path):
    """Return the cell-type label encoded in a gene-list file name.

    For example ``'some/dir/neuron.list.txt'`` gives ``'neuron'``. The label
    is derived from the base name rather than from fixed character offsets,
    so it keeps working if the input directory is renamed or moved.
    """
    name = os.path.basename(path)
    if name.endswith(GENE_LIST_SUFFIX):
        name = name[:-len(GENE_LIST_SUFFIX)]
    return name


def attach_dnds(genes_df, dnds_values):
    """Left-join dN/dS values onto a gene list and drop incomplete rows.

    Parameters
    ----------
    genes_df : DataFrame indexed by gene name.
    dnds_values : named Series or DataFrame indexed by gene name.

    Returns
    -------
    DataFrame containing only the genes of ``genes_df`` for which every
    joined column has a value.
    """
    merged = pd.merge(genes_df, dnds_values, how='left',
                      left_index=True, right_index=True)
    return merged.dropna()


def write_tsv(table, subdir, celltype):
    """Write ``table`` to ``DNDS_OUT_DIR/<subdir>/<celltype>.tsv``."""
    out_dir = DNDS_OUT_DIR + subdir + '/'
    # Create the target directory on first use instead of failing part-way
    # through the export.
    os.makedirs(out_dir, exist_ok=True)
    table.to_csv(out_dir + celltype + '.tsv', sep='\t')


def export_celltype_tables(list_path):
    """Write all dN/dS tables for the gene list stored at ``list_path``.

    Uses ``dnds_df`` and ``stats_df`` loaded in the cells above. Tables are
    written to disk only; nothing is kept in the notebook namespace.
    """
    celltype = celltype_from_path(list_path)
    # presumably one gene name per line; the names become the index
    genes_df = pd.read_csv(list_path, names=['Gene name'], index_col=0)

    # One table per selected species: pairwise dN/dS against mouse.
    for subdir, column in SPECIES_DNDS_COLUMNS.items():
        write_tsv(attach_dnds(genes_df, dnds_df[column]), subdir, celltype)

    # Mean dN/dS across species, taken from the per-gene statistics table.
    avg_df = attach_dnds(genes_df, stats_df['mean'])
    write_tsv(avg_df, 'avg_93species', celltype)

    # Plain list of the celltype-specific genes that have a mean dN/dS.
    os.makedirs(DNDS_OUT_DIR, exist_ok=True)
    with open(DNDS_OUT_DIR + celltype + '.list.txt', 'w') as list_file:
        list_file.writelines(gene + '\n' for gene in avg_df.index.to_list())

    # Every species column at once; dropna() keeps only the genes that have
    # a value in all of the selected columns.
    all_df = attach_dnds(
        genes_df, dnds_df.loc[:, 'amelanoleuca_dNdS':'vvulpes_dNdS'])
    write_tsv(all_df, 'all_93species', celltype)


for list_path in glob.glob(GENE_LIST_DIR + '*' + GENE_LIST_SUFFIX):
    export_celltype_tables(list_path)