## Differentially Expressed Genes (DEGs)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
import scanpy as sc
import scipy.io
import os
import math
from pylab import *
from matplotlib.colors import ListedColormap,LinearSegmentedColormap 
import seaborn as sns
from pathlib import Path
%matplotlib inline

In [6]:
# Load the annotated dataset, then build one subset per pairwise batch
# comparison. Both comparisons share the 'JH_JE' batch; only the partner differs.
adata = sc.read_h5ad('CellType.h5ad')
difference_JH_DUR, difference_SS_JH = (
    adata[adata.obs['BATCH'].isin(['JH_JE', partner])]
    for partner in ('DUR_JE', 'SS_JE')
)

In [8]:
# Explicitly set the stored log1p base to None on the full dataset.
# NOTE(review): presumably a workaround for scanpy raising KeyError: 'base'
# inside rank_genes_groups on objects reloaded from .h5ad — confirm against
# the installed scanpy version.
adata.uns['log1p']["base"] = None

In [9]:
# Differential expression between the batches of difference_JH_DUR, computed
# separately for every cell type. Each batch in turn is tested against the
# remaining cells of that cell type (reference='rest') with a Wilcoxon test and
# Benjamini-Hochberg correction. One table is written per (batch, cell type)
# pair as 'JH_DUR_<batch>_<cell type>.csv'.
groups_list = difference_JH_DUR.obs['BATCH'].cat.categories.tolist()
cell_list = difference_JH_DUR.obs['CellType'].cat.categories.tolist()
for batch in groups_list:
    for cell_type in cell_list:
        filename = f'JH_DUR_{batch}_{cell_type}'
        # Take an explicit copy: rank_genes_groups stores its result in .uns,
        # and writing into an AnnData view forces an implicit copy with a warning.
        x = difference_JH_DUR[difference_JH_DUR.obs['CellType'].isin([cell_type])].copy()
        sc.tl.rank_genes_groups(
            x,
            groupby='BATCH',
            groups=[batch],
            reference='rest',
            method='wilcoxon',
            corr_method='benjamini-hochberg',
            key_added=filename,
        )
        result = x.uns[filename]
        # Only `batch` was requested, so every result field has exactly one
        # per-group column. Select it by name instead of renaming columns by
        # position, which would silently mislabel them if the layout changed.
        marker = pd.DataFrame({
            'gene': result['names'][batch],
            'scores': result['scores'][batch],
            'Padj': result['pvals_adj'][batch],
            'log2FC': result['logfoldchanges'][batch],
        })
        marker.to_csv(filename + '.csv')

In [10]:
# Differential expression between the 'JH_JE' and 'SS_JE' batches, computed
# separately for every cell type. Each batch in turn is tested against the
# remaining cells of that cell type (reference='rest') with a Wilcoxon test and
# Benjamini-Hochberg correction. One table is written per (batch, cell type)
# pair as 'SS_JH_<batch>_<cell type>.csv'.
difference_SS_JH = adata[adata.obs['BATCH'].isin(['JH_JE', 'SS_JE'])]
groups_list = difference_SS_JH.obs['BATCH'].cat.categories.tolist()
cell_list = difference_SS_JH.obs['CellType'].cat.categories.tolist()
for batch in groups_list:
    for cell_type in cell_list:
        filename = f'SS_JH_{batch}_{cell_type}'
        # Take an explicit copy: rank_genes_groups stores its result in .uns,
        # and writing into an AnnData view forces an implicit copy with a warning.
        x = difference_SS_JH[difference_SS_JH.obs['CellType'].isin([cell_type])].copy()
        sc.tl.rank_genes_groups(
            x,
            groupby='BATCH',
            groups=[batch],
            reference='rest',
            method='wilcoxon',
            corr_method='benjamini-hochberg',
            key_added=filename,
        )
        result = x.uns[filename]
        # Only `batch` was requested, so every result field has exactly one
        # per-group column. Select it by name instead of renaming columns by
        # position, which would silently mislabel them if the layout changed.
        marker = pd.DataFrame({
            'gene': result['names'][batch],
            'scores': result['scores'][batch],
            'Padj': result['pvals_adj'][batch],
            'log2FC': result['logfoldchanges'][batch],
        })
        marker.to_csv(filename + '.csv')

In [11]:
def file_filter(f):
    """Return True for a raw DEG table name: ``*.csv`` but not ``*_padj.csv``.

    Files ending in ``_padj.csv`` are the filtered outputs derived from the raw
    tables. Excluding them keeps a re-run of the notebook from filtering
    already-filtered tables and writing ``*_padj_padj.csv`` files.

    Parameters
    ----------
    f : str
        A file name as returned by ``os.listdir``.

    Returns
    -------
    bool
    """
    return f.endswith('.csv') and not f.endswith('_padj.csv')


# Collect the raw DEG tables present in the current working directory.
files = [name for name in os.listdir('./') if file_filter(name)]
print(files)

['JH_DUR_DUR_JE_B cells.csv', 'JH_DUR_DUR_JE_Myeloid.csv', 'JH_DUR_DUR_JE_Plasma.csv', 'JH_DUR_DUR_JE_CD4 αβ T cells.csv', 'JH_DUR_DUR_JE_CD8 αβ T cells.csv', 'JH_DUR_DUR_JE_γδ T cells.csv', 'JH_DUR_DUR_JE_NK cells.csv', 'JH_DUR_DUR_JE_ILC.csv', 'JH_DUR_JH_JE_B cells.csv', 'JH_DUR_JH_JE_Myeloid.csv', 'JH_DUR_JH_JE_Plasma.csv', 'JH_DUR_JH_JE_CD4 αβ T cells.csv', 'JH_DUR_JH_JE_CD8 αβ T cells.csv', 'JH_DUR_JH_JE_γδ T cells.csv', 'JH_DUR_JH_JE_NK cells.csv', 'JH_DUR_JH_JE_ILC.csv', 'SS_JH_JH_JE_B cells.csv', 'SS_JH_JH_JE_Myeloid.csv', 'SS_JH_JH_JE_Plasma.csv', 'SS_JH_JH_JE_CD4 αβ T cells.csv', 'SS_JH_JH_JE_CD8 αβ T cells.csv', 'SS_JH_JH_JE_γδ T cells.csv', 'SS_JH_JH_JE_NK cells.csv', 'SS_JH_JH_JE_ILC.csv', 'SS_JH_SS_JE_B cells.csv', 'SS_JH_SS_JE_Myeloid.csv', 'SS_JH_SS_JE_Plasma.csv', 'SS_JH_SS_JE_CD4 αβ T cells.csv', 'SS_JH_SS_JE_CD8 αβ T cells.csv', 'SS_JH_SS_JE_γδ T cells.csv', 'SS_JH_SS_JE_NK cells.csv', 'SS_JH_SS_JE_ILC.csv']


In [12]:
# For every table listed in `files`, keep the rows that pass all three
# cut-offs (Padj < 0.8, |log2FC| > 1.5, scores > 1), renumber them from zero
# and save the result as '<original name>_padj.csv'.
# NOTE(review): Padj < 0.8 is an unusually lenient cut-off — confirm it is intended.
for csv_name in files:
    stem = csv_name.replace(".csv", "")
    table = pd.read_csv(csv_name, index_col=0)
    passes = (
        (table['Padj'] < 0.8)
        & (table['log2FC'].abs() > 1.5)
        & (table['scores'] > 1)
    )
    table[passes].reset_index(drop=True).to_csv(stem + '_padj.csv')

In [13]:
def file_filter(f):
    """Tell whether the file name `f` ends with the ``_padj.csv`` suffix."""
    return f.endswith('_padj.csv')


# Names of the filtered DEG tables found in the current working directory.
files = [entry for entry in os.listdir('./') if file_filter(entry)]
print(files)

['JH_DUR_DUR_JE_B cells_padj.csv', 'JH_DUR_DUR_JE_Myeloid_padj.csv', 'JH_DUR_DUR_JE_Plasma_padj.csv', 'JH_DUR_DUR_JE_CD4 αβ T cells_padj.csv', 'JH_DUR_DUR_JE_CD8 αβ T cells_padj.csv', 'JH_DUR_DUR_JE_γδ T cells_padj.csv', 'JH_DUR_DUR_JE_NK cells_padj.csv', 'JH_DUR_DUR_JE_ILC_padj.csv', 'JH_DUR_JH_JE_B cells_padj.csv', 'JH_DUR_JH_JE_Myeloid_padj.csv', 'JH_DUR_JH_JE_Plasma_padj.csv', 'JH_DUR_JH_JE_CD4 αβ T cells_padj.csv', 'JH_DUR_JH_JE_CD8 αβ T cells_padj.csv', 'JH_DUR_JH_JE_γδ T cells_padj.csv', 'JH_DUR_JH_JE_NK cells_padj.csv', 'JH_DUR_JH_JE_ILC_padj.csv', 'SS_JH_JH_JE_B cells_padj.csv', 'SS_JH_JH_JE_Myeloid_padj.csv', 'SS_JH_JH_JE_Plasma_padj.csv', 'SS_JH_JH_JE_CD4 αβ T cells_padj.csv', 'SS_JH_JH_JE_CD8 αβ T cells_padj.csv', 'SS_JH_JH_JE_γδ T cells_padj.csv', 'SS_JH_JH_JE_NK cells_padj.csv', 'SS_JH_JH_JE_ILC_padj.csv', 'SS_JH_SS_JE_B cells_padj.csv', 'SS_JH_SS_JE_Myeloid_padj.csv', 'SS_JH_SS_JE_Plasma_padj.csv', 'SS_JH_SS_JE_CD4 αβ T cells_padj.csv', 'SS_JH_SS_JE_CD8 αβ T cells_padj.c