In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import scanpy as sc
import h5py
import sys

# Report the kernel's process id (handy for monitoring or killing a long run).
print(f'PID number is {os.getpid()}')

import matplotlib as mpl

# Output directory name for figures.
figure_folder = r'figures'

# Notebook-wide matplotlib defaults, set in a single validated update.
# Keys are spelled out as '<group>.<option>' rcParams entries.
mpl.rcParams.update({
    # --- figure ---
    'figure.dpi': 300,
    'figure.titlesize': 10.,
    'figure.constrained_layout.use': True,
    # --- axes ---
    'axes.labelsize': 8.,
    'axes.titlesize': 9.,
    'axes.linewidth': 1,
    # --- fonts ---
    'font.family': 'sans-serif',
    # --- saved figures ---
    'savefig.dpi': 300,
    'savefig.format': 'svg',
    # --- x ticks ---
    'xtick.labelsize': 7.,
    'xtick.major.size': 3,
    'xtick.major.width': 1.,
    # --- y ticks ---
    'ytick.labelsize': 7.,
    'ytick.major.size': 3,
    'ytick.major.width': 1.,
})

PID number is 25328


# Load adata

In [3]:
# Read the single-cell dataset from the local resources folder into `adata`.
adata = sc.read_h5ad(
    r'resources/GSE113901_sc_raw_labeled_clustered_with_chr_info.h5ad'
)

In [4]:
# Keep an untouched copy of the loaded object before `adata` is modified below.
adata_raw = adata.copy()

# Normalize counts per cell, then log1p-transform; both calls modify `adata`
# in place (no return value is captured).
# NOTE(review): scanpy documents `normalize_per_cell` as deprecated in favour of
# `normalize_total` — confirm results are unchanged before switching.
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)

In [5]:
### define function for DEG
def calculate_deg(adata, cell_type_key, adata_raw,
                  cell_type_col='majorType', genotype_col='genotype',
                  control_group='WT', test_group='KO'):
    """Rank genes between two genotype groups within a single cell type.

    The cells whose ``adata.obs[cell_type_col]`` equals ``cell_type_key`` are
    copied, a Wilcoxon ``rank_genes_groups`` test is run on ``genotype_col``
    for ``control_group`` and ``test_group``, and the per-group result tables
    are merged so that scores and log fold changes are both expressed in the
    ``test_group`` direction.

    Parameters
    ----------
    adata
        Annotated data object to test. It is not modified: the selected
        subset is copied before the test results are stored on it.
    cell_type_key
        Value of ``cell_type_col`` selecting the cells to analyse.
    adata_raw
        Unused by this function; kept so existing callers keep working.
    cell_type_col
        ``adata.obs`` column holding the cell-type labels.
    genotype_col
        ``adata.obs`` column holding the group labels to compare.
    control_group, test_group
        Labels in ``genotype_col`` to compare.

    Returns
    -------
    pandas.DataFrame
        Rows from the control-group table (with ``scores`` and
        ``logfoldchanges`` negated) followed by rows from the test-group
        table, plus a ``cell_type`` column set to ``cell_type_key``. The index
        is not reset, so index values repeat across the two parts.

    Raises
    ------
    ValueError
        If no cell matches ``cell_type_key``.
    """
    _cl = adata[adata.obs[cell_type_col] == cell_type_key, :].copy()
    if _cl.n_obs == 0:
        # Fail early with a readable message instead of an opaque error from
        # the statistical test on an empty selection.
        raise ValueError(
            f"No cells with {cell_type_col} == {cell_type_key!r}; cannot compute DEGs."
        )

    sc.tl.rank_genes_groups(
        _cl,
        genotype_col,
        groups=[control_group, test_group],
        method='wilcoxon',
        key_added="wilcoxon",
    )

    def _group_table(group):
        # pval_cutoff=1.01 lies above any valid p-value, so it is presumably
        # meant to disable significance filtering; log2fc_min=0 restricts each
        # table to genes on the positive side for that group (see the scanpy
        # docs for the exact comparison used).
        return sc.get.rank_genes_groups_df(
            _cl, group=group, key='wilcoxon', pval_cutoff=1.01, log2fc_min=0
        )

    _df_ctrl = _group_table(control_group)
    _df_test = _group_table(test_group)

    # Flip the control-group statistics so both halves share one sign
    # convention (relative to the test group).
    _df_ctrl['logfoldchanges'] = -_df_ctrl['logfoldchanges']
    _df_ctrl['scores'] = -_df_ctrl['scores']

    _df = pd.concat([_df_ctrl, _df_test])
    _df['cell_type'] = cell_type_key

    return _df

In [6]:
# Cell types to test, derived from the data so this cell runs on a fresh
# kernel (`majorTypes` was not defined in any earlier cell).
# NOTE(review): the order follows first appearance in adata.obs and may differ
# from the list used for the saved output — confirm if row order matters.
majorTypes = list(adata.obs['majorType'].dropna().unique())

deg_dfs = []

# Run the DEG test once per cell type and collect the per-type tables.
for sel_type in majorTypes:
    df = calculate_deg(adata, sel_type, adata_raw)
    deg_dfs.append(df)

# Stack all cell types into one table with a fresh 0..n-1 index.
df_deg = pd.concat(deg_dfs, ignore_index=True)

In [7]:
# Preview the combined DEG table (long frames are shown truncated).
df_deg

Unnamed: 0,names,scores,logfoldchanges,pvals,pvals_adj,cell_type
0,Mecp2,-20.902115,-2.242671,5.122538e-97,4.318129e-93,Gluta
1,Ctxn1,-13.814858,-0.598430,2.073612e-43,2.383617e-40,Gluta
2,Eef1a2,-13.691930,-0.457822,1.134548e-42,1.247460e-39,Gluta
3,Dynll1,-12.686783,-0.553000,7.000236e-37,5.532155e-34,Gluta
4,Junb,-12.331744,-1.387881,6.110939e-35,4.545280e-32,Gluta
...,...,...,...,...,...,...
105130,Rad23b,-0.450307,0.006460,6.524889e-01,1.000000e+00,Endo
105131,Dnajc10,-0.488479,0.013695,6.252107e-01,1.000000e+00,Endo
105132,Krit1,-0.511848,0.019395,6.087574e-01,1.000000e+00,Endo
105133,Ubn1,-0.516860,0.036409,6.052540e-01,1.000000e+00,Endo


In [8]:
# Write the combined DEG table to the resources folder as CSV.
df_deg.to_csv(
    path_or_buf=r'resources/Greenberg_scRNA_male_Mecp2_DEG_majortype.csv'
)