In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import scanpy as sc
import h5py
import sys

# Print the process id of the running kernel.
print(f'PID number is {os.getpid()}')

import matplotlib as mpl

# Folder name for figures (not referenced by any of the visible cells).
figure_folder = r'figures'

# Notebook-wide Matplotlib defaults, one call per rc group.
# Figure-level settings.
mpl.rc('figure', dpi=300, titlesize=10.)
mpl.rc('figure.constrained_layout', use=True)

# Axes labels, titles and spine width.
mpl.rc('axes', labelsize=8., titlesize=9., linewidth=1)

mpl.rc('font', family='sans-serif')

# Saved figures: 300 dpi, SVG by default.
mpl.rc('savefig', dpi=300, format='svg')

# Tick labels and major tick geometry, identical for both axes.
mpl.rc('xtick', labelsize=7.)
mpl.rc('xtick.major', size=3, width=1.)

mpl.rc('ytick', labelsize=7.)
mpl.rc('ytick.major', size=3, width=1.)

PID number is 22220


# Load adata

In [2]:
# NOTE(review): absolute, machine-specific path -- the notebook only runs on a
# machine where this exact file location exists.
h5ad_path = r"C:\Users\cosmosyw\Documents\Research\Mecp2_v2\analysis\scRNA_Greenberg\sc_raw_labeled_clustered_with_chr_info.h5ad"
adata = sc.read_h5ad(h5ad_path)

In [3]:
# Keep an untouched copy of the object as loaded, taken before the
# transformations below are applied to `adata`.
# NOTE(review): this cell is not idempotent -- if it is re-run without
# reloading `adata`, `adata_raw` becomes a copy of the already-transformed
# object and `adata` is transformed a second time.
adata_raw = adata.copy()

# Both calls are made for their effect on `adata`; return values are discarded.
# NOTE(review): scanpy documents `normalize_per_cell` as deprecated in favour
# of `normalize_total` -- confirm equivalence before switching.
sc.pp.normalize_per_cell(adata)
sc.pp.log1p(adata)

In [4]:
### define function for DEG
def calculate_deg(adata, cell_type_key, adata_raw=None, *,
                  cell_type_col='majorType', genotype_col='genotype',
                  control='WT', case='KO'):
    """Build a signed case-vs-control DEG table for one cell type.

    Cells whose ``adata.obs[cell_type_col]`` equals ``cell_type_key`` are
    copied out and ``sc.tl.rank_genes_groups`` (t-test) is run on
    ``genotype_col`` for the ``control`` and ``case`` groups. One table per
    group is then fetched with ``log2fc_min=0`` and merged. The control rows
    have ``scores`` and ``logfoldchanges`` negated, so in the result positive
    values mean "higher in ``case``" and negative values mean "higher in
    ``control``".

    Parameters
    ----------
    adata
        Annotated data object with ``cell_type_col`` and ``genotype_col`` in
        ``.obs``. It is not modified; the ranking runs on a copy of the subset.
    cell_type_key
        Value of ``cell_type_col`` selecting the cells to test.
    adata_raw
        Unused. Accepted only so existing three-argument calls keep working.
    cell_type_col, genotype_col
        Names of the ``.obs`` columns holding the cell type and the genotype.
    control, case
        Labels in ``genotype_col`` for the reference and the tested group.

    Returns
    -------
    pandas.DataFrame
        The columns produced by ``sc.get.rank_genes_groups_df`` plus a
        ``cell_type`` column set to ``cell_type_key``. Control-enriched rows
        come first, followed by case-enriched rows, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no cell matches ``cell_type_key``, or if ``control`` or ``case`` is
        absent from the selected cells.
    """
    _in_type = adata.obs[cell_type_col] == cell_type_key
    if not _in_type.any():
        raise ValueError(
            f"no cells with {cell_type_col}=={cell_type_key!r}")

    # Work on a copy: rank_genes_groups stores its results on the object.
    _cl = adata[_in_type, :].copy()

    _present = set(_cl.obs[genotype_col])
    _absent = [g for g in (control, case) if g not in _present]
    if _absent:
        raise ValueError(
            f"{cell_type_key!r} cells have no {genotype_col} in {_absent}")

    sc.tl.rank_genes_groups(_cl, genotype_col, groups=[control, case],
                            method='t-test', key_added="t-test")

    # pval_cutoff is above 1, so it effectively disables p-value filtering;
    # log2fc_min=0 keeps only the genes enriched in the requested group.
    _df_ctrl = sc.get.rank_genes_groups_df(_cl, group=control, key='t-test',
                                           pval_cutoff=1.01, log2fc_min=0)
    _df_case = sc.get.rank_genes_groups_df(_cl, group=case, key='t-test',
                                           pval_cutoff=1.01, log2fc_min=0)

    # Flip the control-enriched rows so every row is expressed as case vs control.
    _df_ctrl['logfoldchanges'] = -_df_ctrl['logfoldchanges']
    _df_ctrl['scores'] = -_df_ctrl['scores']

    # ignore_index avoids duplicate row labels from stacking the two tables.
    _df = pd.concat([_df_ctrl, _df_case], ignore_index=True)
    _df['cell_type'] = cell_type_key

    return _df

In [5]:
# Cell types to test, in the order their rows appear in the combined table.
majorTypes = ['Gluta', 'Oligo', 'Endo', 'Micro']

# One DEG table per cell type.
deg_dfs = [calculate_deg(adata, sel_type, adata_raw) for sel_type in majorTypes]

# Stack the per-type tables into one frame with a fresh running index.
df_deg = pd.concat(deg_dfs, ignore_index=True)

In [6]:
# Show the combined DEG table; the rendered output elides the middle rows.
df_deg

Unnamed: 0,names,scores,logfoldchanges,pvals,pvals_adj,cell_type
0,Mecp2,-23.167492,-2.242671,1.907345e-105,4.823484e-101,Gluta
1,Junb,-15.078729,-1.387881,6.169385e-49,7.800878e-46,Gluta
2,Arc,-14.795500,-1.546607,3.118615e-47,3.597041e-44,Gluta
3,Ctxn1,-13.829779,-0.598430,4.470588e-42,3.904597e-39,Gluta
4,Sepw1,-13.407591,-0.796429,1.258520e-39,9.360799e-37,Gluta
...,...,...,...,...,...,...
76893,Cyp2d22,0.000950,0.001066,9.992422e-01,1.000000e+00,Micro
76894,Ap1g2,0.000531,0.000384,9.995766e-01,1.000000e+00,Micro
76895,Olfr1052,0.000474,0.000968,9.996218e-01,1.000000e+00,Micro
76896,Zfp286,0.000298,0.000208,9.997623e-01,1.000000e+00,Micro


In [7]:
# Write the combined DEG table to disk. The row index is written as the first
# column, as before.
deg_csv_path = r'resources/Greenberg_scRNA_male_Mecp2_DEG_majortype.csv'
# Create the output folder if needed so the export also works on a fresh checkout.
os.makedirs(os.path.dirname(deg_csv_path), exist_ok=True)
df_deg.to_csv(deg_csv_path)