<a href="https://colab.research.google.com/github/dtabuena/Workshop/blob/main/RNA_Workshop/Gabaergic_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scanpy --quiet
!pip install pybiomart --quiet
!pip install python-igraph --quiet
!pip install louvain --quiet
!pip install pynndescent --quiet


In [None]:
import h5py
import numpy as np
import scipy as sci
from matplotlib import pyplot as plt
import scanpy as sc
import tarfile
import os
import anndata as ad
import pandas as pd
import pybiomart
from tqdm import tqdm
import urllib.request
from IPython.display import clear_output
from matplotlib.pyplot import rc_context
from scipy import stats as st
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021")

def publishable_plots(FS=6):
    """Apply compact matplotlib defaults and make Arial the default font.

    Every text-size rcParam is set to ``FS``, axis/tick line widths to 0.5
    and the figure dpi to 300. An Arial TTF is fetched into the working
    directory (only when no local copy exists) and registered with
    matplotlib so that ``font.family = 'arial'`` resolves even on machines
    without a system-wide Arial install.

    Parameters
    ----------
    FS : int or float
        Font size applied to all text-size rcParams.

    Returns
    -------
    None
    """
    # Local import: the file-level imports only bring in pyplot.
    from matplotlib import font_manager

    plt.rcParams.update({'font.size': FS,'axes.linewidth':.5,'figure.dpi':300,
                         'xtick.major.width': 0.5,'ytick.major.width': 0.5,
                         'figure.titlesize':FS,'axes.titlesize': FS,'xtick.labelsize': FS,
                         'ytick.labelsize':FS,'axes.labelsize': FS,'legend.fontsize': FS,
                         'figure.labelsize':FS})

    arial_link = 'https://raw.githubusercontent.com/dtabuena/Resources/main/Fonts/arial.ttf'
    filename = './arial.ttf'
    # Reuse a previously downloaded copy instead of hitting the network on
    # every call.
    if not os.path.exists(filename):
        urllib.request.urlretrieve(arial_link, filename)
    # Downloading alone does not make the font visible to matplotlib; it has
    # to be added to the font manager before it can be selected by name.
    font_manager.fontManager.addfont(filename)
    plt.rcParams.update({'font.family': 'arial'})
    return None
# Apply the plotting defaults defined above with a 6 pt font.
publishable_plots(6)
sc.settings.verbosity = 'error'             # verbosity: errors (0), warnings (1), info (2), hints (3)
# Match scanpy's own figure parameters to the matplotlib settings (300 dpi, 6 pt).
sc.settings.set_figure_params(dpi=300, facecolor='white',fontsize=6,)
# Let pandas print up to 100 rows before truncating.
pd.set_option('display.max_rows',100)

In [None]:
def trim_key(k):
    """Turn a downloaded file name into the sample code used downstream.

    If ``k`` contains one of the two hard-coded sample prefixes below, the
    replacement sample code stored for that prefix is returned. Otherwise
    every occurrence of ``_raw_gene_bc_matrices_h5.h5`` is removed from ``k``
    and the remainder is returned.
    """
    # prefix -> (descriptive label, replacement sample code); only the
    # replacement code is used here.
    floxed_samples = {
        'GSM5106175_YH_KZ03_01': ('E3fKI_Syn_Cre602_15m', 'GSM5106175_602_E3fKI_15_XX'),
        'GSM5106176_YH_KZ03_03': ('E4fKI_Syn_Cre475_15m', 'GSM5106176_475_E4fKI_15_XX'),
    }
    for prefix, (_, sample_code) in floxed_samples.items():
        if prefix in k:
            return sample_code
    return k.replace('_raw_gene_bc_matrices_h5.h5', "")
def query_capitilaziation(gene,adata):
    """Look up ``gene`` in ``adata.var.index`` ignoring letter case.

    Parameters
    ----------
    gene : str
        Gene name in any capitalization.
    adata : object with a ``var.index`` sequence of gene names.

    Returns
    -------
    The first entry of ``adata.var.index`` whose lower-cased text equals
    ``gene.lower()``, or ``gene + ' not_found'`` when there is no match.
    """
    names = list(adata.var.index)
    # str() keeps one non-string index entry from making every lookup fail.
    lowered = [str(name).lower() for name in names]
    try:
        return names[lowered.index(gene.lower())]
    except ValueError:
        # list.index raises ValueError when the gene is absent; anything else
        # is a genuine error and is allowed to propagate.
        return gene + ' not_found'
def z_score(x,axis=-1):
    """Standardize ``x`` along ``axis``: subtract the mean, divide by the std.

    Parameters
    ----------
    x : array-like
        Converted with ``np.array``.
    axis : int
        Axis along which the mean and (population) standard deviation are
        computed.

    Returns
    -------
    numpy.ndarray with the same shape as ``x``.
    """
    x=np.array(x)
    # keepdims keeps the reduced axis so the statistics broadcast back
    # against x for any dimensionality (1-D results are unchanged).
    mean = np.mean(x,axis=axis,keepdims=True)
    std = np.std(x,axis=axis,keepdims=True)
    return (x-mean)/std



In [None]:
# Work from the project folder so the relative paths below resolve.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021")
url = 'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE167497&format=file'
filename = './'+'zalocusky_indiv.tar'

# Reuse an existing extraction folder; only when it is missing, download the
# archive and unpack it. Catching FileNotFoundError specifically means an
# interrupt or an unrelated error no longer triggers a fresh download.
try:
    extracted_files = os.listdir('./indiv_animal_results')
except FileNotFoundError:
    urllib.request.urlretrieve(url, filename)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(filename) as my_tar:
        my_tar.extractall('./indiv_animal_results') # specify which folder to extract to
    extracted_files = os.listdir('./indiv_animal_results')

for f in extracted_files:
    print(f)


In [None]:
### Read, Combine, and Sample Split multiple 10x's

# One AnnData per file, keyed by the first field of its sample code.
adata_dict = {}
for f in tqdm( os.listdir('./indiv_animal_results') ):
    a = sc.read_10x_h5('./indiv_animal_results/'+f)
    a.var_names_make_unique()
    sample_code = trim_key(f)
    # Fields are read by position after splitting on "_":
    # 0 = GSM, 1 = mouse ID, 2 = E type, 3 = age, 4 = well.
    fields = sample_code.split("_")
    # Round the age up to the next multiple of 5 and tag it with 'm'.
    age_rounded_up = int(np.ceil(int(fields[3]) / 5) * 5)
    a.obs['age_bin'] = str(age_rounded_up) + 'm'
    a.obs['E_type'] = fields[2]
    a.obs['mouse_ID'] = fields[1]
    a.obs['well'] = fields[4]
    a.obs['GSM'] = fields[0]
    adata_dict[fields[0]] = a

# Stack all samples along the cell axis; the dict key is stored in
# obs['Sample'] and appended to each cell name with "_".
adata = ad.concat(adata_dict, axis=0, label="Sample", index_unique="_")
# Keep only cells whose E_type does not contain 'fKI'.
adata = adata[['fKI' not in t for t in adata.obs.E_type], :]
# Drop the references to the per-sample objects.
adata_dict = {}
clear_output()
print('data_loaded.')




In [None]:
def pull_gene_annots(csv_loc='./mmusculus_coding_noncoding.csv',
                     my_git='https://raw.githubusercontent.com/dtabuena/Resources/main/Genetics/mmusculus_coding_noncoding.csv',
                     biomart_name='mmusculus',
                     biomart_keys=["ensembl_gene_id", "chromosome_name","transcript_biotype","external_gene_name","peptide"]):
    """Load the gene annotation table and list the coding genes.

    Sources are tried in order: the local CSV at ``csv_loc``; a download of
    ``my_git`` saved to ``csv_loc``; and finally a BioMart query for
    ``biomart_name`` whose result is written to ``csv_loc``.

    Parameters
    ----------
    csv_loc : str
        Path of the cached annotation CSV (read if present, written otherwise).
    my_git : str
        URL of a ready-made copy of the CSV.
    biomart_name : str
        Organism name passed to ``sc.queries.biomart_annotations``.
    biomart_keys : list of str
        NOTE(review): accepted but not used. The BioMart query below has
        always requested a fixed attribute list without "peptide", which this
        default includes; confirm the intended attributes before wiring it in.

    Returns
    -------
    (coding_list, annot_dd)
        ``coding_list`` holds the index labels (gene names) of rows where
        ``is_coding`` is true; ``annot_dd`` is the table indexed by
        ``external_gene_name``.
    """
    if os.path.exists(csv_loc):
        print( 'Use local copy of musmus')
        annot_dd = pd.read_csv(csv_loc).set_index("external_gene_name")
    else:
        try:
            print( 'attempting to pull mus mus from git...')
            urllib.request.urlretrieve(my_git, csv_loc)
            annot_dd = pd.read_csv(csv_loc).set_index("external_gene_name")
        except Exception:
            # Any download or parse failure falls through to BioMart;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('attempting to pull mus mus from biomart...')
            annot = sc.queries.biomart_annotations(biomart_name,["ensembl_gene_id", "chromosome_name","transcript_biotype","external_gene_name"],).set_index('ensembl_gene_id')
            uniq_inds = list(set(list(annot.index)))
            # A gene id listed on several rows gets all of its biotypes
            # joined into one '__'-separated label so the rows become
            # identical and collapse in drop_duplicates() below.
            for r in tqdm(uniq_inds):
                match_bool = annot.index.str.contains(r)
                if np.sum(match_bool)>1:
                    new_val ='__'.join(list(annot.loc[r,'transcript_biotype']))
                    annot.at[r,'transcript_biotype']=new_val
            annot['is_coding']= annot.transcript_biotype.str.contains('coding')
            annot_dd = annot.drop_duplicates().set_index("external_gene_name")
            annot_dd.to_csv(csv_loc)

    coding_list = annot_dd.index[ annot_dd['is_coding'] ].to_list()
    return coding_list, annot_dd




def preprocess_andata10x(adata_og,pct_mito=0.25,min_genes=500,max_genes=2400,min_counts=500,max_counts=4500):
    """Annotate genes, compute QC metrics and return a filtered copy.

    ``adata_og`` is modified in place: ``var['mt']`` (names starting with
    'mt-'), ``var['coding']`` (names found by ``pull_gene_annots``) and the
    scanpy QC metrics are added to it. Filtering is applied to a copy.

    Parameters
    ----------
    adata_og : AnnData
        Combined data set.
    pct_mito : float
        Cells are kept when ``obs['pct_counts_mt'] < pct_mito``.
    min_genes, max_genes, min_counts, max_counts : int
        Bounds passed one at a time to ``sc.pp.filter_cells``.

    Returns
    -------
    (adata_QC, adata_og)
        The filtered copy and the annotated input.
    """
    print('pulling gene annotations...')
    coding_list, _ = pull_gene_annots()
    # Set membership keeps the per-gene lookup constant time.
    coding_genes = set(coding_list)
    adata_og.var['mt'] = adata_og.var_names.str.startswith('mt-')
    adata_og.var['coding'] = [gene in coding_genes for gene in adata_og.var_names]
    sc.pp.calculate_qc_metrics(adata_og, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

    adata_QC = adata_og.copy()

    print('Filtering...')
    keep_mito = adata_og.obs.pct_counts_mt < pct_mito
    adata_QC = adata_QC[adata_QC.obs.pct_counts_mt < pct_mito, :]
    # Report the cells that fail the threshold; the previous message printed
    # the number of cells that passed it.
    print(str(int(np.sum(~keep_mito))) + f' cells with >={pct_mito}% removed')
    adata_QC = adata_QC[:, adata_QC.var.coding]
    print(str(np.sum(np.logical_not(adata_og.var.coding))) + ' non coding genes removed')
    sc.pp.filter_cells(adata_QC, min_genes=min_genes)
    sc.pp.filter_cells(adata_QC, max_genes=max_genes)
    sc.pp.filter_cells(adata_QC, min_counts=min_counts)
    sc.pp.filter_cells(adata_QC, max_counts=max_counts)
    # QC scatter of the cells that survived all filters.
    fig,ax=plt.subplots(1,figsize=(1.5,1.5))
    sc.pl.scatter(adata_QC, x='total_counts', y='n_genes_by_counts',ax=ax)

    return adata_QC,adata_og

# Run QC with the default thresholds and keep only the filtered object
# (first element of the returned tuple).
adata_QC = preprocess_andata10x(adata)[0]




In [None]:
def high_var_genes_dim_reduc(adata,min_mean = 0.25,max_mean = 4,min_disp=0.55):
    """Normalize, flag highly variable genes, run PCA and draw diagnostics.

    The defaults follow the method text quoted by the original author:
    "The gene expression matrices were then log-normalized with a scale
    factor of 10,000, using the Seurat NormalizeData function57,58. Highly
    dispersed genes were selected using the Seurat FindVariableGenes
    function57,58, filtering for an average expression range of 0.25–4 and a
    minimum dispersion of 0.55, resulting in a list of 2,197 genes."

    ``adata`` is transformed in place and also returned.
    """
    # Store the incoming matrix on .raw before the in-place transforms below.
    adata.raw = adata
    sc.pp.normalize_total(adata, target_sum=10000)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_disp=min_disp, min_mean=min_mean, max_mean=max_mean)
    with rc_context({'figure.figsize': (1.5, 1.5)}):
        sc.pl.highly_variable_genes(adata)
    plt.tight_layout()
    n_hv = np.sum(adata.var['highly_variable'])
    print(n_hv,'hv genes')

    #### PCA
    sc.tl.pca(adata, n_comps=50, svd_solver='arpack')
    # Variance ratio of the first 25 components, drawn as small black dots.
    _, scree_ax = plt.subplots(figsize=(1,1))
    leading_ratios = adata.uns['pca']['variance_ratio'][:25]
    scree_ax.plot(leading_ratios,'ok',markersize=1)
    obs_keys = ['E_type','age_bin','mouse_ID']
    quiet_PCA_plots(adata, obs_keys, pc_pairs=[(0,1),(2,3),(4,5),(6,7)])

    return adata

def quiet_PCA_plots(adata,key_list,figsize=(2,2),pc_pairs=((0,1),)):
    """Scatter cells in PCA space, one panel per (PC pair, obs key).

    Parameters
    ----------
    adata : AnnData
        Must provide ``obsm['X_pca']`` and the columns named in ``key_list``.
    key_list : sequence of str
        ``adata.obs`` columns; each gets one column of panels, with cells
        grouped by the column's values.
    figsize : (float, float)
        Width and height of a single panel.
    pc_pairs : sequence of (int, int)
        Column indices into ``X_pca`` to plot against each other; each pair
        gets one row of panels.

    Returns
    -------
    None
    """
    n_rows, n_cols = len(pc_pairs), len(key_list)
    # squeeze=False always returns a 2-D axes array, so [row, col] indexing
    # also works for a single PC pair or a single key.
    fig,ax=plt.subplots(n_rows,n_cols,figsize=(figsize[0]*n_cols,figsize[1]*n_rows),squeeze=False)
    pca = adata.obsm['X_pca']
    for ip,pair in enumerate(pc_pairs):
        for key_ind,key in enumerate(key_list):
            panel = ax[ip,key_ind]
            key_types = sorted(set( adata.obs[key] ))
            for k in key_types:
                is_k = (adata.obs[key]==k).to_numpy()
                panel.scatter(pca[is_k,pair[0]],pca[is_k,pair[1]],s=2,marker='.',linewidth=0,edgecolors=None,label=k)
            panel.set_xlabel(f'PC{pair[0]}')
            panel.set_ylabel(f'PC{pair[1]}')
            # Legends are only drawn for keys with few distinct values.
            if len(key_types)<8: panel.legend(key_types,loc='best',markerscale=3)
            panel.set_title(key)
    plt.tight_layout()
    return None

############## GABAERGIC Filter #####################
# Work on a copy so the QC-filtered data set stays untouched.
adata_GABA = adata_QC.copy()
# Relabel the age bins: zero-pad 5m and merge 15m and 20m into '15m+'.
age_dict = {'5m':'05m', '10m': '10m','15m': '15m+','20m': '15m+'}
adata_GABA.obs['age_bin'] = [ age_dict[a] for a in adata_GABA.obs['age_bin'] ]

# A cell is marker-positive when its z-scored expression exceeds the cut-off.
adata_GABA.obs['Gad1_pos'] = z_score(sc.get.obs_df(adata_GABA,'Gad1'))>0.5
adata_GABA.obs['Gad2_pos'] = z_score(sc.get.obs_df(adata_GABA,'Gad2'))>0.5
# NOTE(review): a z-score cut-off of -100 is presumably true for every cell,
# which would make the Syn1 requirement a no-op - confirm this is intended.
adata_GABA.obs['Syn1_pos'] = z_score(sc.get.obs_df(adata_GABA,'Syn1'))>-100
# GABAergic = (Gad1 positive OR Gad2 positive) AND Syn1 positive.
is_gaba = np.logical_and( np.logical_or(adata_GABA.obs['Gad1_pos'] , adata_GABA.obs['Gad2_pos'] ), adata_GABA.obs['Syn1_pos'])


adata_GABA.obs['Gabaergic'] = is_gaba
# Keep only the cells flagged as GABAergic.
adata_GABA = adata_GABA[ adata_GABA.obs['Gabaergic'] ,:]
# Gene filter with min_counts=50, applied to the remaining cells.
sc.pp.filter_genes(adata_GABA, min_counts=50)


############## Dim Reduction #####################
# Normalize, select variable genes and run PCA using the defaults.
adata_GABA = high_var_genes_dim_reduc(adata_GABA)

In [None]:
def umap_and_cluster(adata, n_neighbors=10, n_pcs=20,resolution=.6,plot_keys=['Cluster (nn)'],size = 1,to_plot=True):
    """Build the neighbor graph, run Louvain, PAGA and UMAP, optionally plot.

    All seeded steps use random_state 42. The Louvain labels are copied to
    ``adata.obs['Cluster (nn)']``. When ``to_plot`` is true the UMAP is drawn
    colored by ``plot_keys`` with point size ``size``. ``adata`` is updated
    in place and returned.
    """
    seed = 42
    sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=n_pcs, random_state=seed)
    sc.tl.louvain(adata, resolution=resolution, random_state=seed)
    sc.tl.paga(adata)
    sc.tl.umap(adata, random_state=seed)
    adata.obs['Cluster (nn)'] = adata.obs['louvain']
    if not to_plot:
        return adata
    with rc_context({'figure.figsize': (2.5, 2.5)}):
        sc.pl.umap(adata, color=plot_keys, size=size, legend_loc='on data', add_outline=False)
    return adata

def explore_umap(adata_GABA,key_list=[],size=1,legend_loc=None):
    """Draw small UMAP panels colored by each entry of ``key_list``.

    Panels are 1.5 x 1.5, the color scale starts at 0, and ``size`` and
    ``legend_loc`` are forwarded to ``sc.pl.umap``.
    """
    panel_style = {'figure.figsize': (1.5,1.5)}
    with rc_context(panel_style):
        sc.pl.umap(adata_GABA, color=key_list, size=size, legend_loc=legend_loc, vmin=0)
        plt.tight_layout()

def get_gene_cluster(adata,gene):
    """Return the Louvain cluster holding most of the cells high in ``gene``.

    Cells count as high when their z-scored expression of ``gene`` exceeds 2;
    the most frequent ``adata.obs['louvain']`` label among them is returned
    (the smallest label in sort order on a tie).

    Raises
    ------
    ValueError
        If no cell passes the z-score threshold.
    """
    gene_bool = z_score(sc.get.obs_df(adata,gene))>2
    gene_cluster_list = np.array(adata.obs['louvain'][gene_bool])
    if gene_cluster_list.size == 0:
        raise ValueError(f'no cells with z-scored {gene} expression > 2')
    # pandas handles string labels directly. The previous
    # scipy.stats.mode(...)[0][0] depended on an array-valued result, which
    # newer SciPy releases no longer return (and which they reject for
    # non-numeric labels).
    return pd.Series(gene_cluster_list).mode().iloc[0]

def marker_analysis(adata):
    """Rank genes per Louvain cluster and plot the strongest markers.

    Two rankings are computed on ``adata.raw``: Wilcoxon (stored under
    ``adata.uns['maker_genes']``) and logistic regression (stored under the
    default key). From the top 30 Wilcoxon genes of every cluster, those with
    a log fold change above 4 are collected, shown as a stacked violin plot
    grouped by cluster, and returned as a list.
    """
    #### Get Marker Genes
    sc.tl.rank_genes_groups(adata, 'louvain', method='wilcoxon', key_added='maker_genes', pts=True, use_raw=True)
    sc.tl.rank_genes_groups(adata, 'louvain', method='logreg', pts=True, use_raw=True)
    wilcoxon = adata.uns['maker_genes']
    names_top = pd.DataFrame(wilcoxon['names'])[:30]
    lfc_top = pd.DataFrame(wilcoxon['logfoldchanges'])[:30]
    lfc_thresh = 4
    # Names below the threshold become NaN and are dropped by the str check.
    strong = names_top[lfc_top > lfc_thresh]
    marker_genes = [m for m in set(strong.values.flatten()) if isinstance(m, str)]
    sc.pl.stacked_violin(adata, marker_genes, groupby='louvain')
    return marker_genes

# Cluster the GABAergic cells (15 neighbors, 20 PCs, resolution 0.6) without
# the built-in plot; the panels below are drawn explicitly instead.
adata_GABA = umap_and_cluster(adata_GABA, n_neighbors=15, n_pcs=20,resolution=.6,to_plot=False)
# UMAPs colored by cluster next to marker genes / metadata of interest.
explore_umap(adata_GABA,['Cluster (nn)','Sst', 'Pvalb'],legend_loc='on data')
explore_umap(adata_GABA,['Cluster (nn)','E_type', 'age_bin'])
explore_umap(adata_GABA,['Cluster (nn)','Vip', 'Reln'])
explore_umap(adata_GABA,['Cluster (nn)','Gfap', 'Syn1'])
explore_umap(adata_GABA,['Cluster (nn)','Apoe','E_type'])

# Pick the cluster that contains most of the high-Sst / high-Pvalb cells.
sst_clust = get_gene_cluster(adata_GABA,'Sst')
pv_clust = get_gene_cluster(adata_GABA,'Pvalb')

print('sst_clust',sst_clust)
print('pv_clust',pv_clust)

In [None]:
# Alphabetical list of every gene whose name contains 'kcn' (any case).
kcn_list = sorted(n for n in adata_GABA.var.index if 'kcn' in str(n).lower())


In [None]:
def deg_set_up(adata,keys_of_interest):
    """Add one obs column that joins several obs columns with '_'.

    Parameters
    ----------
    adata : object with an ``obs`` DataFrame
    keys_of_interest : sequence of str
        Column names to combine, in order.

    Returns
    -------
    (adata, combo_key)
        The same ``adata`` (modified in place) and the name of the new
        column, which is the keys joined with '_'.
    """
    combo_key = '_'.join(keys_of_interest)
    combined = adata.obs[keys_of_interest[0]]
    for key in keys_of_interest[1:]:
        # Cast both sides to str so a boolean or numeric column can take any
        # position, including the first.
        combined = combined.astype(str).str.cat(adata.obs[key].astype(str),sep='_')
    adata.obs[combo_key] = combined
    return adata,combo_key


def deg_analysis(adata_temp,new_file_path,analysis_pairs,group_key,adata_name='volcano',log2fc_extrema=[-15,15],label_cutoff=1,to_plot=True):
    """Run pairwise Wilcoxon DEG tests and write one CSV per comparison.

    Parameters
    ----------
    adata_temp : AnnData
        Data to test; its ``.raw`` is reset to the current matrix.
    new_file_path : str
        Folder that receives ``<adata_name>_<comparison>.csv``.
    analysis_pairs : dict
        Comparison name -> (test group, reference group), both values of
        ``adata_temp.obs[group_key]``.
    group_key : str
        ``obs`` column holding the group labels.
    adata_name : str
        File-name prefix for the CSVs.
    log2fc_extrema : [low, high]
        Log2 fold-change bounds applied when extracting the result table.
    label_cutoff, to_plot
        Not used by this function.

    Returns
    -------
    (adata_temp, deg_df_dict)
        The annotated data and a dict of result tables indexed by gene name.
    """
    # Point .raw at the current matrix so use_raw=True below tests it.
    adata_temp.raw = adata_temp
    lfc_low = log2fc_extrema[0]
    lfc_high = log2fc_extrema[1]
    deg_df_dict = {}
    for key, pair in analysis_pairs.items():
        test_group, reference_group = pair[0], pair[1]
        sc.tl.rank_genes_groups(adata_temp, groupby=group_key, groups=pair, reference=reference_group,
                                method='wilcoxon', tie_correct=True, use_raw=True, key_added=key)
        ranked = sc.get.rank_genes_groups_df(adata_temp, group=test_group, key=key, pval_cutoff=1,
                                             log2fc_min=lfc_low, log2fc_max=lfc_high)
        ranked = ranked.set_index('names')
        deg_df_dict[key] = ranked
        ranked.to_csv(os.path.join(new_file_path, adata_name+'_'+key+'.csv'))
    return adata_temp, deg_df_dict

def deg_volc_plot(deg_df_dict,gene_list=(),lfcmax = 15,label_cutoff=0.1,suptitle='',lock_y_max=True):
    """Draw one volcano panel per comparison for the genes in ``gene_list``.

    Parameters
    ----------
    deg_df_dict : dict
        Comparison name -> DataFrame indexed by gene name with
        'logfoldchanges' and 'pvals' columns.
    gene_list : sequence of str
        Genes to plot; all other rows are ignored.
    lfcmax
        Not used by this function.
    label_cutoff : float
        Plotted genes with a p-value below this are labelled.
    suptitle : str
        Figure title.
    lock_y_max : bool
        If true all panels share the largest y limit; otherwise each panel
        keeps its own upper limit (scaled by 1.1).

    Returns
    -------
    (fig_volcano, ax)
        The figure and the sequence of panel axes.
    """
    fig_volcano,ax=plt.subplots(1,len(deg_df_dict),figsize=(len(deg_df_dict)*1.5,2))
    if len(deg_df_dict) == 1:  ax=[ax]
    wanted = set(gene_list)
    for key_ind,(key,df) in enumerate(deg_df_dict.items()):
        panel = ax[key_ind]
        panel.set_title(key)
        # Work on positional numpy arrays: integer indexing into a Series
        # labelled by gene name relies on a deprecated pandas fallback.
        genes = df.index.astype(str)
        lfc = df['logfoldchanges'].to_numpy(dtype=float)
        pval = df['pvals'].to_numpy(dtype=float)
        with np.errstate(divide='ignore',invalid='ignore'):
            neg_log10_pval = -np.log10(pval)
            plottable = ~np.isnan(lfc*neg_log10_pval)
        keep = genes.isin(wanted) & plottable
        if keep.any():
            panel.scatter(lfc[keep],neg_log10_pval[keep],c='k',s=3)
        for x,y,gene,p in zip(lfc[keep],neg_log10_pval[keep],genes[keep],pval[keep]):
            if p<label_cutoff:
                panel.text(x,y,gene,rotation=45,fontsize=4)

    # Symmetric x range and (optionally) a common y range across panels.
    x_lim=[a.get_xlim() for a in ax]
    y_lim=[a.get_ylim() for a in ax]
    x_etr = np.max(np.abs(x_lim))
    y_etr = np.max(np.abs(y_lim))

    for a in ax:
        a.set_xlim(-x_etr*1.1,x_etr*1.1)
        if lock_y_max: a.set_ylim(0,y_etr*1.1)
        else: a.set_ylim(0,a.get_ylim()[1]*1.1)
        a.set_xlabel('Log2 Fold Change')
        a.set_ylabel('-log10(pvalue)')
        a.axhline(-np.log10(0.05),c='k',linestyle=":",linewidth=.6,label='p=0.05')

    fig_volcano.suptitle(suptitle)
    plt.tight_layout()
    return fig_volcano,ax

def deg_heatmap(deg_dict,gene_list=(),ax=None,lfcmax = 2.5,ax_cbar=None,suptitle=''):
    """Heat map of log fold changes (genes x comparisons) with p-value marks.

    Parameters
    ----------
    deg_dict : dict
        Comparison name -> DataFrame indexed by gene name with
        'logfoldchanges' and 'pvals' columns.
    gene_list : sequence of str
        Genes to show, one row each, in the given order. Genes missing from
        a comparison are left blank (NaN).
    ax : matplotlib Axes, optional
        Axes for the heat map; a new figure with a heat-map axes and a narrow
        colorbar axes is created when omitted.
    lfcmax : float
        The color scale spans ``[-lfcmax, lfcmax]``.
    ax_cbar : matplotlib Axes, optional
        Axes for the colorbar (only used when ``ax`` is given).
    suptitle : str
        Figure title.

    Returns
    -------
    (figure, ax, image)
        The current figure, the heat-map axes and the image returned by
        ``pcolorfast``.
    """
    gene_list = list(gene_list)
    if ax is None:
        fig,axs = plt.subplots(1,2,figsize=(1+.5*len(deg_dict.keys()),len(gene_list)/14),width_ratios=[9,.5])
        ax=axs[0]
        ax_cbar = axs[1]
    ax.grid(visible=False)

    # Rows are addressed by position so a gene listed twice fills both rows.
    heat_map_lfc = np.full([len(gene_list),len(deg_dict)],np.nan)
    for key_id,deg_df in enumerate(deg_dict.values()):
        for i_gene,gene in enumerate(gene_list):
            try:
                heat_map_lfc[i_gene,key_id] = deg_df.loc[gene,'logfoldchanges']
            except (KeyError,ValueError,TypeError):
                # Gene absent from this comparison, or not a single value.
                continue
    cbar = ax.pcolorfast(heat_map_lfc,vmin=-lfcmax,vmax=lfcmax,cmap='bwr')
    ax.set_yticks(np.arange(len(gene_list))+0.5, labels=gene_list,rotation=0)
    ax.tick_params(length=0)

    fmt_xlabels = [xl.replace('_','\n') for xl in deg_dict.keys()]
    ax.set_xticks(np.arange(len(deg_dict.keys()))+0.5, labels=fmt_xlabels,rotation=0)
    ax.grid(visible=False)

    # Mark every cell with p < 0.1 using the star notation.
    for key_id,deg_df in enumerate(deg_dict.values()):
        for i_gene,gene in enumerate(gene_list):
            try:
                p = deg_df.loc[gene,'pvals']
                if p<.1:
                    tag = pval_to_star(p)
                    ax.text(key_id+.5,i_gene+.5,tag,ha='center',va='top',rotation=0)
            except (KeyError,ValueError,TypeError,OverflowError):
                # Missing gene, ambiguous lookup, or a p-value the star
                # formatter cannot convert: leave the cell unmarked.
                continue

    ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)

    # Draw exactly one colorbar; previously a missing ax_cbar produced one
    # colorbar and then a second one from the error fallback.
    if ax_cbar is not None:
        plt.colorbar(cbar,cax=ax_cbar)
        ax_cbar.set_ylabel('log2(Fold Change)')
    else:
        plt.colorbar(cbar,ax=ax)
    plt.suptitle(suptitle)
    plt.tight_layout()
    return plt.gcf(), ax, cbar

def pval_to_star(p,specifics=None,max_star=5):
    """Convert a p-value into a significance label.

    Parameters
    ----------
    p : float
        The p-value.
    specifics : dict, optional
        Maps ``(bound, bound)`` tuples to labels; the first range with
        ``min < p <= max`` wins. Defaults to "'" for (0.05, 0.1] and 'ns'
        for (0.05, 1].
    max_star : int
        Maximum number of stars returned.

    Returns
    -------
    str
        A label from ``specifics``; '*' for 0.01 < p <= 0.05; otherwise one
        star per whole unit of -log10(p), capped at ``max_star`` (an empty
        string when that is zero). ``p == 0`` gives ``max_star`` stars and
        NaN gives an empty string.
    """
    if specifics is None:
        specifics = {(.05,.1):'\'',(.05,1):'ns'}
    for bounds,tag in specifics.items():
        if p<=np.max(bounds) and p>np.min(bounds): return tag
    if np.isnan(p): return ''
    # -log10(0) is infinite and cannot be converted to a star count.
    if p==0: return '*'*max_star
    # The upper bound used to read 0.5; with the default ``specifics`` that
    # range was never reached, but with custom ones it starred p up to 0.5.
    if p<=0.05 and p>0.01: return '*'
    nlogp=-np.log10(p)
    return ('*'*int(nlogp))[:max_star]

In [None]:
###### SST Cluster KCN DEGs #####
# NOTE(review): unlike the other KCN output folders this path repeats
# 'DEG_Reuslts'; it is kept unchanged so existing outputs stay in place -
# confirm whether the extra level is intended.
path='./DEG_Reuslts/KCN/DEG_Reuslts/SST_Cluster_DEG_KCN_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Subset first, then copy: only the cluster's cells are duplicated and the
# result is a standalone object rather than a view.
adata_sst_cluster = adata_GABA[adata_GABA.obs['louvain']==sst_clust,:].copy()
adata_sst_cluster.var['is_KCN'] = [g in kcn_list for g in adata_sst_cluster.var.index]
# Group label = age bin + genotype, e.g. '05m_E3'.
adata_sst_cluster.obs['age_geno'] = adata_sst_cluster.obs['age_bin'].str.cat(adata_sst_cluster.obs['E_type'],sep='_')
ages = sorted(list(set(adata_sst_cluster.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
print(analysis_pairs)
# NOTE(review): adata_name is not passed to deg_analysis, so the CSVs use
# that function's default 'volcano' prefix.
adata_name = 'SST_cluster_KCNs'
sst_cluster_deg_data = deg_analysis(adata_sst_cluster,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
## Plot+Save
fig_volcano,ax = deg_volc_plot(sst_cluster_deg_data[1],gene_list=kcn_list,suptitle='SST Cluster')
fig_volcano.savefig(path+'SST_Cluster_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax = deg_heatmap(sst_cluster_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle='SST Cluster')
fig_heat.savefig(path+'SST_Cluster_Age-Geno_heatmap.svg', dpi=300, format='svg')

###### PV Cluster KCN DEGs #####
path='./DEG_Reuslts/KCN/PV_Cluster_DEG_KCN_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Subset first, then copy, so only the cluster's cells are duplicated.
adata_pv_cluster = adata_GABA[adata_GABA.obs['louvain']==pv_clust,:].copy()
adata_pv_cluster.var['is_KCN'] = [g in kcn_list for g in adata_pv_cluster.var.index]
# Group label = age bin + genotype.
adata_pv_cluster.obs['age_geno'] = adata_pv_cluster.obs['age_bin'].str.cat(adata_pv_cluster.obs['E_type'],sep='_')
ages = sorted(list(set(adata_pv_cluster.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
# NOTE(review): adata_name is not passed to deg_analysis below.
adata_name = 'pv_cluster_KCNs'
pv_cluster_deg_data = deg_analysis(adata_pv_cluster,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(pv_cluster_deg_data[1],gene_list=kcn_list,suptitle='PV Cluster')
fig_volcano.savefig(path+'PV_Cluster_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax = deg_heatmap(pv_cluster_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle='PV Cluster')
fig_heat.savefig(path+'PV_Cluster_Age-Geno_heatmap.svg', dpi=300, format='svg')

###### SST Positive KCN DEGs #####
path='./DEG_Reuslts/KCN/SST_Positive_DEG_KCN_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Sst expression above 1; subset first, then copy.
adata_sst_positive = adata_GABA[sc.get.obs_df(adata_GABA,'Sst')>1,:].copy()
adata_sst_positive.var['is_KCN'] = [g in kcn_list for g in adata_sst_positive.var.index]
# Group label = age bin + genotype.
adata_sst_positive.obs['age_geno'] = adata_sst_positive.obs['age_bin'].str.cat(adata_sst_positive.obs['E_type'],sep='_')
ages = sorted(list(set(adata_sst_positive.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
# NOTE(review): this name repeats the SST-cluster label and is not passed to
# deg_analysis below.
adata_name = 'SST_cluster_KCNs'
sst_positive_deg_data = deg_analysis(adata_sst_positive,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(sst_positive_deg_data[1],gene_list=kcn_list,suptitle='SST Pos')
fig_volcano.savefig(path+'SST_Pos_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(sst_positive_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle='SST Pos')
fig_heat.savefig(path+'SST_Pos_Age-Geno_heatmap.svg', dpi=300, format='svg')



###### PV positive KCN DEGs #####
path='./DEG_Reuslts/KCN/PV_Positive_DEG_KCN_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Pvalb expression above 1; subset first, then copy.
adata_pv_positive = adata_GABA[sc.get.obs_df(adata_GABA,'Pvalb')>1,:].copy()
adata_pv_positive.var['is_KCN'] = [g in kcn_list for g in adata_pv_positive.var.index]
# Group label = age bin + genotype.
adata_pv_positive.obs['age_geno'] = adata_pv_positive.obs['age_bin'].str.cat(adata_pv_positive.obs['E_type'],sep='_')
ages = sorted(list(set(adata_pv_positive.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
pv_positive_deg_data = deg_analysis(adata_pv_positive,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(pv_positive_deg_data[1],gene_list=kcn_list,suptitle='PV Pos')
fig_volcano.savefig(path+'PV_Pos_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(pv_positive_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle='PV Pos')
fig_heat.savefig(path+'PV_Pos_Age-Geno_heatmap.svg', dpi=300, format='svg')


In [None]:
########### MULTIMODAL DEGS #######################
#### Clusters
path = './DEG_Reuslts/KCN/Cluster_DEGs_multivar/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells of the SST or PV cluster; subset first, then copy.
adata_sstpv_clusters = adata_GABA[np.logical_or(adata_GABA.obs['louvain']==sst_clust,adata_GABA.obs['louvain']==pv_clust),:].copy()
# Combined group label: age bin, cluster and genotype joined with '_'.
keys_of_interest = ['age_bin','louvain','E_type']
adata_sstpv_clusters,combo_key = deg_set_up(adata_sstpv_clusters,keys_of_interest)
# Each comparison is (test group, reference group) and varies one factor.
analysis_pairs = {'15m_SST_E4 vs E3_(genotype)': (f'15m+_{sst_clust}_E4', f'15m+_{sst_clust}_E3'),
                  'SST_E4_15m vs 5m_(age)': (f'15m+_{sst_clust}_E4', f'05m_{sst_clust}_E4'),
                  'SST_E4_15m vs 10m_(age)': (f'15m+_{sst_clust}_E4', f'10m_{sst_clust}_E4'),
                  '15m_E4_SST vs PV_(celltype)': (f'15m+_{sst_clust}_E4', f'15m+_{pv_clust}_E4')}
multimodal_cluster_deg_data = deg_analysis(adata_sstpv_clusters,path,analysis_pairs,group_key = combo_key,log2fc_extrema=[-15,15],to_plot=False)

# Cell counts of the two clusters, shown in the figure titles.
n=str(np.sum(adata_GABA.obs['louvain']==sst_clust))+'v'+str(np.sum(adata_GABA.obs['louvain']==pv_clust))
fig_volcano,ax = deg_volc_plot(multimodal_cluster_deg_data[1],gene_list=kcn_list,suptitle=f'Cluster Analysis\n({n})',lock_y_max=False)
fig_volcano.savefig(path+'volc_Cluster_DEGs_multivar.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(multimodal_cluster_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle=f'Cluster Analysis\n({n})')
fig_heat.savefig(path+'heatmap_Cluster_DEGs_multivar.svg', dpi=300, format='svg')

#### Marker Positive
path = './DEG_Reuslts/KCN/MarkerPos_DEGs_multivar/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Sst or Pvalb expression above 1; subset first, then copy.
adata_sstpv_positive = adata_GABA[np.logical_or(sc.get.obs_df(adata_GABA,'Sst')>1,sc.get.obs_df(adata_GABA,'Pvalb')>1),:].copy()
adata_sstpv_positive.obs['Sst_Pos'] = sc.get.obs_df(adata_sstpv_positive,'Sst')>1
adata_sstpv_positive.obs['Pv_Pos'] = sc.get.obs_df(adata_sstpv_positive,'Pvalb')>1
# Only Sst_Pos enters the group label, so 'False' means "in this subset but
# not Sst-positive", i.e. Pvalb-positive only.
keys_of_interest = ['age_bin','Sst_Pos','E_type']

adata_sstpv_positive,combo_key = deg_set_up(adata_sstpv_positive,keys_of_interest)
display(adata_sstpv_positive.obs.head(3))
# Each comparison is (test group, reference group) and varies one factor.
analysis_pairs = {'15m_SST_E4 vs E3_(genotype)': ('15m+_True_E4', '15m+_True_E3'),
                  'SST_E4_15m vs 5m_(age)': ('15m+_True_E4', '05m_True_E4'),
                  'SST_E4_15m vs 10m_(age)': ('15m+_True_E4', '10m_True_E4'),
                  '15m_E4_SST vs PV_(celltype)': ('15m+_True_E4', '15m+_False_E4')}
display(analysis_pairs)
multimodal_markerpos_deg_data = deg_analysis(adata_sstpv_positive,path,analysis_pairs,group_key = combo_key,log2fc_extrema=[-15,15],to_plot=False)
# Counts of Sst-positive and Pvalb-positive cells, shown in the titles.
n=str(np.sum(sc.get.obs_df(adata_GABA,'Sst')>1))+'v'+str(np.sum(sc.get.obs_df(adata_GABA,'Pvalb')>1))
fig_volcano,ax = deg_volc_plot(multimodal_markerpos_deg_data[1],gene_list=kcn_list,suptitle=f'MarkerPos Analysis\n({n})',lock_y_max=False)
fig_volcano.savefig(path+'volc_MarkerPos_DEGs_multivar.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(multimodal_markerpos_deg_data[1],gene_list=kcn_list,lfcmax = 2.5,suptitle=f'MarkerPos Analysis\n({n})')
fig_heat.savefig(path+'heatmap_MarkerPos_DEGs_multivar.svg', dpi=300, format='svg')


In [None]:
# ######### CHLORIDE #######################
# Spreadsheets whose 'Approved symbol' column lists the genes of interest.
gene_sheets = ['C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_Chloride_Channels.xlsx',
                'C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_SLC_(N)KCC.xlsx',
                'C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_SLC_22.xlsx',
                'C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_SLC_AnionicNT.xlsx',
                'C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_SLC_AnionicTransporter.xlsx',
                'C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021/Chloride Gene Lists/HUGO_SLC_AnionicTransporter_Multi.xlsx',
                ]
# Gather the symbols of every sheet in order (duplicates are kept).
Chloride_list = []
for s in gene_sheets:
    df = pd.read_excel(s)
    Chloride_list.extend(df['Approved symbol'])
# First letter upper case, the rest lower case.
Chloride_list = [symbol.capitalize() for symbol in Chloride_list]
print(Chloride_list)

In [None]:
###### SST Cluster Chloride DEGs #####
# NOTE(review): unlike the other Chloride output folders this path repeats
# 'DEG_Reuslts'; it is kept unchanged so existing outputs stay in place -
# confirm whether the extra level is intended.
path='./DEG_Reuslts/Chloride/DEG_Reuslts/SST_Cluster_DEG_Chloride_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Subset first, then copy, so only the cluster's cells are duplicated.
adata_sst_cluster = adata_GABA[adata_GABA.obs['louvain']==sst_clust,:].copy()
adata_sst_cluster.var['is_Chloride'] = [g in Chloride_list for g in adata_sst_cluster.var.index]
# Group label = age bin + genotype.
adata_sst_cluster.obs['age_geno'] = adata_sst_cluster.obs['age_bin'].str.cat(adata_sst_cluster.obs['E_type'],sep='_')
ages = sorted(list(set(adata_sst_cluster.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
print(analysis_pairs)
# NOTE(review): adata_name is not passed to deg_analysis below.
adata_name = 'SST_cluster_Chlorides'
sst_cluster_deg_data = deg_analysis(adata_sst_cluster,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
## Plot+Save
fig_volcano,ax = deg_volc_plot(sst_cluster_deg_data[1],gene_list=Chloride_list,suptitle='SST Cluster')
fig_volcano.savefig(path+'SST_Cluster_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax = deg_heatmap(sst_cluster_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle='SST Cluster')
fig_heat.savefig(path+'SST_Cluster_Age-Geno_heatmap.svg', dpi=300, format='svg')

###### PV Cluster Chloride DEGs #####
path='./DEG_Reuslts/Chloride/PV_Cluster_DEG_Chloride_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Subset first, then copy, so only the cluster's cells are duplicated.
adata_pv_cluster = adata_GABA[adata_GABA.obs['louvain']==pv_clust,:].copy()
adata_pv_cluster.var['is_Chloride'] = [g in Chloride_list for g in adata_pv_cluster.var.index]
# Group label = age bin + genotype.
adata_pv_cluster.obs['age_geno'] = adata_pv_cluster.obs['age_bin'].str.cat(adata_pv_cluster.obs['E_type'],sep='_')
ages = sorted(list(set(adata_pv_cluster.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
# NOTE(review): adata_name is not passed to deg_analysis below.
adata_name = 'pv_cluster_Chlorides'
pv_cluster_deg_data = deg_analysis(adata_pv_cluster,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(pv_cluster_deg_data[1],gene_list=Chloride_list,suptitle='PV Cluster')
fig_volcano.savefig(path+'PV_Cluster_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax = deg_heatmap(pv_cluster_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle='PV Cluster')
fig_heat.savefig(path+'PV_Cluster_Age-Geno_heatmap.svg', dpi=300, format='svg')

###### SST Positive Chloride DEGs #####
path='./DEG_Reuslts/Chloride/SST_Positive_DEG_Chloride_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Sst expression above 1; subset first, then copy.
adata_sst_positive = adata_GABA[sc.get.obs_df(adata_GABA,'Sst')>1,:].copy()
adata_sst_positive.var['is_Chloride'] = [g in Chloride_list for g in adata_sst_positive.var.index]
# Group label = age bin + genotype.
adata_sst_positive.obs['age_geno'] = adata_sst_positive.obs['age_bin'].str.cat(adata_sst_positive.obs['E_type'],sep='_')
ages = sorted(list(set(adata_sst_positive.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
# NOTE(review): this name repeats the SST-cluster label and is not passed to
# deg_analysis below.
adata_name = 'SST_cluster_Chlorides'
sst_positive_deg_data = deg_analysis(adata_sst_positive,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(sst_positive_deg_data[1],gene_list=Chloride_list,suptitle='SST Pos')
fig_volcano.savefig(path+'SST_Pos_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(sst_positive_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle='SST Pos')
fig_heat.savefig(path+'SST_Pos_Age-Geno_heatmap.svg', dpi=300, format='svg')



###### PV positive Chloride DEGs #####
path='./DEG_Reuslts/Chloride/PV_Positive_DEG_Chloride_4v3_across_ages/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Pvalb expression above 1; subset first, then copy.
adata_pv_positive = adata_GABA[sc.get.obs_df(adata_GABA,'Pvalb')>1,:].copy()
adata_pv_positive.var['is_Chloride'] = [g in Chloride_list for g in adata_pv_positive.var.index]
# Group label = age bin + genotype.
adata_pv_positive.obs['age_geno'] = adata_pv_positive.obs['age_bin'].str.cat(adata_pv_positive.obs['E_type'],sep='_')
ages = sorted(list(set(adata_pv_positive.obs['age_bin'])))
# One E4-vs-E3 comparison per age bin.
analysis_pairs = { a+'_E4vE3':(a+"_E4", a+"_E3") for a in ages}
pv_positive_deg_data = deg_analysis(adata_pv_positive,path,analysis_pairs,group_key = 'age_geno',log2fc_extrema=[-15,15],to_plot=False)
### Plot+Save
fig_volcano,ax = deg_volc_plot(pv_positive_deg_data[1],gene_list=Chloride_list,suptitle='PV Pos')
fig_volcano.savefig(path+'PV_Pos_Age-Geno_volcano.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(pv_positive_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle='PV Pos')
fig_heat.savefig(path+'PV_Pos_Age-Geno_heatmap.svg', dpi=300, format='svg')

In [None]:
########### MULTIMODAL DEGS #######################
#### Clusters
path = './DEG_Reuslts/Chloride/Cluster_DEGs_multivar/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells of the SST or PV cluster; subset first, then copy.
adata_sstpv_clusters = adata_GABA[np.logical_or(adata_GABA.obs['louvain']==sst_clust,adata_GABA.obs['louvain']==pv_clust),:].copy()
# Combined group label: age bin, cluster and genotype joined with '_'.
keys_of_interest = ['age_bin','louvain','E_type']
adata_sstpv_clusters,combo_key = deg_set_up(adata_sstpv_clusters,keys_of_interest)
# Each comparison is (test group, reference group) and varies one factor.
analysis_pairs = {'15m_SST_E4 vs E3_(genotype)': (f'15m+_{sst_clust}_E4', f'15m+_{sst_clust}_E3'),
                  'SST_E4_15m vs 5m_(age)': (f'15m+_{sst_clust}_E4', f'05m_{sst_clust}_E4'),
                  'SST_E4_15m vs 10m_(age)': (f'15m+_{sst_clust}_E4', f'10m_{sst_clust}_E4'),
                  '15m_E4_SST vs PV_(celltype)': (f'15m+_{sst_clust}_E4', f'15m+_{pv_clust}_E4')}
multimodal_cluster_deg_data = deg_analysis(adata_sstpv_clusters,path,analysis_pairs,group_key = combo_key,log2fc_extrema=[-15,15],to_plot=False)

# Cell counts of the two clusters, shown in the figure titles.
n=str(np.sum(adata_GABA.obs['louvain']==sst_clust))+'v'+str(np.sum(adata_GABA.obs['louvain']==pv_clust))
fig_volcano,ax = deg_volc_plot(multimodal_cluster_deg_data[1],gene_list=Chloride_list,suptitle=f'Cluster Analysis\n({n})',lock_y_max=False)
fig_volcano.savefig(path+'volc_Cluster_DEGs_multivar.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(multimodal_cluster_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle=f'Cluster Analysis\n({n})')
fig_heat.savefig(path+'heatmap_Cluster_DEGs_multivar.svg', dpi=300, format='svg')

#### Marker Positive
path = './DEG_Reuslts/Chloride/MarkerPos_DEGs_multivar/'
# exist_ok tolerates re-runs while still surfacing real errors.
os.makedirs(path, exist_ok=True)
# Cells with Sst or Pvalb expression above 1; subset first, then copy.
adata_sstpv_positive = adata_GABA[np.logical_or(sc.get.obs_df(adata_GABA,'Sst')>1,sc.get.obs_df(adata_GABA,'Pvalb')>1),:].copy()
adata_sstpv_positive.obs['Sst_Pos'] = sc.get.obs_df(adata_sstpv_positive,'Sst')>1
adata_sstpv_positive.obs['Pv_Pos'] = sc.get.obs_df(adata_sstpv_positive,'Pvalb')>1
# Only Sst_Pos enters the group label, so 'False' means "in this subset but
# not Sst-positive", i.e. Pvalb-positive only.
keys_of_interest = ['age_bin','Sst_Pos','E_type']

adata_sstpv_positive,combo_key = deg_set_up(adata_sstpv_positive,keys_of_interest)
display(adata_sstpv_positive.obs.head(3))
# Each comparison is (test group, reference group) and varies one factor.
analysis_pairs = {'15m_SST_E4 vs E3_(genotype)': ('15m+_True_E4', '15m+_True_E3'),
                  'SST_E4_15m vs 5m_(age)': ('15m+_True_E4', '05m_True_E4'),
                  'SST_E4_15m vs 10m_(age)': ('15m+_True_E4', '10m_True_E4'),
                  '15m_E4_SST vs PV_(celltype)': ('15m+_True_E4', '15m+_False_E4')}
display(analysis_pairs)
multimodal_markerpos_deg_data = deg_analysis(adata_sstpv_positive,path,analysis_pairs,group_key = combo_key,log2fc_extrema=[-15,15],to_plot=False)
# Counts of Sst-positive and Pvalb-positive cells, shown in the titles.
n=str(np.sum(sc.get.obs_df(adata_GABA,'Sst')>1))+'v'+str(np.sum(sc.get.obs_df(adata_GABA,'Pvalb')>1))
fig_volcano,ax = deg_volc_plot(multimodal_markerpos_deg_data[1],gene_list=Chloride_list,suptitle=f'MarkerPos Analysis\n({n})',lock_y_max=False)
fig_volcano.savefig(path+'volc_MarkerPos_DEGs_multivar.svg', dpi=300, format='svg')
fig_heat,ax,cax =deg_heatmap(multimodal_markerpos_deg_data[1],gene_list=Chloride_list,lfcmax = 2.5,suptitle=f'MarkerPos Analysis\n({n})')
fig_heat.savefig(path+'heatmap_MarkerPos_DEGs_multivar.svg', dpi=300, format='svg')

In [None]:
# Cluster labels next to Sst and Kcnt2 expression.
explore_umap(adata_GABA,['Cluster (nn)','Sst','Kcnt2'])

# Cluster labels next to Reln and Lamp5 expression.
explore_umap(adata_GABA,['Cluster (nn)','Reln','Lamp5'])