<a href="https://colab.research.google.com/github/dtabuena/Workshop/blob/main/RNA_Workshop/KZ_Kegg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scanpy --quiet
!pip install pybiomart --quiet
!pip install python-igraph --quiet
!pip install louvain --quiet
!pip install pynndescent --quiet


In [None]:
import h5py
import numpy as np
import scipy as sci
from matplotlib import pyplot as plt
import scanpy as sc
import tarfile
import os
import anndata as ad
import pandas as pd
import pybiomart
from tqdm import tqdm
import urllib.request
from IPython.display import clear_output
from matplotlib.pyplot import rc_context
from scipy import stats as st
# Make the project folder the working directory so that relative paths used
# by later cells resolve inside it.
# NOTE(review): hard-coded, machine-specific Windows path — this line raises on
# any other machine (e.g. Colab); confirm and make it configurable.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_ReAnalyze_Zalocusky_2021")

def publishable_plots(FS=6):
    """Apply compact, publication-style matplotlib defaults.

    Sets every text-size rcParam to ``FS``, sets axes and major-tick line
    widths to 0.5, sets the figure DPI to 300 and selects Arial as the font
    family.

    The Arial font file is fetched to ``./arial.ttf`` only when it is not
    already present, and is then registered with matplotlib's font manager so
    that the ``'arial'`` family can actually be resolved on systems where the
    font is not installed.

    Parameters
    ----------
    FS : int or float, default 6
        Font size applied to all text elements.

    Returns
    -------
    None
    """
    # Local import: the font manager is only needed here.
    from matplotlib import font_manager

    plt.rcParams.update({'font.size': FS,'axes.linewidth':.5,'figure.dpi':300,
                         'xtick.major.width': 0.5,'ytick.major.width': 0.5,
                         'figure.titlesize':FS,'axes.titlesize': FS,'xtick.labelsize': FS,
                         'ytick.labelsize':FS,'axes.labelsize': FS,'legend.fontsize': FS,
                         'figure.labelsize':FS})

    arial_link = 'https://raw.githubusercontent.com/dtabuena/Resources/main/Fonts/arial.ttf'
    filename = './arial.ttf'
    # Avoid a network round-trip on every call: reuse a previously fetched file.
    if not os.path.exists(filename):
        urllib.request.urlretrieve(arial_link, filename)
    # Downloading alone does not make the font visible to matplotlib; it has
    # to be added to the font manager explicitly.
    font_manager.fontManager.addfont(filename)
    plt.rcParams.update({'font.family': 'arial'})
    return None
# Apply the plotting defaults with a base font size of 6.
publishable_plots(6)
sc.settings.verbosity = 'error'             # verbosity: errors (0), warnings (1), info (2), hints (3)
# NOTE(review): set_figure_params may reset some of the rcParams that
# publishable_plots configured just above — confirm the intended call order.
sc.settings.set_figure_params(dpi=300, facecolor='white',fontsize=6,)
# Let pandas render up to 100 rows before truncating its output.
pd.set_option('display.max_rows',100)

In [None]:
def trim_key(k):
    """Convert a raw sample key into its short label.

    A key that contains one of the two special-cased sample identifiers is
    replaced outright by that sample's fixed label.  Any other key is returned
    with every occurrence of ``_raw_gene_bc_matrices_h5.h5`` removed.
    """
    floxed_dict = {'GSM5106175_YH_KZ03_01':('E3fKI_Syn_Cre602_15m','GSM5106175_602_E3fKI_15_XX'),
                   'GSM5106176_YH_KZ03_03':('E4fKI_Syn_Cre475_15m','GSM5106176_475_E4fKI_15_XX')}
    # Each value is a pair; only its second element is the label returned.
    for sample_id, (_, short_label) in floxed_dict.items():
        if sample_id in k:
            return short_label
    return k.replace('_raw_gene_bc_matrices_h5.h5',"")
def query_capitilaziation(gene,adata):
    """Look up ``gene`` in ``adata.var.index`` ignoring letter case.

    Parameters
    ----------
    gene : str
        Gene name in any capitalization.
    adata : object
        Anything exposing ``var.index`` as an iterable of strings.

    Returns
    -------
    str
        The first index entry that equals ``gene`` case-insensitively, spelled
        exactly as stored; or ``gene + ' not_found'`` when there is no match.

    Notes
    -----
    Only the "no match" case yields the ``' not_found'`` fallback.  Other
    problems (for example an object without ``var``) now surface as their own
    exceptions instead of being reported as a missing gene.
    """
    wanted = gene.lower()
    for stored_name in adata.var.index:
        if stored_name.lower() == wanted:
            return stored_name
    return gene + ' not_found'
def z_score(x,axis=-1):
    """Standardize ``x`` along ``axis`` to zero mean and unit standard deviation.

    Parameters
    ----------
    x : array-like
        Input values; converted to a NumPy array.
    axis : int, default -1
        Axis along which the mean and (population) standard deviation are
        computed.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.  Slices whose standard deviation
        is zero produce NaN/inf, as dictated by NumPy division.
    """
    x = np.asarray(x)
    # keepdims keeps the reduced axis as length 1 so the statistics broadcast
    # back against x for any dimensionality, not just 1-D input.
    center = np.mean(x, axis=axis, keepdims=True)
    spread = np.std(x, axis=axis, keepdims=True)
    return (x - center) / spread



In [None]:
# Switch to the data folder and load the saved AnnData file from it.
# NOTE(review): hard-coded, machine-specific absolute path — confirm and make
# it configurable before sharing the notebook.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/Seurat_Scanpy/Seurat_data")
adata = sc.read_h5ad('2023_11_07_KZ_anndata.h5ad')

In [None]:
# Render a summary of the loaded object in the cell output.
display(adata)

In [None]:
# Preview the per-cell annotations before cluster labels are attached.
display(adata.obs.head())

# Per-barcode metadata table, indexed by the 'Barcodes' column.
# NOTE(review): hard-coded, machine-specific absolute path — confirm / make configurable.
meta_df = pd.read_csv('C:\\Users\\dennis.tabuena\\Dropbox (Gladstone)\\0_Projects\\Seurat_Scanpy\\Seurat_data\\kz_metadata.csv').set_index('Barcodes')

# Work on a copy so the freshly loaded `adata` is left untouched.
adata_meta= adata.copy()
# Column assignment aligns on the index, so cells without a row in meta_df
# end up with a missing Cluster_ID; no placeholder column is needed first.
adata_meta.obs["Cluster_ID"]= meta_df["Cluster_ID"]
display(adata_meta.obs.head())

# Unique cluster labels in sorted order, so the printed list is reproducible
# between runs; missing labels are dropped because they cannot be sorted
# together with strings.
all_cats = sorted(meta_df["Cluster_ID"].dropna().unique())
print(all_cats)

In [None]:
# Print every cluster label on its own line, in sorted order.  A plain loop is
# used so nothing is bound to `_`, which IPython reserves for the last output.
for cluster_name in sorted(all_cats):
    print(cluster_name)

In [None]:
# Build one independent copy of the data per cluster of interest, keeping only
# the cells whose Cluster_ID equals the given label, and show each subset.
dgc_01_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '01 Dentate Gyrus Granule Cells'].copy()
display(dgc_01_adata)
dgc_02_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '02 Dentate Gyrus Granule Cells'].copy()
display(dgc_02_adata)
CA3_06_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '06 CA2/CA3 Pyramids'].copy()
display(CA3_06_adata)




In [None]:
def find_corr_genes(adata,gene,pct=1):
    """Find the genes whose expression correlates most strongly with ``gene``.

    Computes the Pearson correlation between the column of ``adata.X`` that
    belongs to ``gene`` and every other column (rows are the observations),
    flags the columns above the ``100 - pct`` percentile and below the ``pct``
    percentile of those correlations, and draws a scatter of all correlations
    with the flagged genes labelled.

    Parameters
    ----------
    adata : object
        Must expose ``X`` (a dense array, or anything with ``toarray()``) and
        ``var['name']`` (a pandas Series with one entry per column of ``X``).
    gene : str
        Name to look up in ``adata.var['name']``; the first match is used.
    pct : float, default 1
        Tail size in percent used for both the low and the high threshold.

    Returns
    -------
    dict
        ``gene_corr`` (correlation per column, NaN for ``gene`` itself and for
        constant columns), ``high_bool`` / ``low_bool`` (boolean masks) and
        ``high_names`` / ``low_names`` (the matching entries of
        ``adata.var['name']``).

    Raises
    ------
    ValueError
        If ``gene`` is not present in ``adata.var['name']``.
    """
    names = adata.var['name']
    # Resolve the gene first so a typo fails before any heavy computation.
    try:
        gene_ind = list(names).index(gene)
    except ValueError:
        raise ValueError(f"{gene!r} is not in adata.var['name']") from None

    # Accept both sparse and dense matrices; float64 matches what
    # np.corrcoef would use internally.
    raw = adata.X
    expr = np.asarray(raw.toarray() if hasattr(raw, 'toarray') else raw, dtype=np.float64)

    # Only one row of the correlation matrix is needed, so correlate the
    # target column against all columns directly instead of materializing the
    # full genes x genes matrix.
    centered = expr - expr.mean(axis=0)
    norms = np.linalg.norm(centered, axis=0)
    # Constant columns have zero norm and yield NaN (0/0); silence the
    # corresponding floating-point warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        gene_corr = (centered.T @ centered[:, gene_ind]) / (norms * norms[gene_ind])
    gene_corr = np.clip(gene_corr, -1.0, 1.0)
    # Exclude the trivial self-correlation from thresholds and plot.
    gene_corr[gene_ind] = np.nan

    low, high = np.nanpercentile(gene_corr, [pct, 100 - pct])

    high_bool = gene_corr > high
    low_bool = gene_corr < low

    high_names = names[high_bool]
    low_names = names[low_bool]

    fig, ax = plt.subplots(1, figsize=(6, 1))
    ax.scatter(range(gene_corr.size), gene_corr, s=1)
    ax.set_title(f'Correlation with {gene}')

    # Label only the flagged points; .iloc keeps the lookup positional
    # regardless of how adata.var is indexed.
    for i in np.flatnonzero(high_bool | low_bool):
        ax.text(i, gene_corr[i], names.iloc[i], rotation=45)

    plt.show()

    results_dict = {'gene_corr':gene_corr, 'high_bool':high_bool,'low_bool':low_bool ,'high_names':high_names,'low_names':low_names}

    return results_dict

# Tail size (in percent) passed to find_corr_genes for both thresholds.
pct = .25
# Genes correlated with Nell2 and with Apoe in cluster '01 Dentate Gyrus Granule Cells'.
dgc_01_nell_results = find_corr_genes(dgc_01_adata,'Nell2',pct=pct)
dgc_01_apoe_results = find_corr_genes(dgc_01_adata,'Apoe',pct=pct)

# Same two queries in cluster '02 Dentate Gyrus Granule Cells'.
dgc_02_nell_results = find_corr_genes(dgc_02_adata,'Nell2',pct=pct)
dgc_02_apoe_results = find_corr_genes(dgc_02_adata,'Apoe',pct=pct)

# Same two queries in cluster '06 CA2/CA3 Pyramids'.
CA3_06_nell_results = find_corr_genes(CA3_06_adata,'Nell2',pct=pct)
CA3_06_apoe_results = find_corr_genes(CA3_06_adata,'Apoe',pct=pct)

In [None]:
!pip install keggtools