<a href="https://colab.research.google.com/github/dtabuena/Workshop/blob/main/RNA_Workshop/KZ_Kegg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scanpy --quiet
!pip install pybiomart --quiet
!pip install python-igraph --quiet
!pip install louvain --quiet
!pip install pynndescent --quiet
# !pip install keggtools --quiet

!pip install scipy

In [None]:
import h5py
import numpy as np
from matplotlib import pyplot as plt
import scanpy as sc
import tarfile
import os
import anndata as ad
import pandas as pd
import pybiomart
from tqdm import tqdm
import urllib.request
from IPython.display import clear_output
from matplotlib.pyplot import rc_context
import scipy
import logging

In [None]:
def trim_key(k):
    """Turn a raw sample key into a short sample label.

    If ``k`` contains one of the two known floxed sample IDs, the fixed
    replacement label for that sample is returned. Otherwise every occurrence
    of '_raw_gene_bc_matrices_h5.h5' is removed from ``k`` and the result is
    returned.
    """
    # sample id -> (descriptive name, replacement label); only the label is returned
    floxed_lookup = {
        'GSM5106175_YH_KZ03_01': ('E3fKI_Syn_Cre602_15m', 'GSM5106175_602_E3fKI_15_XX'),
        'GSM5106176_YH_KZ03_03': ('E4fKI_Syn_Cre475_15m', 'GSM5106176_475_E4fKI_15_XX'),
    }
    for sample_id, (_, label) in floxed_lookup.items():
        if sample_id in k:
            return label
    return k.replace('_raw_gene_bc_matrices_h5.h5', "")
def query_capitilaziation(gene,adata):
    """Look up ``gene`` in ``adata.var.index`` ignoring letter case.

    Parameters
    ----------
    gene : str
        Gene symbol in any capitalization.
    adata : object
        Anything exposing ``adata.var.index`` as an iterable of gene names.

    Returns
    -------
    The first entry of ``adata.var.index`` whose lower-cased text equals
    ``gene.lower()``, or ``gene + ' not_found'`` when there is no match.
    """
    wanted = gene.lower()
    # Explicit scan instead of list.index() inside a bare try/except, so that
    # unrelated errors are no longer silently reported as "not_found".
    for name in adata.var.index:
        if str(name).lower() == wanted:
            return name
    return gene + ' not_found'
def z_score(x,axis=-1):
    """Standardize ``x`` along ``axis``: subtract the mean and divide by the standard deviation.

    Parameters
    ----------
    x : array-like
        Input values; converted with ``np.array``.
    axis : int, default -1
        Axis along which the mean and (population, ddof=0) standard deviation are taken.

    Returns
    -------
    numpy.ndarray with the same shape as ``x``. Where the standard deviation
    is 0, the division produces nan/inf as usual for numpy.
    """
    x=np.array(x)
    # keepdims=True keeps the reduced axis so the statistics broadcast back
    # against x for inputs of any dimensionality, not just 1-D.
    mean = np.mean(x,axis=axis,keepdims=True)
    std = np.std(x,axis=axis,keepdims=True)
    return (x-mean)/std



In [None]:
# Switch the working directory to the data folder and list its contents.
# NOTE(review): hard-coded absolute Windows path — this cell only works on a machine with this exact folder layout.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_Seurat_Scanpy/Scanpy_data/")
os.listdir()

In [None]:
# Load the saved AnnData object from the data folder.
# NOTE(review): same hard-coded absolute path as the previous cell; the relative read below depends on this chdir.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_Seurat_Scanpy/Scanpy_data/")
adata = sc.read_h5ad('./2023_11_07_KZ_anndata.h5ad')

In [None]:
# Show the per-cell annotations before cluster labels are attached.
display(adata.obs.head())

# Metadata table read from CSV and indexed by its 'Barcodes' column.
meta_df = pd.read_csv('./kz_metadata.csv').set_index('Barcodes')

# Work on a copy so the loaded `adata` itself is not modified.
adata_meta= adata.copy()
adata_meta.obs["Cluster_ID"]=np.nan  # placeholder column; overwritten by the next line
# Series assignment aligns on index labels — presumably adata's obs index holds the same barcodes as meta_df; verify.
adata_meta.obs["Cluster_ID"]= meta_df["Cluster_ID"]
display(adata_meta.obs.head())

# Distinct cluster labels found in the metadata (set iteration order is arbitrary).
all_cats = list(set(meta_df["Cluster_ID"]))
print(all_cats)

In [None]:
########### Sub Divide Clusters of Interest
# NOTE(review): hard-coded absolute path; later relative outputs (CSV/SVG) are written under this folder.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_Hyper+Crisper/_Nell2_enrichment/")
# dgc_01_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '01 Dentate Gyrus Granule Cells'].copy()
# display(dgc_01_adata)
# Independent copies of the cells labelled with each cluster of interest.
dgc_02_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '02 Dentate Gyrus Granule Cells'].copy()
display(dgc_02_adata)
CA3_06_adata =  adata_meta[adata_meta.obs["Cluster_ID"] == '06 CA2/CA3 Pyramids'].copy()
display(CA3_06_adata)


In [None]:
def find_corr_genes(adata,gene,pct=1,to_plot=True):
    """Correlate every gene in ``adata`` with one target gene across cells.

    Parameters
    ----------
    adata : AnnData-like
        Must expose ``adata.X`` (cells x genes; scipy sparse or dense) and
        ``adata.var['name']`` (gene names, one per column of ``X``).
    gene : str
        Name of the target gene; must occur in ``adata.var['name']``
        (``ValueError`` otherwise).
    pct : float, default 1
        Genes with Pearson r below the ``pct``-th percentile are flagged
        "low", genes above the ``(100 - pct)``-th percentile are flagged "high".
    to_plot : bool, default True
        When True, draw and show a rank-vs-correlation scatter plot.

    Returns
    -------
    (results_dict, fig, ax)
        ``results_dict`` holds 'gene_corr', 'high_bool', 'low_bool',
        'high_names', 'low_names', 'gene_rank' and 'table' (a DataFrame
        indexed by gene name and sorted by rank). ``fig`` is the matplotlib
        figure and ``ax`` a one-element list of axes; both are ``None`` when
        ``to_plot`` is False.

    Notes
    -----
    The percentile thresholds are computed on all correlations, including the
    target gene's correlation with itself; the target is only masked in the plot.
    'FDR_p_vals' is an all-NaN placeholder — no multiple-testing correction is
    computed here.
    """
    # Function-scope import: a bare `import scipy` does not guarantee that the
    # stats/sparse submodules are loaded.
    from scipy import sparse, stats

    # Accept both sparse and dense expression matrices.
    X_ = adata.X.toarray() if sparse.issparse(adata.X) else np.asarray(adata.X)
    gene_names = adata.var['name']
    gene_ind = list(gene_names).index(gene)
    target_col = X_[:,gene_ind]
    num_genes= X_.shape[1]
    gene_corr = np.ones([num_genes])
    p_vals = np.zeros([num_genes])
    for g in range(num_genes):
        gene_corr[g],p_vals[g] = stats.pearsonr(target_col, X_[:,g])

    # Double argsort turns correlations into ranks (0 = most positive).
    gene_rank =np.argsort(np.argsort(-gene_corr))

    low,high = np.nanpercentile(gene_corr,[pct,100-pct])

    high_bool = gene_corr>high
    low_bool = gene_corr<low

    high_names=gene_names[high_bool]
    low_names=gene_names[low_bool]

    fig = None
    ax = None
    if to_plot:
        # Hide the target gene's own point in the plot.
        gene_corr_plot=gene_corr.copy()
        gene_corr_plot[gene_ind]=np.nan

        fig,single_ax=plt.subplots(1,1,figsize=(3,1),dpi=300)
        ax=[single_ax]  # callers index the axes as ax[0]
        ax[0].scatter(gene_rank,gene_corr_plot,s=1)
        ax[0].axhline(high,color='k',linewidth=1)
        ax[0].axhline(low,color='k',linewidth=1)
        ax[0].set_xlabel('Rank')
        ax[0].set_ylabel('Pearson R')
        plt.show()

    FDR_p_vals = p_vals*np.nan  # placeholder column, see Notes
    results_dict = {'gene_corr':gene_corr, 'high_bool':high_bool,'low_bool':low_bool ,'high_names':high_names,'low_names':low_names,'gene_rank':gene_rank}
    results_dict['table']= pd.DataFrame({'gene_name':gene_names,
                                         'gene_rank':gene_rank,
                                         'gene_corr':gene_corr,
                                         'p_vals':p_vals,
                                         'FDR_p_vals':FDR_p_vals,
                                         'high_bool':high_bool,
                                         'low_bool':low_bool}).set_index('gene_name').sort_values('gene_rank',axis=0)

    return results_dict, fig, ax

# Correlate all genes with Nell2 and with Apoe inside each cluster subset.
# pct=10: genes beyond the 10th / 90th percentile of Pearson r are flagged low / high.
pct =10
dgc_02_nell_results,_,_ = find_corr_genes(dgc_02_adata,'Nell2',pct=pct)
dgc_02_apoe_results,_,_ = find_corr_genes(dgc_02_adata,'Apoe',pct=pct)

CA3_06_nell_results,_,_ = find_corr_genes(CA3_06_adata,'Nell2',pct=pct)
CA3_06_apoe_results,_,_ = find_corr_genes(CA3_06_adata,'Apoe',pct=pct)


In [None]:

def write_tables(res,prefix):
    """Write the high- and low-correlation rows of ``res['table']`` to CSV.

    Parameters
    ----------
    res : dict
        Must contain ``'table'``: a DataFrame with boolean columns
        ``'high_bool'`` and ``'low_bool'``.
    prefix : str
        Path prefix; output goes to ``prefix + '_high_corr_names.csv'`` and
        ``prefix + '_low_corr_names.csv'``.

    Returns
    -------
    None
    """
    table = res['table']

    high_table = table[table['high_bool']]
    high_table.to_csv(prefix+'_high_corr_names.csv')

    # Use the table that was passed in (not a notebook global), and filter the
    # full table — not the high subset — with the low mask.
    low_table = table[table['low_bool']]
    low_table.to_csv(prefix+'_low_corr_names.csv')
    return None

# Export the high/low correlation tables for every cluster x target-gene combination.
# NOTE(review): hard-coded absolute output folder.
os.chdir("C:/Users/dennis.tabuena/Dropbox (Gladstone)/0_Projects/_Hyper+Crisper/_Nell2_enrichment/")
write_tables(CA3_06_nell_results,'CA3_06_nell')
write_tables(CA3_06_apoe_results,'CA3_06_apoe')
write_tables(dgc_02_nell_results,'dgc_02_nell')
write_tables(dgc_02_apoe_results,'dgc_02_apoe')



In [None]:
# Read the Nell2-vs-Apoe Pearson r out of the Nell2 correlation vectors.
# The index is taken from the full `adata`; the subsets were sliced by cell only,
# so presumably their gene order is identical — verify if the subsetting changes.
apoe_ind = list(adata.var['name']).index('Apoe')
print(apoe_ind)
dgc_02_nell2xapoe_corr = dgc_02_nell_results['gene_corr'][apoe_ind]
CA3_06_nell2xapoe_corr = CA3_06_nell_results['gene_corr'][apoe_ind]


print('dgc_02',dgc_02_nell2xapoe_corr)
print('CA3_06',CA3_06_nell2xapoe_corr)

In [None]:
# Build the GO-enrichment helper.
# NOTE(review): init_GO is defined in a later cell, so on a fresh kernel (Run All) this call raises NameError.
go_it = init_GO()

In [None]:
# try: os.makedirs('./gene_lists')
# except: None
# # dgc_01_double_high = [n for n in dgc_01_nell_results['high_names'] if n in dgc_01_apoe_results['high_names']]
# # dgc_01_double_low = [n for n in dgc_01_nell_results['low_names'] if n in dgc_01_apoe_results['low_names']]
# # pd.DataFrame(dgc_01_double_high).to_csv('./gene_lists/dgc_01_double_high.csv')
# # pd.DataFrame(dgc_01_double_low).to_csv('./gene_lists/dgc_01_double_low.csv')

# dgc_02_double_high = [n for n in dgc_02_nell_results['high_names'] if n in dgc_02_apoe_results['high_names']]
# dgc_02_double_low = [n for n in dgc_02_nell_results['low_names'] if n in dgc_02_apoe_results['low_names']]
# pd.DataFrame(dgc_02_double_high).to_csv('./gene_lists/dgc_02_double_high.csv')
# pd.DataFrame(dgc_02_double_low).to_csv('./gene_lists/dgc_02_double_low.csv')

# CA3_06_double_high = [n for n in CA3_06_nell_results['high_names'] if n in CA3_06_apoe_results['high_names']]
# CA3_06_double_low = [n for n in CA3_06_nell_results['low_names'] if n in CA3_06_apoe_results['low_names']]
# pd.DataFrame(CA3_06_double_high).to_csv('./gene_lists/CA3_06_double_high.csv')
# pd.DataFrame(CA3_06_double_low).to_csv('./gene_lists/CA3_06_double_low.csv')

In [None]:
# goatools provides the GO enrichment classes imported in the following cells.
# NOTE(review): this install is repeated in two later cells.
!pip install goatools


In [None]:

from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.obo_parser import GODag
from goatools.anno.genetogo_reader import Gene2GoReader
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS



In [None]:
def init_GO():
    """Set up GO enrichment for mouse genes and return a ``go_it`` helper.

    Side effects (all in the current working directory): downloads
    'gene_result.txt', generates 'genes_ncbi_mus_musculus_proteincoding.py'
    with goatools' ``ncbi_gene_results_to_python.py`` script, and downloads
    the GO ontology and NCBI gene2go association files via goatools.

    Returns
    -------
    callable
        ``go_it(test_genes)``: takes an iterable of gene symbols and returns a
        DataFrame of GO terms with ``p_fdr_bh < 0.05`` and more than one study
        gene (columns 'GO', 'term', 'class', 'p', 'p_corr', 'n_genes',
        'n_study', 'n_go', 'study_genes').
        NOTE(review): a later top-level ``go_it`` in this notebook names the
        first two columns 'GO ID' / 'GO term' and adds 'Percent_Enriched';
        the column names here are kept as they were.

    Raises
    ------
    FileNotFoundError
        If ``ncbi_gene_results_to_python.py`` is not in any PATH directory.
    subprocess.CalledProcessError
        If the conversion script exits with a non-zero status.
    """
    import importlib
    import os
    import subprocess
    import sys
    import urllib.request
    from collections import Counter

    # --- Get gene lists and metadata from NCBI ---
    gene_list_url='https://github.com/dtabuena/Resources/raw/1e3f0ef18ba127b71a9e6b93f7624e3a28fe87c1/GO%20Files/gene_result.txt'
    urllib.request.urlretrieve(gene_list_url, 'gene_result.txt')

    # Locate the goatools conversion script in any PATH directory. The former
    # code split PATH on ';' and took the first entry containing 'Scripts',
    # which only works on Windows and fails with IndexError elsewhere.
    script_name = 'ncbi_gene_results_to_python.py'
    script_dirs = [p for p in os.environ.get('PATH', '').split(os.pathsep)
                   if os.path.isfile(os.path.join(p, script_name))]
    if not script_dirs:
        raise FileNotFoundError(f'{script_name} was not found in any PATH directory; is goatools installed?')
    ncbi_path = os.path.join(script_dirs[0], script_name)
    # Run with the kernel's own interpreter rather than an IPython shell magic,
    # so this function is plain Python and a failure raises instead of passing silently.
    subprocess.run([sys.executable, ncbi_path, '-o', 'genes_ncbi_mus_musculus_proteincoding.py', 'gene_result.txt'],
                   check=True)
    # The module file was just written; make sure the import system notices it.
    importlib.invalidate_caches()
    from genes_ncbi_mus_musculus_proteincoding import GENEID2NT as GeneID2nt_mus

    # --- Key goatools functions ---
    from goatools.base import download_go_basic_obo
    from goatools.base import download_ncbi_associations
    from goatools.obo_parser import GODag
    from goatools.anno.genetogo_reader import Gene2GoReader
    from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS

    # --- Download current GO annotations ---
    obo_fname = download_go_basic_obo()
    fin_gene2go = download_ncbi_associations()
    obodag = GODag(obo_fname)

    # --- Mapper from gene symbol to GeneID, and its inverse ---
    mapper = {}
    for key in GeneID2nt_mus:
        mapper[GeneID2nt_mus[key].Symbol] = GeneID2nt_mus[key].GeneID
    inv_map = {v: k for k, v in mapper.items()}

    # --- Read NCBI's gene2go; annotations are stored as namedtuples ---
    objanno = Gene2GoReader(fin_gene2go, taxids=[10090])
    # Get namespace2association where:
    #    namespace is:
    #        BP: biological_process
    #        MF: molecular_function
    #        CC: cellular_component
    #    association is a dict:
    #        key: NCBI GeneID
    #        value: A set of GO IDs associated with that gene
    ns2assoc = objanno.get_ns2assc()

    # --- Create the GO enrichment object ---
    goeaobj = GOEnrichmentStudyNS(
            GeneID2nt_mus.keys(), # List of mouse protein-coding genes
            ns2assoc, # geneid/GO associations
            obodag, # Ontologies
            propagate_counts = False,
            alpha = 0.05, # default significance cut-off
            methods = ['fdr_bh']) # default multipletest correction method

    # --- Count, once, how many genes are annotated with each GO ID ---
    # (replaces building one long list and calling list.count() per term)
    go_counts = Counter()
    for namespace in ('BP', 'CC', 'MF'):
        assoc = goeaobj.ns2objgoea[namespace].assoc
        for gene_id in assoc:
            go_counts.update(assoc[gene_id])

    def go_it(test_genes):
        ''' Quick Access Function for doing the GO associations '''
        logging.info(f'input genes: {len(test_genes)}')
        # Symbols without a GeneID mapping are skipped.
        mapped_genes = [mapper[gene] for gene in test_genes if gene in mapper]
        logging.info(f'mapped genes: {len(mapped_genes)}')
        goea_results_all = goeaobj.run_study(mapped_genes)
        goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05]
        rows = [[r.GO, r.goterm.name, r.goterm.namespace, r.p_uncorrected, r.p_fdr_bh,
                 r.ratio_in_study[0], r.ratio_in_study[1], go_counts[r.GO],
                 [inv_map[gene_id] for gene_id in r.study_items]]
                for r in goea_results_sig]
        GO = pd.DataFrame(rows, columns = ['GO', 'term', 'class', 'p', 'p_corr', 'n_genes',
                                           'n_study', 'n_go', 'study_genes'])
        # .copy() so callers can add columns without SettingWithCopy issues
        GO = GO[GO.n_genes > 1].copy()
        return GO

    return go_it







In [None]:
# NOTE(review): in the visible notebook GeneID2nt_mus is only imported inside init_GO (a local name),
# so this top-level lookup presumably raises NameError on a fresh kernel — confirm.
GeneID2nt_mus

In [None]:
# Fetch the GO ontology file via goatools and keep the value it returns.
obo_fname = download_go_basic_obo()

In [None]:
# Inspect the value returned by download_go_basic_obo().
obo_fname

In [None]:
# NOTE(review): duplicate of an earlier goatools install cell.
!pip install goatools

In [None]:
import scanpy as sc

In [None]:
!pip install goatools

from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.obo_parser import GODag
from goatools.anno.genetogo_reader import Gene2GoReader
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS

import os
import urllib.request
gene_list_url='https://github.com/dtabuena/Resources/raw/1e3f0ef18ba127b71a9e6b93f7624e3a28fe87c1/GO%20Files/gene_result.txt'
urllib.request.urlretrieve(gene_list_url, 'gene_result.txt')
scripts_path = [p for p in os.environ['PATH'].split(';') if 'Scripts' in p][0]
ncbi_path = os.path.join(scripts_path,'ncbi_gene_results_to_python.py')
!python $ncbi_path -o genes_ncbi_mus_musculus_proteincoding.py gene_result.txt

In [None]:
from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.obo_parser import GODag
from goatools.anno.genetogo_reader import Gene2GoReader
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS

In [None]:
# Download the GO ontology and the NCBI gene2go associations, then parse the ontology.
obo_fname = download_go_basic_obo()
fin_gene2go = download_ncbi_associations()
# NOTE(review): the file name is hard-coded here rather than taken from obo_fname.
obodag = GODag("go-basic.obo")

In [None]:
# Gene symbol -> GeneID lookup built from the GeneID2nt_mus records, plus the reverse lookup.
mapper = {GeneID2nt_mus[key].Symbol: GeneID2nt_mus[key].GeneID for key in GeneID2nt_mus}

inv_map = dict((gene_id, symbol) for symbol, gene_id in mapper.items())

In [None]:
# Run one time to initialize.

# Read NCBI's gene2go, restricted to taxid 10090. Store annotations in a list of namedtuples.
objanno = Gene2GoReader(fin_gene2go, taxids=[10090])
# Get namespace2association where:
#    namespace is:
#        BP: biological_process
#        MF: molecular_function
#        CC: cellular_component
#    association is a dict:
#        key: NCBI GeneID
#        value: A set of GO IDs associated with that gene
ns2assoc = objanno.get_ns2assc()

In [None]:
# Run one time to initialize: GO enrichment study object with the GeneID2nt_mus keys as the population.
goeaobj = GOEnrichmentStudyNS(
        GeneID2nt_mus.keys(), # List of mouse protein-coding genes
        ns2assoc, # geneid/GO associations
        obodag, # Ontologies
        propagate_counts = False,
        alpha = 0.05, # default significance cut-off
        methods = ['fdr_bh']) # default multipletest correction method

In [None]:
# Run one time to initialize.
# Flatten every gene's GO IDs from the three namespaces into one list;
# go_it later counts occurrences in this list to get the number of genes per term.
GO_items = []
for namespace in ('BP', 'CC', 'MF'):
    temp = goeaobj.ns2objgoea[namespace].assoc
    for item in temp:
        GO_items += temp[item]



In [None]:
def go_it(test_genes):
    """Run GO enrichment on a list of gene symbols and tabulate the significant terms.

    Relies on the notebook globals ``mapper``, ``inv_map``, ``goeaobj`` and
    ``GO_items`` built in the preceding cells.

    Parameters
    ----------
    test_genes : sized iterable of str
        Gene symbols; symbols missing from ``mapper`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per GO term with ``p_fdr_bh < 0.05`` and more than one study
        gene. Columns: 'GO ID', 'GO term', 'class', 'p', 'p_corr', 'n_genes',
        'n_study', 'n_go', 'study_genes', 'Percent_Enriched'
        (``n_genes / n_go``, i.e. a fraction rather than a percentage).
    """
    from collections import Counter

    print(f'input genes: {len(test_genes)}')

    # Only the "symbol not in mapper" case is skipped; other errors are no longer swallowed.
    mapped_genes = [mapper[gene] for gene in test_genes if gene in mapper]
    print(f'mapped genes: {len(mapped_genes)}')

    goea_results_all = goeaobj.run_study(mapped_genes)
    goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05]

    # Count each GO ID once instead of calling GO_items.count() for every term.
    go_counts = Counter(GO_items)
    rows = [[r.GO, r.goterm.name, r.goterm.namespace, r.p_uncorrected, r.p_fdr_bh,
             r.ratio_in_study[0], r.ratio_in_study[1], go_counts[r.GO],
             [inv_map[gene_id] for gene_id in r.study_items]]
            for r in goea_results_sig]
    GO = pd.DataFrame(rows, columns = ['GO ID', 'GO term', 'class', 'p', 'p_corr', 'n_genes',
                                       'n_study', 'n_go', 'study_genes'])

    # .copy() so the new column is added to an independent frame, not a slice
    GO = GO[GO.n_genes > 1].copy()
    GO['Percent_Enriched'] = GO.n_genes/GO.n_go
    return GO

In [None]:
# GO enrichment for genes correlated with both Nell2 and Apoe ("double" lists).
# NOTE(review): the *_double_high / *_double_low inputs are only assigned in a commented-out cell
# of the visible notebook (and dgc_01 is commented out throughout), so this presumably raises NameError — confirm.
df_dgc_01_double_high = go_it(dgc_01_double_high)
df_dgc_02_double_high = go_it(dgc_02_double_high)
df_CA3_06_double_high = go_it(CA3_06_double_high)

df_dgc_01_double_low = go_it(dgc_01_double_low)
df_dgc_02_double_low = go_it(dgc_02_double_low)
df_CA3_06_double_low = go_it(CA3_06_double_low)

In [None]:
# Save the Nell2 x ApoE GO tables.
# exist_ok=True replaces a bare try/except that also hid real failures (e.g. permission errors).
os.makedirs('./Nell2ApoE_go_lists', exist_ok=True)

df_dgc_02_double_low.to_csv('./Nell2ApoE_go_lists/df_dgc_02_double_low.csv')
df_dgc_02_double_high.to_csv('./Nell2ApoE_go_lists/df_dgc_02_double_high.csv')
df_CA3_06_double_low.to_csv('./Nell2ApoE_go_lists/df_CA3_06_double_low.csv')
df_CA3_06_double_high.to_csv('./Nell2ApoE_go_lists/df_CA3_06_double_high.csv')

In [None]:
# GO term names present in both the DGC and the CA3 tables, in DGC-table order.
ca3_high_terms = set(df_CA3_06_double_high['GO term'])
ca3_low_terms = set(df_CA3_06_double_low['GO term'])
common_high = [term for term in df_dgc_02_double_high['GO term'] if term in ca3_high_terms]
common_low = [term for term in df_dgc_02_double_low['GO term'] if term in ca3_low_terms]


print(common_high)
print(common_low)

In [None]:
# display(df_dgc_01_double_high)
# display(df_dgc_01_double_low)


# Show the full DGC_02 GO tables for the double-correlated gene lists.
display(df_dgc_02_double_high)
display(df_dgc_02_double_low)

In [None]:

# Show the full CA3_06 GO tables for the double-correlated gene lists.
display(df_CA3_06_double_high)
display(df_CA3_06_double_low)

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
import seaborn as sns
import textwrap


In [None]:

def set_font_all(FS):
    """Set every matplotlib font-size rcParam to ``FS``, use the 'arial' family,
    and set axes and major-tick line widths to 0.5. Returns None."""
    size_keys = ('font.size', 'figure.titlesize', 'axes.titlesize', 'xtick.labelsize',
                 'ytick.labelsize', 'axes.labelsize', 'legend.fontsize')
    width_keys = ('axes.linewidth', 'xtick.major.width', 'ytick.major.width')

    settings = {key: FS for key in size_keys}
    settings['font.family'] = 'arial'
    for key in width_keys:
        settings[key] = .5
    plt.rcParams.update(settings)
    return None
set_font_all(6)

In [None]:
# Debug print of the RGBA colours for the corrected p-values.
# NOTE(review): cm_mapper is first assigned in a later cell and df_dgc_01_double_high depends on
# commented-out code, so this presumably fails on a fresh kernel — confirm or remove.
print(cm_mapper.to_rgba(df_dgc_01_double_high.p_corr.values))

In [None]:
# Re-run the Nell2 correlations with a stricter 1 % cut-off.
# Note: this overwrites the pct=10 results computed earlier under the same names.
pct = 1
dgc_02_nell_results, fig, ax = find_corr_genes(dgc_02_adata,'Nell2',pct=pct)
# Axes.title is a Text object, not a method; set_title() is the setter.
ax[0].set_title('Correlation with Nell2 in DGC_02')
# The figure was already shown inside find_corr_genes, so re-display it with the title.
display(fig)
CA3_06_nell_results, fig, ax = find_corr_genes(CA3_06_adata,'Nell2',pct=pct)
ax[0].set_title('Correlation with Nell2 in CA3_06')
display(fig)

In [None]:
# Per-gene Nell2 correlation in DGC_02 (x) against CA3_06 (y).
plt.scatter(dgc_02_nell_results['gene_corr'],CA3_06_nell_results['gene_corr'])

In [None]:

# Not the last expression of the cell, so this line displays nothing.
dgc_02_nell_results['low_names']



# GO enrichment of the genes flagged high / low for Nell2 correlation in each cluster.
df_dgc_02_GO_high = go_it(dgc_02_nell_results['high_names'])
df_dgc_02_GO_low = go_it(dgc_02_nell_results['low_names'])
df_CA3_06_GO_high = go_it(CA3_06_nell_results['high_names'])
df_CA3_06_GO_low = go_it(CA3_06_nell_results['low_names'])

In [None]:
# Show the full GO tables for the Nell2-high gene lists.
display(df_dgc_02_GO_high)
display(df_CA3_06_GO_high)

In [None]:
# Save the Nell2 GO tables.
# exist_ok=True replaces a bare try/except that also hid real failures (e.g. permission errors).
os.makedirs('./Nell2_go_lists', exist_ok=True)

df_dgc_02_GO_high.to_csv('./Nell2_go_lists/df_dgc_02_GO_high.csv')
df_dgc_02_GO_low.to_csv('./Nell2_go_lists/df_dgc_02_GO_low.csv')
df_CA3_06_GO_high.to_csv('./Nell2_go_lists/df_CA3_06_GO_high.csv')
df_CA3_06_GO_low.to_csv('./Nell2_go_lists/df_CA3_06_GO_low.csv')

In [None]:
# Re-index the GO tables by GO ID so terms can be looked up with .loc.
# NOTE(review): not idempotent — the names are rebound in place, so running this cell twice
# raises KeyError because 'GO ID' is no longer a column.
df_dgc_02_GO_high = df_dgc_02_GO_high.set_index('GO ID')
df_dgc_02_GO_low = df_dgc_02_GO_low.set_index('GO ID')
df_CA3_06_GO_high = df_CA3_06_GO_high.set_index('GO ID')
df_CA3_06_GO_low = df_CA3_06_GO_low.set_index('GO ID')

In [None]:
# First 50 rows of the DGC_02 Nell2-high GO table.
display(df_dgc_02_GO_high.head(50))

In [None]:
# Inspect the index of the DGC_02 table (GO IDs once the set_index cell has run).
df_dgc_02_GO_high.index

In [None]:
# GO IDs enriched in both the DGC_02 and CA3_06 Nell2-high lists, in DGC-table order.
ca3_go_ids = set(df_CA3_06_GO_high.index)
common_high = [go_id for go_id in df_dgc_02_GO_high.index if go_id in ca3_go_ids]
print(len(common_high))

# Drop the cellular_component terms.
excluded_classes = ['cellular_component']
common_high = [go_id for go_id in common_high
               if df_dgc_02_GO_high.loc[go_id,'class'] not in excluded_classes]
print(len(common_high))

# common_low = [g for g  in df_dgc_02_GO_low["GO ID"] if g in list(df_CA3_06_GO_low["GO ID"])]
# print(common_low)

In [None]:
# Dot plot of the shared GO terms: one row per term, one column per region.
# Dot size = Percent_Enriched * 100, dot colour = log10 of the corrected p-value.
fig_go_terms,ax=plt.subplots(1,figsize=(1,6),dpi=300)

region_tables = ((1, df_dgc_02_GO_high), (2, df_CA3_06_GO_high))

# The colour range spans BOTH regions; taking it from DGC alone would push
# CA3 values outside the range onto the end colours.
log_p_by_region = [np.log10(go_table.loc[common_high,'p_corr']) for _, go_table in region_tables]
cmap = mpl.cm.viridis_r
norm = mpl.colors.Normalize(vmin = min(log_p.min() for log_p in log_p_by_region),
                            vmax = max(log_p.max() for log_p in log_p_by_region))
cm_mapper = cm.ScalarMappable(norm = norm, cmap = cmap)


for y,gi in enumerate(common_high):
    for x,go_table in region_tables:
        s = go_table.loc[gi,'Percent_Enriched']
        c = cm_mapper.to_rgba( np.log10(go_table.loc[gi,'p_corr']) )
        ax.scatter(x,y,s=s*100,color = c)

ax.set_yticks(ticks=range(len(common_high)),labels=df_dgc_02_GO_high.loc[common_high,'GO term'])
ax.set_xticks([1,2],['DGC','CA3'])
ax.set_xlim([0.5,2.5])
plt.tight_layout()


# Standalone colour bar for the same mapping.
fig_color_scale,ax=plt.subplots(1,figsize=(.25,1),dpi=300)
cbl = fig_color_scale.colorbar(cm_mapper, cax = ax, orientation = 'vertical')
# The scale holds log10(p_corr), which is not negated, so the label must not say "-log10".
ax.set_ylabel('log10(p_corr)')
plt.tight_layout()


# Standalone size legend: dots drawn with the same size scaling as the main plot.
fig_size_scale,ax=plt.subplots(1,figsize=(.25,1),dpi=300)
ax.scatter([1,1,1],[1,2,3],s=np.array([.1,.2,.3])*100,color='k')
ax.set_xticks([])
ax.set_yticks([1,2,3],np.array(np.array([.1,.2,.3])*100,int))
ax.set_ylim(0,4)
ax.set_ylabel('Percent Enrichment')
plt.tight_layout()

fig_go_terms.savefig('./fig_go_terms.svg',format='svg',bbox_inches="tight")
fig_color_scale.savefig('./fig_color_scale.svg',format='svg',bbox_inches="tight")
fig_size_scale.savefig('./fig_size_scale.svg',format='svg',bbox_inches="tight")

In [None]:
# 'GO ID' was moved into the index by the earlier set_index cell, so look the IDs up there;
# indexing it as a column raises KeyError.
# Note: this rebinds common_high WITHOUT the cellular_component filter applied earlier.
ca3_high_ids = set(df_CA3_06_GO_high.index)
common_high = [g for g in df_dgc_02_GO_high.index if g in ca3_high_ids]

In [None]:
# Inspect the metadata table (rich display of the whole frame).
meta_df

In [None]:
# CA3 counts
# CA3_06_adata.obs
# Show the summary repr of the CA3_06 subset.
CA3_06_adata

In [None]:


# Count cells per age bin x louvain cluster x E type x Apoe level.
# NOTE(review): adata_GABA is not created anywhere in the visible notebook — confirm where it comes from.

# Flag cells whose Apoe value has a z-score above 2 as 'high', all others as 'low'.
# np.ravel gives z_score a 1-D vector whether obs_df returns a Series or a one-column frame,
# and the single whole-column assignment avoids chained indexing (obs['High_E'][mask] = ...).
apoe_z = z_score(np.ravel(sc.get.obs_df(adata_GABA,'Apoe')))
adata_GABA.obs['High_E'] = np.where(apoe_z > 2, 'high', 'low')


# display(adata_GABA.obs.head(4))
obs = adata_GABA.obs
ages = list(set(obs['age_bin']))
etype = list(set(obs['E_type']))
e_level = list(set(obs['High_E']))

count_dict_HE = {}
for a in ages:
    for c in ['5','2']:  # only louvain clusters '5' and '2' are tabulated
        for t in etype:
            for l in e_level:
                in_group = ((obs['age_bin']==a) & (obs['louvain']==c)
                            & (obs['E_type']==t) & (obs['High_E']==l))
                count_dict_HE[a+"_"+c+"c_"+t+'_'+l] = int(in_group.sum())


cell_count_HE_df = pd.DataFrame({'Group': list(count_dict_HE.keys()),
                                 'Counts': list(count_dict_HE.values())}).set_index('Group')
display(cell_count_HE_df)
cell_count_HE_df.to_csv('./cell_count_HE_df.csv')


