In [None]:
import os
from pathlib import Path

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache
from abc_atlas_access.abc_atlas_cache.anndata_utils import get_gene_data

In [None]:
# Root folder of the local ABC Atlas cache. Later cells also resolve their
# inputs/ and outputs/ folders relative to this location.
# The location can be overridden without editing the notebook by setting the
# ABC_ATLAS_DIR environment variable; when it is not set, the original
# network-drive path is used, so default behaviour is unchanged.
basePath = os.environ.get('ABC_ATLAS_DIR', 'Z:\\Common\\Transcriptomics\\ABC_Atlas')
download_base = Path(basePath)

# Open the project cache rooted at that folder.
abc_cache = AbcProjectCache.from_cache_dir(download_base)

# Show the manifest the cache is currently using (displayed as cell output).
abc_cache.current_manifest

In [None]:
# --- Cell metadata, indexed by cell label -----------------------------------
cell = abc_cache.get_metadata_dataframe(
    directory='WHB-10Xv3',
    file_name='cell_metadata',
    dtype={'cell_label': str},
).set_index('cell_label')
print("Number of cells = ", cell.shape[0])

# --- Taxonomy tables ----------------------------------------------------------
# Cluster membership table plus the annotation term sets (indexed by label);
# the term-set names define the column order of the pivoted tables below.
membership = abc_cache.get_metadata_dataframe(
    directory='WHB-taxonomy',
    file_name='cluster_to_cluster_annotation_membership',
)
term_sets = abc_cache.get_metadata_dataframe(
    directory='WHB-taxonomy',
    file_name='cluster_annotation_term_set',
).set_index('label')

# One row per cluster_alias, one column per term set, holding the first term
# name found for that pair. Missing entries become 'Other'.
cluster_details = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['cluster_annotation_term_name']
    .first()
    .unstack()[term_sets['name']]
    .fillna('Other')
    .sort_values(['supercluster', 'cluster', 'subcluster'])
)

# Same layout, but holding the color hex triplet of each term (no fill value).
cluster_colors = (
    membership
    .groupby(['cluster_alias', 'cluster_annotation_term_set_name'])['color_hex_triplet']
    .first()
    .unstack()[term_sets['name']]
    .sort_values(['supercluster', 'cluster', 'subcluster'])
)

# Region-of-interest table, indexed by ROI label, with its color column
# renamed to 'region_of_interest_color'.
roi = (
    abc_cache.get_metadata_dataframe(
        directory='WHB-10Xv3',
        file_name='region_of_interest_structure_map',
    )
    .set_index('region_of_interest_label')
    .rename(columns={'color_hex_triplet': 'region_of_interest_color'})
)

# The raw taxonomy tables are no longer needed once pivoted.
del membership, term_sets

# --- Extended cell table --------------------------------------------------------
# Attach the per-cluster annotation names to every cell via its cluster_alias.
cell_ext = cell.join(cluster_details, on='cluster_alias')
# The color and ROI joins are currently disabled:
# cell_ext = cell_ext.join(cluster_colors, on='cluster_alias', rsuffix='_color')
# cell_ext = cell_ext.join(roi[['region_of_interest_color']], on='region_of_interest_label')

# Release the helper tables.
del cluster_details, cluster_colors, roi

cell_ext.head(5)

In [None]:
# Call list_metadata_files for the 'WHB-10Xv3' directory, the same directory
# the cell and gene tables in this notebook are loaded from. Presumably this
# lists the metadata files available there (inferred from the method name);
# the result is shown as the cell output.
abc_cache.list_metadata_files('WHB-10Xv3')

In [None]:
# Gene table for the WHB-10Xv3 directory, indexed by gene identifier.
gene = abc_cache.get_metadata_dataframe(
    directory='WHB-10Xv3',
    file_name='gene',
).set_index('gene_identifier')
print("Number of genes = ", gene.shape[0])
gene.head(5)

In [None]:
# Selection criteria for the cells of interest.
ml = ["WHB-10Xv3-Neurons"]  # matrix label
sc = ["Hippocampal CA1-3", "Hippocampal CA4", "Hippocampal dentate gyrus"]  # supercluster
an = ["Hippocampus"]  # anatomical division

# Keep the cells that satisfy all three criteria at once: matrix label,
# supercluster and anatomical division.
CL = cell_ext[
    cell_ext['feature_matrix_label'].isin(ml)
    & cell_ext['supercluster'].isin(sc)
    & cell_ext['anatomical_division_label'].isin(an)
]

print("Number of cells = ", len(CL))
CL.head(5)

In [None]:
# Read the genes of interest from the inputs folder under the cache root.
# The path is built with pathlib instead of concatenating '\\' separators by
# hand; on Windows this resolves to exactly the same file as before.
df = pd.read_csv(Path(basePath) / 'inputs' / 'WHB_genes_GRIN.csv')

# Gene symbols in the order they appear in the CSV; this order is reused later
# to arrange the columns of the expression table.
gn = df['gene_symbol'].tolist()

# Fetch expression values for the selected genes, restricted to the filtered
# cells in CL, using the gene table to resolve the symbols.
gene_data = get_gene_data(
    abc_atlas_cache=abc_cache,
    all_cells=CL,
    all_genes=gene,
    selected_genes=gn,
)

In [None]:
# Drop rows whose first gene column is NaN and arrange the columns in the
# order of the gene list.
# The original cell computed this row filter but discarded the result (it was
# neither assigned nor the last expression of the cell), so NaN rows were kept
# and ended up in the exported CSVs. The filtered frame is now assigned.
# NOTE(review): presumably a NaN in the first column marks a cell for which no
# expression values were loaded - confirm against get_gene_data.
has_expression = gene_data.iloc[:, 0].notna()
gene_data = gene_data.loc[has_expression, gn]
gene_data.head(5)

In [None]:
# Split the expression table by supercluster.
# The supercluster labels live in CL while the rows being selected belong to
# gene_data. Align the labels to gene_data's index explicitly so the boolean
# masks always match row-for-row, instead of relying on implicit alignment
# (which warns or fails when the two frames do not hold the same rows).
supercluster = CL["supercluster"]
if not supercluster.index.equals(gene_data.index):
    supercluster = supercluster.reindex(gene_data.index)

CA13 = gene_data[supercluster == sc[0]]
CA4 = gene_data[supercluster == sc[1]]
DG = gene_data[supercluster == sc[2]]

# Export one CSV per supercluster into the outputs folder under the cache
# root, creating the folder first so the export does not fail when it is
# missing. On Windows these are the same file paths as before.
out_dir = Path(basePath) / 'outputs'
out_dir.mkdir(parents=True, exist_ok=True)

exports = {
    'WHB_Supercluster_Expression_CA1-3-PC_GRIN.csv': CA13,
    'WHB_Supercluster_Expression_CA4-PC_GRIN.csv': CA4,
    'WHB_Supercluster_Expression_DG-GC_GRIN.csv': DG,
}
for file_name, expression in exports.items():
    expression.to_csv(out_dir / file_name)