In [None]:
# Adapted from example notebooks provided at: https://alleninstitute.github.io/abc_atlas_access/intro.html

import pandas as pd
import numpy as np
import anndata
from pathlib import Path
import matplotlib.pyplot as plt
import time

from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache
from abc_atlas_access.abc_atlas_cache.anndata_utils import get_gene_data

In [None]:
# Root folder of the local ABC Atlas cache. It is handed to AbcProjectCache as
# its cache directory, and the input/ and output/ CSV paths used elsewhere in
# this notebook are built by concatenating onto this string.
basePath = 'Z:\\Common\\Transcriptomics\\ABC_Atlas' # Change to relevant folder path
download_base = Path(basePath)
abc_cache = AbcProjectCache.from_cache_dir(download_base)

# Last expression of the cell: displays the cache's `current_manifest` attribute.
abc_cache.current_manifest

In [None]:
# Load the WMB-10X per-cell metadata (with cluster annotation) and key the
# rows by their cell label.
cell = abc_cache.get_metadata_dataframe(
    directory='WMB-10X',
    file_name='cell_metadata_with_cluster_annotation',
).set_index('cell_label')
print("Number of cells = ", cell.shape[0])

In [None]:
# Filter cell dataframe to cells of interest
# Restrict the cell metadata to the class(es) of interest.
cs = ["17 MH-LH Glut"] # class
in_selected_class = cell['class'].isin(cs)
CL = cell.loc[in_selected_class]
print("Number of cells = ", CL.shape[0])
CL.head()

In [None]:
# Per (dataset, feature matrix) pair, count the rows of the full metadata
# table that carry a library label.
matrices = (
    cell
    .groupby(['dataset_label', 'feature_matrix_label'])[['library_label']]
    .count()
    .rename(columns={'library_label': 'cell_count'})
)
matrices

In [None]:
# Same per-matrix count as for the full table, but only for the filtered
# cells in CL.
matrices = (
    CL
    .groupby(['dataset_label', 'feature_matrix_label'])[['library_label']]
    .count()
    .rename(columns={'library_label': 'CL_count'})
)
matrices

In [None]:
# Load the WMB-10X gene table and key the rows by gene identifier.
gene = abc_cache.get_metadata_dataframe(
    directory='WMB-10X',
    file_name='gene',
).set_index('gene_identifier')
print("Number of genes = ", gene.shape[0])
gene

In [None]:
# Read the user-supplied gene list; its `gene_symbol` column selects which
# genes are extracted below.
genes_csv = basePath + '\\input\\WMB_genes_Hb.csv'
df = pd.read_csv(genes_csv)
gn = df.loc[:, 'gene_symbol'].to_list()
print("Number of selected genes = ", len(gn))
gn

In [None]:
# Fetch data for the selected gene symbols (gn), passing the filtered cells (CL)
# and the full gene table to the abc_atlas_access helper.
# NOTE(review): presumably returns a cell-by-gene expression DataFrame indexed
# like CL — confirm against the get_gene_data documentation.
gene_data = get_gene_data(
    abc_atlas_cache = abc_cache,
    all_cells = CL,
    all_genes = gene,
    selected_genes = gn
)

In [None]:
# Keep only the rows whose first gene column is not NaN, and order the columns
# to match the gene list `gn`. The filtered frame has to be assigned back:
# indexing on its own returns a new frame and leaves gene_data unchanged.
gene_data = gene_data.loc[pd.notna(gene_data.iloc[:, 0]), gn]
gene_data

In [None]:
# Attach the gene columns to the filtered cell metadata. DataFrame.join matches
# rows on the index and defaults to a left join, so every row of CL is kept.
# NOTE(review): assumes gene_data is indexed by cell_label like CL — confirm.
CL_with_genes = CL.join(gene_data)
CL_with_genes

In [None]:
def aggregate_by_metadata(df, gnames, value, sort = False):
    """Average the given gene columns within each metadata group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the grouping column(s) and the gene columns.
    gnames : list-like of column labels
        Gene columns to average.
    value : str or list
        Column(s) to group by. "subclass", "supertype" and "cluster" are
        expanded to also include every level above them (starting at "class"),
        so the result index shows the parent groups as well. Any other value
        is passed to ``groupby`` unchanged.
    sort : bool, default False
        If true, order the groups by the mean of the first gene in ``gnames``,
        highest first.

    Returns
    -------
    pandas.DataFrame
        One row per group, one column per gene, holding the group means.
    """
    # Taxonomy levels from coarsest to finest.
    hierarchy = ["class", "subclass", "supertype", "cluster"]

    # Replace a fine-grained level by the list of levels down to and including it.
    for depth in range(len(hierarchy) - 1, 0, -1):
        if value == hierarchy[depth]:
            value = hierarchy[:depth + 1]
            break

    means = df.groupby(value)[gnames].mean()
    return means.sort_values(by=gnames[0], ascending=False) if sort else means

In [None]:
def plot_heatmap(df, fig_width=14, fig_height=10, cmap=plt.cm.magma_r, vmin=0, vmax=6):
    """Draw a DataFrame as a heatmap with labelled rows and columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Values to draw; must be convertible to float. Rows are placed along
        the y-axis and columns along the x-axis.
    fig_width, fig_height : float
        Figure size in inches.
    cmap : matplotlib colormap
        Colormap used for the image.
    vmin, vmax : float, default 0 and 6
        Lower and upper limits of the colour scale passed to ``imshow``.
        Values outside the range are drawn with the end colours.

    Returns
    -------
    matplotlib.image.AxesImage
        The image created by ``imshow`` (e.g. to attach a colorbar).
    """
    values = df.to_numpy(dtype='float')

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))

    # aspect='auto' lets the cells stretch to fill the figure instead of being square.
    im = ax.imshow(values, cmap=cmap, aspect='auto', vmin=vmin, vmax=vmax)

    # One tick per column / row, labelled with the DataFrame's own labels.
    ax.set_xticks(range(len(df.columns)))
    ax.set_xticklabels(df.columns.values)
    ax.set_yticks(range(len(df.index)))
    ax.set_yticklabels(df.index.values)

    return im

In [None]:
# Mean expression of the selected genes per class/subclass: plot it as a
# heatmap and save the table as CSV.
agg = aggregate_by_metadata(CL_with_genes, gene_data.columns, 'subclass')
res = plot_heatmap(agg, 14, 14)

# Build the output path with pathlib rather than hard-coded separators, and
# create the folder if it is missing so the export does not fail.
output_dir = Path(basePath) / 'output'
output_dir.mkdir(parents=True, exist_ok=True)
agg.to_csv(output_dir / 'Hb_Subclass_AveExpressionMarkers.csv')
plt.show()

In [None]:
# Mean expression of the selected genes per class/subclass/supertype: plot it
# as a heatmap and save the table as CSV.
agg = aggregate_by_metadata(CL_with_genes, gene_data.columns, 'supertype')
res = plot_heatmap(agg, 14, 14)

# Build the output path with pathlib rather than hard-coded separators, and
# create the folder if it is missing so the export does not fail.
output_dir = Path(basePath) / 'output'
output_dir.mkdir(parents=True, exist_ok=True)
agg.to_csv(output_dir / 'Hb_Supertype_AveExpressionMarkers.csv')
plt.show()

In [None]:
# Mean expression of the selected genes per class/subclass/supertype/cluster:
# plot it as a heatmap and save the table as CSV.
agg = aggregate_by_metadata(CL_with_genes, gene_data.columns, 'cluster')
res = plot_heatmap(agg, 14, 14)

# Build the output path with pathlib rather than hard-coded separators, and
# create the folder if it is missing so the export does not fail.
output_dir = Path(basePath) / 'output'
output_dir.mkdir(parents=True, exist_ok=True)
agg.to_csv(output_dir / 'Hb_Cluster_AveExpressionMarkers.csv')
plt.show()

In [None]:
# Save the number of filtered cells per subclass, supertype and cluster, one
# CSV per taxonomy level.
# The output path is built with pathlib rather than hard-coded separators, and
# the folder is created if it is missing so the export does not fail.
output_dir = Path(basePath) / 'output'
output_dir.mkdir(parents=True, exist_ok=True)

count_files = {
    'subclass': 'Subclass_Counts.csv',
    'supertype': 'Supertype_Counts.csv',
    'cluster': 'Cluster_Counts.csv',
}
for level, file_name in count_files.items():
    # After the loop, Counts holds the cluster-level counts (the last level).
    Counts = CL[level].value_counts()
    Counts.to_csv(output_dir / file_name)