In [None]:
import os
from pathlib import Path
import ast

import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns
import PyComplexHeatmap as pch
# Notebook-wide matplotlib defaults: 300-dpi figures and saved files, 4x4 inch
# canvas, font type 42 for PDF/PS export, Arial sans-serif text, white axes.
plt.rcParams.update({
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'figure.figsize': (4, 4),
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'axes.facecolor': 'white',
})

In [None]:
# Marker genes noted per major cell class:
# Microglia - Hexb
# Astrocytes - Aqp4, S100b
# Neurons - DRD1 / DRD2 (for striatum)
# NOTE(review): placeholder — the list holds one empty string and is not read by
# any other cell visible in this notebook.
genes_of_interest = [""]

In [None]:
# Downsample Function
def _downsample_reference(
    ref_adata: ad.AnnData,
    cluster_col: str,
    max_cluster_size: int = 3000,
    min_cluster_size: int = 0,
):
    """
    Balance the cluster sizes of a reference AnnData.

    Clusters (levels of ``cluster_col``) with fewer than ``min_cluster_size``
    cells are removed, and clusters with more than ``max_cluster_size`` cells
    are downsampled. Each step is skipped when its threshold is not positive.

    Returns whatever the spida helpers return, or ``ref_adata`` itself when
    both steps are skipped.
    """
    from spida.utilities._ad_utils import _downsample_ref_clusters, _remove_small_clusters

    balanced = ref_adata
    drop_small = min_cluster_size > 0
    cap_large = max_cluster_size > 0
    if drop_small:
        balanced = _remove_small_clusters(balanced, cluster_col, min_cells=min_cluster_size)
    if cap_large:
        balanced = _downsample_ref_clusters(balanced, cluster_col, max_cells=max_cluster_size)
    return balanced

# Subclass

## RNA

In [None]:
# Alternative input file (disabled):
# ad_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_PFV8_annotated_v5.h5ad"
ad_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.h5ad"
adata = ad.read_h5ad(ad_path)

# TODO: label clean-up that belongs much further upstream (disabled here):
# adata.obs['Group'] = adata.obs['Group'].fillna("unknown")
# msn_dtypes = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
# adata.obs.loc[(adata.obs['Subclass'].isin(msn_dtypes)) & (adata.obs['brain_region'] == "GP"), "Subclass"] = "unknown"
# adata.obs.loc[(adata.obs['Subclass'].isin(msn_dtypes)) & (adata.obs['brain_region'] == "GP"), "Group"] = "unknown"

# Gene names of this dataset; later cells use the list to filter curated markers.
spatial_genes = list(adata.var_names)
adata

In [None]:
# Drop Subclass clusters below 50 cells and cap the remaining ones at 2000 cells,
# then prune category levels that no longer have any cells.
adata_ds = _downsample_reference(
    adata,
    cluster_col="Subclass",
    min_cluster_size=50,
    max_cluster_size=2000,
)
adata_ds.obs['Subclass'] = (
    adata_ds.obs['Subclass']
    .cat.remove_unused_categories()
)

### Calculated Markers

#### ALL CELL TYPES

In [None]:
# sc.tl.rank_genes_groups(adata_ds, groupby="Group", method="wilcoxon", n_genes=adata_ds.shape[1], layer="volume_norm")
# sc.tl.dendrogram(adata_ds, groupby="Group")
# sc.pl.rank_genes_groups_heatmap(adata_ds, n_genes=3, groupby="Group", show_gene_labels=False, figsize=(6,8))

In [None]:
# Wilcoxon ranking of every gene for each Subclass, computed on the
# "volume_norm" layer, then flattened into one long table (one row per
# group/gene pair).
# NOTE(review): the ranking reads layer="volume_norm" while the heatmap built
# from these markers reads adata_ds.X — confirm both hold the same normalisation.
sc.tl.rank_genes_groups(adata_ds, groupby="Subclass", method="wilcoxon", n_genes=adata_ds.shape[1], layer="volume_norm")
df = sc.get.rank_genes_groups_df(adata_ds, group=None)

# Row labels of the five largest log fold changes within each group. `nlargest`
# on a grouped Series returns a (group, original row label) MultiIndex whose
# unnamed second level appears as 'level_1' after reset_index().
# observed=True keeps only groups that actually occur and avoids the pandas
# FutureWarning raised when grouping on a categorical column.
idx = df.groupby('group', observed=True)['logfoldchanges'].nlargest(5).reset_index()['level_1']
df_top5 = df.loc[idx].copy()

# A gene that is a top hit for several groups is kept only for the first one,
# so every heatmap column is unique.
keep_idx = df_top5['names'].drop_duplicates(keep='first').index
df_top5 = df_top5.loc[keep_idx].copy()

In [None]:
# Row metadata: one row per cell, sorted so cells of a Subclass are contiguous.
df_row = adata_ds.obs[['Subclass']].copy()
df_row = df_row.sort_values('Subclass')

# Column metadata: one row per marker gene, sorted by the group it marks.
df_col = df_top5.set_index('names').sort_values(by="group")

# Slice to (ordered cells x marker genes) BEFORE densifying, so only the few
# marker columns are materialised instead of the full gene matrix. Dense .X
# (no .toarray method) is accepted as well.
marker_view = adata_ds[df_row.index, df_col.index.tolist()]
marker_X = marker_view.X
marker_X = marker_X.toarray() if hasattr(marker_X, "toarray") else np.asarray(marker_X)
df_expr = pd.DataFrame(marker_X, index=marker_view.obs_names, columns=marker_view.var_names)

# Per-cell rescaling: subtract the cell's minimum and divide by the distance to
# its 99th percentile across the selected genes (values above 1 are possible).
# NOTE(review): a cell that is constant across all selected genes divides 0 by 0
# and becomes a NaN row.
row_min = df_expr.min(axis=1)
df_expr_norm = df_expr.subtract(row_min, axis=0).div(df_expr.quantile(0.99, axis=1) - row_min, axis=0)

# Palette restricted to the subclasses that are actually plotted.
present_subclasses = set(df_row['Subclass'].unique())
colors_dict = {k: v for k, v in adata.uns['Subclass_palette'].items() if k in present_subclasses}

In [None]:
# Top (gene) annotation: one merged text label per marker block plus a colour bar.
# df_col['group'] holds Subclass names here, so the track is named "Subclass"
# (it was labelled "Group", which is a different taxonomy level in this dataset).
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Subclass=pch.anno_simple(df_col['group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False
)

# Left (cell) annotation: Subclass label and colour bar for every row.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_row['Subclass'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Subclass=pch.anno_simple(df_row['Subclass'], colors=colors_dict),
    verbose=1, axis=0
)

# Cells x marker genes, in the pre-sorted order (no clustering). The colour
# scale is pinned to [0, 1] like the other expression heatmaps in this notebook.
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_expr_norm,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="Expression",
                           cmap='Purples',
                           rasterized=True, 
                           xlabel="Genes",
                           vmin=0, vmax=1)

#### NEURONS ONLY

In [None]:
# Neurons only: subset, balance Subclass sizes (min 50 / max 2000 cells), and
# prune category levels that no longer have cells.
adata_neu = adata[adata.obs['neuron_type'] == 'Neuron'].copy()
adata_neu_ds = _downsample_reference(adata_neu, cluster_col="Subclass", max_cluster_size=2000, min_cluster_size=50)
adata_neu_ds.obs['Subclass'] = adata_neu_ds.obs['Subclass'].cat.remove_unused_categories()

# Wilcoxon ranking of every gene for each neuronal Subclass.
# NOTE(review): the all-cell-type ranking passes layer="volume_norm"; this call
# passes no layer — confirm the difference is intended.
# n_genes is taken from the object being tested.
sc.tl.rank_genes_groups(adata_neu_ds, groupby="Subclass", method="wilcoxon", n_genes=adata_neu_ds.shape[1])
# sc.tl.dendrogram(adata_neu_ds, groupby="Subclass")
df = sc.get.rank_genes_groups_df(adata_neu_ds, group=None)

# Row labels of the five largest log fold changes within each group ('level_1'
# is the original row label exposed by reset_index()). observed=True avoids the
# pandas FutureWarning raised when grouping on a categorical column.
idx = df.groupby('group', observed=True)['logfoldchanges'].nlargest(5).reset_index()['level_1']
df_top5 = df.loc[idx].copy()

# Keep each gene once, assigned to the first group in which it appears.
keep_idx = df_top5['names'].drop_duplicates(keep='first').index
df_top5 = df_top5.loc[keep_idx].copy()

In [None]:
# Row metadata: one row per neuron, sorted so cells of a Subclass are contiguous.
df_row = adata_neu_ds.obs[['Subclass']].copy()
df_row = df_row.sort_values('Subclass')

# Column metadata: one row per marker gene, sorted by the group it marks.
df_col = df_top5.set_index('names').sort_values(by="group")

# Slice to (ordered cells x marker genes) BEFORE densifying, so only the few
# marker columns are materialised. Dense .X (no .toarray method) also works.
marker_view = adata_neu_ds[df_row.index, df_col.index.tolist()]
marker_X = marker_view.X
marker_X = marker_X.toarray() if hasattr(marker_X, "toarray") else np.asarray(marker_X)
df_expr = pd.DataFrame(marker_X, index=marker_view.obs_names, columns=marker_view.var_names)

# Per-cell rescaling: subtract the cell's minimum and divide by the distance to
# its 99th percentile across the selected genes.
# NOTE(review): a cell that is constant across all selected genes becomes a NaN row.
row_min = df_expr.min(axis=1)
df_expr_norm = df_expr.subtract(row_min, axis=0).div(df_expr.quantile(0.99, axis=1) - row_min, axis=0)

# Palette restricted to the subclasses that are actually plotted.
present_subclasses = set(df_row['Subclass'].unique())
colors_dict = {k: v for k, v in adata.uns['Subclass_palette'].items() if k in present_subclasses}

In [None]:
# Top (gene) annotation. df_col['group'] holds Subclass names here, so the colour
# track is named "Subclass" (it was labelled "Group", a different taxonomy level).
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Subclass=pch.anno_simple(df_col['group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False
)

# Left (cell) annotation: Subclass label and colour bar for every row.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_row['Subclass'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Subclass=pch.anno_simple(df_row['Subclass'], colors=colors_dict),
    verbose=1, axis=0
)

# Neurons x marker genes, in the pre-sorted order (no clustering), colour scale [0, 1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_expr_norm,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="Expression",
                           cmap='Purples',
                           rasterized=True, 
                           xlabel="Genes",
                           vmin=0, vmax=1)

### HMBA Markers

In [None]:
# Curated marker table; keep Subclass-level rows of neuronal types only.
df_markers = pd.read_excel("/anvil/projects/x-mcb130189/Wubin/BG/metadata/BG_markers.xlsx")
df_markers = df_markers[(df_markers["Level"] == "Subclass") & (df_markers["Parent"] != "Nonneuron")]

# Collect (Subclass, gene) pairs for marker genes that exist in the spatial data.
# 'RNA_markers' stores a Python-literal list as text; an empty spreadsheet cell
# is read as NaN (not a str) and is skipped, as the methylation cells also do.
spatial_gene_set = set(spatial_genes)  # set: constant-time membership tests
rna_markers = []
for _, marker_row in df_markers.iterrows():
    raw_markers = marker_row['RNA_markers']
    if not isinstance(raw_markers, str):
        continue
    genes = ast.literal_eval(raw_markers)
    _sc = marker_row['CellType']
    for _gene in genes:
        if _gene in spatial_gene_set:
            rna_markers.append((_sc, _gene))

df_rna_markers = pd.DataFrame(rna_markers, columns=['Subclass', 'Gene'])
print(df_rna_markers.shape)

# Keep only subclasses present in the MERFISH annotations FIRST, then
# de-duplicate. In the other order, a gene whose first listing belongs to an
# absent subclass is dropped even when it also marks a present subclass.
present_subclasses = adata_neu_ds.obs['Subclass'].cat.categories
df_rna_markers = df_rna_markers[df_rna_markers['Subclass'].isin(present_subclasses)]
df_rna_markers = df_rna_markers.drop_duplicates(['Gene'], keep='first')
print(df_rna_markers.shape)

In [None]:
# Row metadata: one row per neuron, sorted so cells of a Subclass are contiguous.
df_row = adata_neu_ds.obs[['Subclass']].copy()
df_row = df_row.sort_values('Subclass')

# Column metadata: one row per curated marker gene, sorted by its Subclass.
df_col = df_rna_markers.set_index('Gene').sort_values(by="Subclass")

# Slice to (ordered cells x marker genes) BEFORE densifying, so only the few
# marker columns are materialised. Dense .X (no .toarray method) also works.
marker_view = adata_neu_ds[df_row.index, df_col.index.tolist()]
marker_X = marker_view.X
marker_X = marker_X.toarray() if hasattr(marker_X, "toarray") else np.asarray(marker_X)
df_expr = pd.DataFrame(marker_X, index=marker_view.obs_names, columns=marker_view.var_names)

# Per-cell rescaling: subtract the cell's minimum and divide by the distance to
# its 99th percentile across the selected genes.
# NOTE(review): a cell that is constant across all selected genes becomes a NaN row.
row_min = df_expr.min(axis=1)
df_expr_norm = df_expr.subtract(row_min, axis=0).div(df_expr.quantile(0.99, axis=1) - row_min, axis=0)

# Palette restricted to the subclasses that are actually plotted.
present_subclasses = set(df_row['Subclass'].unique())
colors_dict = {k: v for k, v in adata.uns['Subclass_palette'].items() if k in present_subclasses}

In [None]:
# Top (gene) annotation. The track shows df_col['Subclass'], so it is named
# "Subclass" (it was labelled "Group", a different taxonomy level).
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['Subclass'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Subclass=pch.anno_simple(df_col['Subclass'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False
)

# Left (cell) annotation: Subclass label and colour bar for every row.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_row['Subclass'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Subclass=pch.anno_simple(df_row['Subclass'], colors=colors_dict),
    verbose=1, axis=0
)

# Neurons x curated marker genes, pre-sorted (no clustering), colour scale [0, 1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_expr_norm,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="Expression",
                           cmap='Purples',
                           rasterized=True, 
                           xlabel="Genes",
                           vmin=0, vmax=1)

## Methylation 

In [None]:
# mCH AnnData for the same cells' modality counterpart; reuse the colour
# definitions stored on the RNA object so both modalities plot consistently.
spatial_mch = ad.read_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/methylation_2/BG_mCH_Imp_SubR.h5ad")
for k in list(adata.uns.keys()):
    is_color_entry = ("colors" in k) or ("palette" in k)
    if is_color_entry:
        spatial_mch.uns[k] = adata.uns[k]

# Spatial coordinates taken from the CENTER_X / CENTER_Y obs columns.
xy_columns = ['CENTER_X', 'CENTER_Y']
spatial_mch.obsm['spatial'] = spatial_mch.obs[xy_columns].values
spatial_mch

In [None]:
# Genes present in both modalities, listed in adata.var_names order. Building the
# list from a set intersection gave a different order on every kernel start
# (string hashing is randomised), which made the column order irreproducible.
mch_gene_set = set(spatial_mch.var_names)
common_genes = list(dict.fromkeys(g for g in adata.var_names if g in mch_gene_set))
spatial_mch = spatial_mch[:, common_genes].copy()

In [None]:
# Neurons only, Subclass sizes balanced (min 50 / max 2000 cells), and cells
# labelled 'unknown' removed from the downsampled object.
neuron_mask = spatial_mch.obs['neuron_type'] == "Neuron"
spatial_mch = spatial_mch[neuron_mask]
spatial_mch_ds = _downsample_reference(
    spatial_mch,
    cluster_col="Subclass",
    min_cluster_size=50,
    max_cluster_size=2000,
)
known_mask = spatial_mch_ds.obs['Subclass'] != 'unknown'
spatial_mch_ds = spatial_mch_ds[known_mask].copy()

In [None]:
# Display the summary of the downsampled, neuron-only mCH object.
spatial_mch_ds

### Calculated Markers

In [None]:
# Row metadata: one row per cell, sorted so cells of a Subclass are contiguous.
df_mch_row = spatial_mch_ds.obs[['Subclass']].copy()
df_mch_row = df_mch_row.sort_values('Subclass')

# RNA-derived markers usable here: the gene must exist in both modalities and its
# group must have cells in the mCH data. The filtered table gets its own name so
# the shared df_top5 is not overwritten by this cell.
df_top5_mch = df_top5.loc[
    df_top5['names'].isin(common_genes)
    & df_top5['group'].isin(spatial_mch_ds.obs['Subclass'])
]
df_col = df_top5_mch.set_index('names').sort_values(by="group")

# mCH values for the ordered cells x marker genes (single selection).
df_mch = pd.DataFrame(spatial_mch_ds.X, columns=spatial_mch_ds.var_names, index=spatial_mch_ds.obs_names)
df_mch = df_mch.loc[df_mch_row.index, df_col.index.tolist()]

# Palette restricted to the subclasses that are actually plotted.
present_subclasses = set(df_mch_row['Subclass'].unique())
colors_dict = {k: v for k, v in adata.uns['Subclass_palette'].items() if k in present_subclasses}

In [None]:
# Top (gene) annotation. df_col['group'] holds Subclass names here, so the colour
# track is named "Subclass" (it was labelled "Group", a different taxonomy level).
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Subclass=pch.anno_simple(df_col['group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False,
)

# Left (cell) annotation: Subclass label and colour bar for every row.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_mch_row['Subclass'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Subclass=pch.anno_simple(df_mch_row['Subclass'], colors=colors_dict),
    verbose=1, axis=0
)

# Cells x marker genes mCH heatmap, pre-sorted (no clustering), colour scale [0, 0.1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_mch,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="mCH",
                           cmap='parula',
                           rasterized=True, 
                           xlabel="Genes", 
                           vmax=0.1, vmin=0)

### Wubin Markers

In [None]:
# Curated marker table; keep Subclass-level rows of neuronal types only.
df_markers = pd.read_excel("/anvil/projects/x-mcb130189/Wubin/BG/metadata/BG_markers.xlsx")
df_markers = df_markers[(df_markers["Level"] == "Subclass") & (df_markers["Parent"] != "Nonneuron")]

# A marker is usable only if it is on the spatial gene list AND is a column of
# the mCH matrix; checking the RNA gene list alone lets through genes that the
# later column selection on the mCH matrix cannot find (KeyError).
usable_genes = set(spatial_genes) & set(spatial_mch_ds.var_names)

# Collect (Subclass, gene) pairs. 'Methylation_markers' stores a Python-literal
# list as text; an empty spreadsheet cell is read as NaN (not a str) and skipped.
meth_markers = []
for _, marker_row in df_markers.iterrows():
    mm = marker_row['Methylation_markers']
    if not isinstance(mm, str):
        continue
    genes = ast.literal_eval(mm)
    _sc = marker_row['CellType']
    for _gene in genes:
        if _gene in usable_genes:
            meth_markers.append((_sc, _gene))

df_meth_markers = pd.DataFrame(meth_markers, columns=['Subclass', 'Gene'])
print(df_meth_markers.shape)

# Keep only subclasses that have cells in the mCH data (as the Group-level cell
# does), then keep each gene once, assigned to the first subclass listing it.
present_subclasses = spatial_mch_ds.obs['Subclass'].unique()
df_meth_markers = df_meth_markers[df_meth_markers['Subclass'].isin(present_subclasses)]
df_meth_markers = df_meth_markers.drop_duplicates(['Gene'], keep='first')
print(df_meth_markers.shape)

In [None]:
# Row metadata: one row per cell, sorted so cells of a Subclass are contiguous.
df_mch_row = spatial_mch_ds.obs[['Subclass']].copy()
df_mch_row = df_mch_row.sort_values('Subclass')

# Column metadata: one row per curated methylation marker, sorted by Subclass.
df_col = df_meth_markers.set_index('Gene').sort_values(by="Subclass")

# mCH values for the ordered cells x marker genes (single selection; df_col's
# index holds exactly the genes of df_meth_markers).
df_mch = pd.DataFrame(spatial_mch_ds.X, columns=spatial_mch_ds.var_names, index=spatial_mch_ds.obs_names)
df_mch = df_mch.loc[df_mch_row.index, df_col.index.tolist()]

# Palette restricted to the subclasses that are actually plotted.
present_subclasses = set(df_mch_row['Subclass'].unique())
colors_dict = {k: v for k, v in adata.uns['Subclass_palette'].items() if k in present_subclasses}

In [None]:
# Top (gene) annotation. The track shows df_col['Subclass'], so it is named
# "Subclass" (it was labelled "Group", a different taxonomy level).
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['Subclass'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Subclass=pch.anno_simple(df_col['Subclass'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False,
)

# Left (cell) annotation: Subclass label and colour bar for every row.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_mch_row['Subclass'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Subclass=pch.anno_simple(df_mch_row['Subclass'], colors=colors_dict),
    verbose=1, axis=0
)

# Cells x curated markers mCH heatmap, pre-sorted (no clustering), colour scale [0, 0.1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_mch,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="mCH",
                           cmap='parula',
                           rasterized=True, 
                           xlabel="Genes", 
                           vmax=0.1, vmin=0)

# Group

## RNA

In [None]:
# Group-level balancing: drop Groups below 50 cells, cap the rest at 1000 cells,
# then prune category levels that no longer have any cells.
adata_ds = _downsample_reference(
    adata,
    cluster_col="Group",
    min_cluster_size=50,
    max_cluster_size=1000,
)
adata_ds.obs['Group'] = (
    adata_ds.obs['Group']
    .cat.remove_unused_categories()
)

### NEURONS ONLY

In [None]:
# Neurons only: subset, balance Group sizes (min 50 / max 1000 cells), and prune
# category levels that no longer have cells.
adata_neu = adata[adata.obs['neuron_type'] == 'Neuron'].copy()
adata_neu_ds = _downsample_reference(adata_neu, cluster_col="Group", max_cluster_size=1000, min_cluster_size=50)
adata_neu_ds.obs['Group'] = adata_neu_ds.obs['Group'].cat.remove_unused_categories()

# Wilcoxon ranking of every gene for each neuronal Group.
# NOTE(review): the all-cell-type Subclass ranking passes layer="volume_norm";
# this call passes no layer — confirm the difference is intended.
# n_genes is taken from the object being tested.
sc.tl.rank_genes_groups(adata_neu_ds, groupby="Group", method="wilcoxon", n_genes=adata_neu_ds.shape[1])
# sc.tl.dendrogram(adata_neu_ds, groupby="Group")
df = sc.get.rank_genes_groups_df(adata_neu_ds, group=None)

# Row labels of the five largest log fold changes within each group ('level_1'
# is the original row label exposed by reset_index()). observed=True avoids the
# pandas FutureWarning raised when grouping on a categorical column.
idx = df.groupby('group', observed=True)['logfoldchanges'].nlargest(5).reset_index()['level_1']
df_top5 = df.loc[idx].copy()

# Keep each gene once, assigned to the first group in which it appears.
keep_idx = df_top5['names'].drop_duplicates(keep='first').index
df_top5 = df_top5.loc[keep_idx].copy()

In [None]:
# Row metadata: one row per neuron, sorted so cells of a Group are contiguous.
df_row = adata_neu_ds.obs[['Group']].copy()
df_row = df_row.sort_values('Group')

# Column metadata: one row per marker gene, sorted by the group it marks.
df_col = df_top5.set_index('names').sort_values(by="group")

# Slice to (ordered cells x marker genes) BEFORE densifying, so only the few
# marker columns are materialised. Dense .X (no .toarray method) also works.
marker_view = adata_neu_ds[df_row.index, df_col.index.tolist()]
marker_X = marker_view.X
marker_X = marker_X.toarray() if hasattr(marker_X, "toarray") else np.asarray(marker_X)
df_expr = pd.DataFrame(marker_X, index=marker_view.obs_names, columns=marker_view.var_names)

# Per-cell rescaling: subtract the cell's minimum and divide by the distance to
# its 99th percentile across the selected genes.
# NOTE(review): a cell that is constant across all selected genes becomes a NaN row.
row_min = df_expr.min(axis=1)
df_expr_norm = df_expr.subtract(row_min, axis=0).div(df_expr.quantile(0.99, axis=1) - row_min, axis=0)

# Palette restricted to the groups that are actually plotted.
present_groups = set(df_row['Group'].unique())
colors_dict = {k: v for k, v in adata.uns['Group_palette'].items() if k in present_groups}

In [None]:
# Top (gene) annotation: merged Group label per marker block plus a colour bar.
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Group=pch.anno_simple(df_col['group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False
)

# Left (cell) annotation. The track shows df_row['Group'], so it is named "Group"
# (it was labelled "Subclass", a copy-paste leftover from the Subclass section).
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_row['Group'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Group=pch.anno_simple(df_row['Group'], colors=colors_dict),
    verbose=1, axis=0
)

# Neurons x marker genes, pre-sorted (no clustering), colour scale [0, 1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_expr_norm,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="Expression",
                           cmap='Purples',
                           rasterized=True, 
                           xlabel="Genes",
                           vmin=0, vmax=1)

### HMBA Markers

In [None]:
# Curated marker table. Non-neuronal subclasses are looked up first so that
# Group-level rows whose parent is one of them can be excluded.
df_markers = pd.read_excel("/anvil/projects/x-mcb130189/Wubin/BG/metadata/BG_markers.xlsx")
nonneuronal_subclasses = df_markers[df_markers['Parent'] == "Nonneuron"]['CellType'].tolist()
df_markers = df_markers[(df_markers["Level"] == "Group") & (~df_markers["Parent"].isin(nonneuronal_subclasses))]

# Collect (Group, gene) pairs for marker genes that exist in the spatial data.
# 'RNA_markers' stores a Python-literal list as text; an empty spreadsheet cell
# is read as NaN (not a str) and is skipped, as the methylation cells also do.
spatial_gene_set = set(spatial_genes)  # set: constant-time membership tests
rna_markers = []
for _, marker_row in df_markers.iterrows():
    raw_markers = marker_row['RNA_markers']
    if not isinstance(raw_markers, str):
        continue
    genes = ast.literal_eval(raw_markers)
    _sc = marker_row['CellType']
    for _gene in genes:
        if _gene in spatial_gene_set:
            rna_markers.append((_sc, _gene))

df_rna_markers = pd.DataFrame(rna_markers, columns=['Group', 'Gene'])
print(df_rna_markers.shape)

# Keep only groups present in the MERFISH annotations FIRST, then de-duplicate.
# In the other order, a gene whose first listing belongs to an absent group is
# dropped even when it also marks a present group.
present_groups = adata_neu_ds.obs['Group'].cat.categories
df_rna_markers = df_rna_markers[df_rna_markers['Group'].isin(present_groups)]
df_rna_markers = df_rna_markers.drop_duplicates(['Gene'], keep='first')
print(df_rna_markers.shape)

In [None]:
# Row metadata: one row per neuron, sorted so cells of a Group are contiguous.
df_row = adata_neu_ds.obs[['Group']].copy()
df_row = df_row.sort_values('Group')

# Column metadata: one row per curated marker gene, sorted by its Group.
df_col = df_rna_markers.set_index('Gene').sort_values(by="Group")

# Slice to (ordered cells x marker genes) BEFORE densifying, so only the few
# marker columns are materialised. Dense .X (no .toarray method) also works.
marker_view = adata_neu_ds[df_row.index, df_col.index.tolist()]
marker_X = marker_view.X
marker_X = marker_X.toarray() if hasattr(marker_X, "toarray") else np.asarray(marker_X)
df_expr = pd.DataFrame(marker_X, index=marker_view.obs_names, columns=marker_view.var_names)

# Per-cell rescaling: subtract the cell's minimum and divide by the distance to
# its 99th percentile across the selected genes.
# NOTE(review): a cell that is constant across all selected genes becomes a NaN row.
row_min = df_expr.min(axis=1)
df_expr_norm = df_expr.subtract(row_min, axis=0).div(df_expr.quantile(0.99, axis=1) - row_min, axis=0)

# Palette restricted to the groups that are actually plotted.
present_groups = set(df_row['Group'].unique())
colors_dict = {k: v for k, v in adata.uns['Group_palette'].items() if k in present_groups}

In [None]:
# Top (gene) annotation: merged Group label per marker block plus a colour bar.
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['Group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Group=pch.anno_simple(df_col['Group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False
)

# Left (cell) annotation. The track shows df_row['Group'], so it is named "Group"
# (it was labelled "Subclass", a copy-paste leftover from the Subclass section).
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_row['Group'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Group=pch.anno_simple(df_row['Group'], colors=colors_dict),
    verbose=1, axis=0
)

# Neurons x curated marker genes, pre-sorted (no clustering), colour scale [0, 1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_expr_norm,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="Expression",
                           cmap='Purples',
                           rasterized=True, 
                           xlabel="Genes",
                           vmin=0, vmax=1)
# df_expr.values.max()*0.5
# df_expr.values.max()*0.5

## Methylation 

In [None]:
# Neurons only, Group sizes balanced (min 50 / max 2000 cells), and cells
# labelled 'unknown' removed from the downsampled object.
neuron_mask = spatial_mch.obs['neuron_type'] == "Neuron"
spatial_mch = spatial_mch[neuron_mask]
spatial_mch_ds = _downsample_reference(
    spatial_mch,
    cluster_col="Group",
    min_cluster_size=50,
    max_cluster_size=2000,
)
known_mask = spatial_mch_ds.obs['Group'] != 'unknown'
spatial_mch_ds = spatial_mch_ds[known_mask].copy()

### Calculated Markers

In [None]:
import json

# Gene-name mapping read from JSON; a later cell applies it to the mCH gene columns.
rename_json = "/home/x-aklein2/projects/aklein/BICAN/data/reference/AIT/BG_gene_rename.json"
with open(rename_json, 'r') as f:
    rename_dict = json.load(f)
print(rename_dict)

# Single gene name that the next cell removes from the calculated markers.
exclude_genes = 'AC109466.1'

In [None]:
# Drop the excluded gene from the calculated marker table.
df_top5 = df_top5.loc[df_top5['names'] != exclude_genes]

In [None]:
# Row metadata: one row per cell, sorted so cells of a Group are contiguous.
df_mch_row = spatial_mch_ds.obs[['Group']].copy()
df_mch_row = df_mch_row.sort_values('Group')

# Column metadata: RNA-derived markers whose group has cells in the mCH data.
df_col = df_top5.set_index('names').sort_values(by="group")
df_col = df_col.loc[df_col['group'].isin(df_mch_row['Group'].unique())]

# mCH matrix with gene columns renamed through rename_dict (non-inplace).
df_mch = pd.DataFrame(spatial_mch_ds.X, columns=spatial_mch_ds.var_names, index=spatial_mch_ds.obs_names)
df_mch = df_mch.rename(columns=rename_dict)

# Select only the columns that will be plotted. Previously every df_top5 gene was
# required to exist, including genes of groups already filtered out above, so one
# missing gene raised a KeyError. Missing markers are now reported and skipped.
missing_genes = df_col.index.difference(df_mch.columns)
if len(missing_genes) > 0:
    print(f"Skipping {len(missing_genes)} marker gene(s) absent from the mCH matrix: {missing_genes.tolist()}")
    df_col = df_col.loc[df_col.index.isin(df_mch.columns)]
df_mch = df_mch.loc[df_mch_row.index, df_col.index.tolist()]

# Palette restricted to the groups that are actually plotted.
present_groups = set(df_mch_row['Group'].unique())
colors_dict = {k: v for k, v in adata.uns['Group_palette'].items() if k in present_groups}

In [None]:
# Top (gene) annotation: merged Group label per marker block plus a colour bar.
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_col['group'], merge=True, rotation=90, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(0.5,0)
    ),
    Group=pch.anno_simple(df_col['group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False,
)

# Left (cell) annotation. The track shows df_mch_row['Group'], so it is named
# "Group" (it was labelled "Subclass", a copy-paste leftover).
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(
        df_mch_row['Group'], merge=True, rotation=0, extend=True, 
        colors=colors_dict, adjust_color=True, luminance=0.75,
        relpos=(1,0.5)
    ),
    Group=pch.anno_simple(df_mch_row['Group'], colors=colors_dict),
    verbose=1, axis=0
)

# Cells x marker genes mCH heatmap, pre-sorted (no clustering), colour scale [0, 0.1].
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(data=df_mch,
                           top_annotation=col_ha,
                           left_annotation=left_ha,
                           row_cluster=False,
                           col_cluster=False,
                           row_dendrogram=False,
                           label="mCH",
                           cmap='parula',
                           rasterized=True, 
                           xlabel="Genes", 
                           vmax=0.1, vmin=0)

### Wubin Markers

In [None]:
# Curated marker table. Non-neuronal subclasses are looked up first so that
# Group-level rows whose parent is one of them can be excluded.
df_markers = pd.read_excel("/anvil/projects/x-mcb130189/Wubin/BG/metadata/BG_markers.xlsx")
nonneuronal_subclasses = df_markers[df_markers['Parent'] == "Nonneuron"]['CellType'].tolist()
df_markers = df_markers[(df_markers["Level"] == "Group") & (~df_markers["Parent"].isin(nonneuronal_subclasses))]

# A marker is usable only if it is on the spatial gene list AND is a column of
# the mCH matrix; checking the RNA gene list alone lets through genes that the
# later column selection on the mCH matrix cannot find (KeyError).
usable_genes = set(spatial_genes) & set(spatial_mch_ds.var_names)

# Collect (Group, gene) pairs. 'SelectedMarkers' stores a Python-literal list as
# text; an empty spreadsheet cell is read as NaN (not a str) and is skipped.
meth_markers = []
for _, marker_row in df_markers.iterrows():
    mm = marker_row['SelectedMarkers']
    if not isinstance(mm, str):
        continue
    genes = ast.literal_eval(mm)
    _sc = marker_row['CellType']
    for _gene in genes:
        if _gene in usable_genes:
            meth_markers.append((_sc, _gene))

df_meth_markers = pd.DataFrame(meth_markers, columns=['Group', 'Gene'])
print(df_meth_markers.shape)

# Keep only groups that have cells in the mCH data FIRST, then de-duplicate. In
# the other order, a gene whose first listing belongs to an absent group is
# dropped even when it also marks a present group.
present_groups = spatial_mch_ds.obs['Group'].unique()
df_meth_markers = df_meth_markers[df_meth_markers['Group'].isin(present_groups)]
print(df_meth_markers.shape)
df_meth_markers = df_meth_markers.drop_duplicates(['Gene'], keep='first')
print(df_meth_markers.shape)

In [None]:
# Row metadata: one row per cell, sorted so cells of a Group are contiguous.
df_mch_row = spatial_mch_ds.obs[['Group']].copy()
df_mch_row = df_mch_row.sort_values('Group')

# Column metadata: one row per curated methylation marker, sorted by Group.
df_col = df_meth_markers.set_index('Gene').sort_values(by="Group")

# mCH values for the ordered cells x marker genes (single selection; df_col's
# index holds exactly the genes of df_meth_markers).
df_mch = pd.DataFrame(spatial_mch_ds.X, columns=spatial_mch_ds.var_names, index=spatial_mch_ds.obs_names)
df_mch = df_mch.loc[df_mch_row.index, df_col.index.tolist()]

# Palette restricted to the groups that are actually plotted.
present_groups = set(df_mch_row['Group'].unique())
colors_dict = {k: v for k, v in adata.uns['Group_palette'].items() if k in present_groups}

In [None]:
# Styling shared by the text labels of both annotations.
label_style = dict(
    merge=True, extend=True,
    colors=colors_dict, adjust_color=True, luminance=0.75,
)

# Top (gene) annotation: vertical Group labels above a Group colour bar.
col_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(df_col['Group'], rotation=90, relpos=(0.5,0), **label_style),
    Group=pch.anno_simple(df_col['Group'], colors=colors_dict),
    verbose=1, axis=1, plot_legend=False,
)

# Left (cell) annotation: horizontal Group labels beside a Group colour bar.
left_ha = pch.HeatmapAnnotation(
    label=pch.anno_label(df_mch_row['Group'], rotation=0, relpos=(1,0.5), **label_style),
    Group=pch.anno_simple(df_mch_row['Group'], colors=colors_dict),
    verbose=1, axis=0,
)

# Cells x curated markers mCH heatmap, pre-sorted (no clustering), colour scale [0, 0.1].
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    row_dendrogram=False,
    label="mCH",
    cmap='parula',
    rasterized=True,
    xlabel="Genes",
    vmax=0.1,
    vmin=0,
)
plt.figure(figsize=(6,8))
cm = pch.ClusterMapPlotter(
    data=df_mch,
    top_annotation=col_ha,
    left_annotation=left_ha,
    **heatmap_options,
)