In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import anndata as ad

from spida.P.setup_adata import multi_round_clustering

import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns
from spida.pl import plot_categorical, plot_continuous, categorical_scatter
# Global matplotlib setting: axes created after this point get a white background.
plt.rcParams['axes.facecolor'] = "white"

In [None]:
# Region codes; each code is substituted into the per-region .h5ad path when loading.
regions = ["PU", "CAH", "CAB", "CAT", "NAC", "STH", "MGM", "GP"]

In [None]:
# Directory-name suffix selecting which per-region annotation run to load.
suffix = "CPSfilt" #  "cellpose_SAM_filt" # "proseg_fv38_filt"

# Read one AnnData per region code, preserving the order of `regions`.
# `adata` stays bound to the last region loaded; later cells read it.
adata_list = []
for _reg in regions:
    region_h5ad = f"/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_{_reg}_{suffix}/{_reg}.h5ad"
    adata = ad.read_h5ad(region_h5ad)
    adata_list += [adata]

# adata = ad.concat(adata_list, join='outer', index_unique=None)

In [None]:
# Display the most recently loaded region (the loop variable left over from the loading cell).
adata

In [None]:
# Stack the per-region objects into a single AnnData.
# index_unique=None leaves the observation names exactly as they are in each input.
adata_all = ad.concat(
    adata_list,
    join='outer',
    index_unique=None,
)

In [None]:
# Values of obs['brain_region'] to check in the combined object.
# NOTE: this rebinds `regions`; two entries differ from the list used for
# loading ("SUBTH"/"MGM1" here vs "STH"/"MGM" there), so re-running the
# loading cell after this one will look for different paths.
regions = [
    "PU", "CAH", "CAB", "CAT", "NAC", "SUBTH", "MGM1", "GP"
]
# Per-region sanity check: print the label, the subset's shape, and the number
# of non-zero entries in the 'counts' layer and in X.
for _reg in regions:
    print(_reg)
    adata_reg = adata_all[adata_all.obs['brain_region'] == _reg].copy()
    print(adata_reg.shape)
    # count_nonzero() runs on the sparse matrix directly; the previous
    # `.toarray() != 0` form allocated a full dense array just to count entries.
    print(adata_reg.layers['counts'].count_nonzero())
    print(adata_reg.X.count_nonzero())

In [None]:
# For Color Schemes!
# Copy every '*_palette' / '*_colors' entry from the last loaded region's .uns
# onto the combined object. The source is taken from adata_list[-1] explicitly
# instead of the leaked loop variable `adata`, which is rebound further down
# in this notebook.
# NOTE(review): '*_colors' entries copied from a single region may not line up
# with the categories of adata_all — confirm before relying on them.
for key, value in adata_list[-1].uns.items():
    if key.endswith(('_palette', '_colors')):
        adata_all.uns[key] = value

In [None]:
def add_colors(adata, cat_col, palette):
    """Write one colour per category of ``adata.obs[cat_col]`` into ``adata.uns``.

    Parameters
    ----------
    adata
        Object exposing ``.obs`` (with a categorical column ``cat_col``) and a
        dict-like ``.uns``.
    cat_col
        Name of the categorical column in ``adata.obs``.
    palette
        Either a ``dict`` mapping category -> colour, or an object indexed with
        ``palette.loc[category, 'Hex']`` (e.g. a DataFrame with a 'Hex' column).

    Returns
    -------
    None. The colours are stored in ``adata.uns[f'{cat_col}_colors']`` as a
    list, in category order. A category missing from ``palette`` is printed
    and assigned grey ('#808080').
    """
    is_mapping = isinstance(palette, dict)

    def _color_for(category):
        # Only a failed lookup falls back to grey; other errors propagate.
        try:
            return palette[category] if is_mapping else palette.loc[category, 'Hex']
        except KeyError:
            print(category)
            return '#808080'

    adata.uns[f'{cat_col}_colors'] = [
        _color_for(category) for category in adata.obs[cat_col].cat.categories
    ]

In [None]:
# Are there more palettes I should add?
# Fills adata_all.uns['brain_region_colors'] from the 'brain_region_palette' entry in .uns.
add_colors(adata_all, "brain_region", adata_all.uns['brain_region_palette'])

In [None]:
# Display the combined object to check its contents before saving.
adata_all

In [None]:
# Create the output directory if needed and save the combined object there.
combined_dir = Path("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/")
combined_dir.mkdir(parents=True, exist_ok=True)
adata_all.write_h5ad(combined_dir / "BG_cps_all.h5ad")

# adata_neu = adata_all[adata_all.obs['c2c_allcools_label_Is_Neuron'] == 'Neuron'].copy()
# adata_nn = adata_all[adata_all.obs['c2c_allcools_label_Is_Neuron'] == 'Nonneuron'].copy()

# adata_neu.write_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_cpsam_neu.h5ad")
# adata_nn.write_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_cpsam_nn.h5ad")

# Calculate Embeddings

### ALL

In [None]:
# Reload the combined object from the file written by the save cell above,
# so this section can be run without repeating the per-region loading.
adata_all = ad.read_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_cps_all.h5ad")

In [None]:
# Settings for a single clustering round on the combined object.
# The call's return value is not used; presumably results are added to
# adata_all in place under keys prefixed with "all_" — confirm against
# multi_round_clustering.
all_clustering_kwargs = dict(
    layer="volume_norm",
    key_added="all_",
    num_rounds=1,
    leiden_res=1,
    min_dist=0.25,
    knn=50,
    min_group_size=50,
    run_harmony=True,
    batch_key=["replicate", "donor", "brain_region"],
    harmony_nclust=20,
    max_iter_harmony=20,
)
multi_round_clustering(adata_all, **all_clustering_kwargs)
adata_all

In [None]:
# Two panels on the "all_round1_umap" coordinates: round-1 Leiden clusters
# and the Is_Neuron label.
fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=300)
cluster_panels = [
    ("all_round1_leiden", "All Regions - Round 1 Clustering"),
    ("c2c_allcools_label_Is_Neuron", "All Regions - Is Neuron"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, cluster_panels):
    plot_categorical(adata_all, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

In [None]:
# Three panels on the same embedding, coloured by the columns that were
# passed as batch_key to the clustering call.
fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=300)
batch_panels = [
    ("donor", "All Regions - Donor"),
    ("brain_region", "All Regions - Brain Region"),
    ("replicate", "All Regions - Replicate"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, batch_panels):
    plot_categorical(adata_all, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

### Neurons

In [None]:
# Load the object used for the neuron section and display it.
# NOTE(review): "BG_pfv8_neu.h5ad" is not written by any visible cell in this
# notebook (the combined save above writes "BG_cps_all.h5ad") — confirm this is
# the intended input.
adata_neu = ad.read_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_neu.h5ad")
adata_neu

In [None]:
# Two clustering rounds on the neuron object. Unlike the combined run,
# brain_region is not part of batch_key here.
# leiden_res is a list of two values — presumably one resolution per round;
# confirm against multi_round_clustering.
neuron_clustering_kwargs = dict(
    layer="volume_norm",
    key_added="all_",
    num_rounds=2,
    leiden_res=[0.75, 0.5],
    min_dist=0.25,
    knn=50,
    min_group_size=50,
    run_harmony=True,
    batch_key=["replicate", "donor"],
    harmony_nclust=20,
    max_iter_harmony=20,
)
multi_round_clustering(adata_neu, **neuron_clustering_kwargs)
adata_neu

In [None]:
# Save adata_neu back to the same path it was loaded from; this overwrites the input file.
adata_neu.write_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_neu.h5ad")

In [None]:
# Figure 1: round-1 Leiden clusters and the 'Is_Neuron' column on the
# "all_round1_umap" coordinates of the neuron object.
# NOTE(review): the matching NonNeurons figure shows 'all_round2_leiden' in the
# second panel; confirm 'Is_Neuron' is the intended column here.
fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=200)
neuron_cluster_panels = [
    ("all_round1_leiden", "All Regions - Round 1 Clustering"),
    ("Is_Neuron", "All Regions - Is Neuron"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, neuron_cluster_panels):
    plot_categorical(adata_neu, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

# Figure 2: the same embedding coloured by donor, brain region and replicate.
fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=200)
neuron_batch_panels = [
    ("donor", "All Regions - Donor"),
    ("brain_region", "All Regions - Brain Region"),
    ("replicate", "All Regions - Replicate"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, neuron_batch_panels):
    plot_categorical(adata_neu, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

### NonNeurons

In [None]:
# Load the object used for the non-neuron section and display it.
# NOTE(review): "BG_pfv8_nn.h5ad" is not written by any visible cell in this
# notebook (the combined save above writes "BG_cps_all.h5ad") — confirm this is
# the intended input.
adata_nn = ad.read_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_nn.h5ad")
adata_nn

In [None]:
# Two clustering rounds on the non-neuron object, with the same settings as
# the neuron run.
# leiden_res is a list of two values — presumably one resolution per round;
# confirm against multi_round_clustering.
nonneuron_clustering_kwargs = dict(
    layer="volume_norm",
    key_added="all_",
    num_rounds=2,
    leiden_res=[0.75, 0.5],
    min_dist=0.25,
    knn=50,
    min_group_size=50,
    run_harmony=True,
    batch_key=["replicate", "donor"],
    harmony_nclust=20,
    max_iter_harmony=20,
)
multi_round_clustering(adata_nn, **nonneuron_clustering_kwargs)
adata_nn

In [None]:
# Save adata_nn back to the same path it was loaded from; this overwrites the input file.
adata_nn.write_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_nn.h5ad")

In [None]:
# Figure 1: round-1 and round-2 Leiden labels, both drawn on the
# "all_round1_umap" coordinates of the non-neuron object.
fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=200)
nonneuron_cluster_panels = [
    ("all_round1_leiden", "All Regions - Round 1 Clustering"),
    ("all_round2_leiden", "All Regions - Round 2 Clustering"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, nonneuron_cluster_panels):
    plot_categorical(adata_nn, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

# Figure 2: the same embedding coloured by donor, brain region and replicate.
fig, axes = plt.subplots(1, 3, figsize=(15, 5), dpi=200)
nonneuron_batch_panels = [
    ("donor", "All Regions - Donor"),
    ("brain_region", "All Regions - Brain Region"),
    ("replicate", "All Regions - Replicate"),
]
for panel_ax, (panel_col, panel_title) in zip(axes, nonneuron_batch_panels):
    plot_categorical(adata_nn, cluster_col=panel_col, coord_base="all_round1_umap",
                     text_anno=True, coding=True, show=False, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.show()

# Regional Neuronal Composition

In [None]:
# Load the combined object for the composition summary and display it.
# This rebinds `adata`, which earlier cells used for the last per-region object.
# NOTE(review): the file name ("BG_pfv8_all.h5ad") differs from the combined file
# saved earlier in this notebook ("BG_cps_all.h5ad") — confirm it is the intended input.
comb_adata_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/annotation/BICAN_BG_ALL/BG_pfv8_all.h5ad"
adata = ad.read_h5ad(comb_adata_path)
adata

In [None]:
# Copy the 'c2c_allcools_label_Is_Neuron' column into a shorter 'Is_Neuron' column used by the cells below.
adata.obs['Is_Neuron'] = adata.obs['c2c_allcools_label_Is_Neuron'].copy()

In [None]:
# Quick look at one subset: only the first entry of `regions` is processed.
for _reg in regions[:1]:
    adata_reg = adata[adata.obs['brain_region'] == _reg].copy()
    print(adata_reg)

In [None]:
# Drop 'Is_Neuron' categories that no cell carries, so they do not appear as empty groups downstream.
adata.obs['Is_Neuron'] = adata.obs['Is_Neuron'].cat.remove_unused_categories()

In [None]:
# Cell counts per (brain_region, Is_Neuron) pair, as a long table whose count
# column is named 0 (the default after .size().to_frame()).
# observed=False is passed explicitly: it keeps the current result for
# categorical groupers (zero-count combinations are retained), avoids the
# pandas FutureWarning about the changing default, and stays stable once that
# default changes.
region_neuron_composition = (
    adata.obs
    .groupby(['brain_region', 'Is_Neuron'], observed=False)
    .size()
    .to_frame()
    .reset_index()
)

In [None]:
# Grouped bar chart: one group per Is_Neuron value, one bar per brain region,
# bar height taken from the count column (named 0).
fig, ax = plt.subplots(1, 1, figsize=(8, 4), dpi=300)
bars = sns.barplot(
    data=region_neuron_composition,
    x='Is_Neuron',
    y=0,
    hue='brain_region',
    ax=ax,
    palette=adata.uns['brain_region_palette'],
)
# Label every bar with its value.
for bar_container in bars.containers:
    ax.bar_label(bar_container, fontsize=6, padding=2)
ax.set_xlabel("Is Neuron")
ax.set_ylabel("Cell Count")
ax.set_title("Regional Neuronal Composition")
ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
plt.show()