In [None]:
import os 
from pathlib import Path

import numpy as np
import pandas as pd
import anndata as ad

import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide matplotlib defaults: inline and saved-figure resolution, white axes background.
plt.rcParams.update({
    'figure.dpi': 200,
    'savefig.dpi': 300,
    'axes.facecolor': 'white',
})

## Read Paths

In [None]:
# CSV of methylation annotations; later cells read its 'subclass', 'subclass_score',
# 'group' and 'group_score' columns.
annots_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/methylation/annot_with_scores.csv"
# CSV of imputation metadata; later cells group it by its 'merfish_cell' column.
# The commented-out assignments below are alternative inputs kept for reference.
imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251105_merfish_methylation_2/Imputation.Subclass_Restricted.mC_MERFISH.meta.csv"
# imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251009_merfish_methylation/Imputation.Subclass_Restricted.mC_MERFISH.meta.csv"
# imputation_path = "/anvil/projects/x-mcb130189/qzeng/analysis/251009_merfish_methylation/Imputation.Group_Restricted.mC_MERFISH.meta.csv"
# Tab-separated annotation table; source of the 'slab_distance', 'mCGFrac' and 'mCHFrac' columns.
snm3c_annot_path = "/anvil/projects/x-mcb130189/Wubin/BG/clustering/100kb/annotations.tsv"
# Tab-separated spatial annotation table (loaded and previewed below).
spatial_annot_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.tsv" 

In [None]:
# Load the four annotation / metadata tables (first column used as the index)
# and preview the first rows of each.
# The methylation annotations must be bound to `meth_annot`: that is the name
# the preview below and the later annotation-transfer cell look up.
meth_annot = pd.read_csv(annots_path, index_col=0)
display(meth_annot.head())
impute_df = pd.read_csv(imputation_path, index_col=0)
display(impute_df.head())
snm3c_annot = pd.read_csv(snm3c_annot_path, sep="\t", index_col=0)
display(snm3c_annot.head())
spatial_annot = pd.read_csv(spatial_annot_path, sep="\t", index_col=0)
display(spatial_annot.head())

## Functions

In [None]:
def pull_cols(cells, colnames, df_annot):
    """Label-based lookup: return the ``colnames`` entries of ``df_annot`` for ``cells``.

    ``cells`` and ``colnames`` are handed to ``df_annot.loc`` unchanged, so they may be
    anything ``.loc`` accepts (single label, list of labels, ...).
    """
    row_labels, col_labels = cells, colnames
    selection = df_annot.loc[row_labels, col_labels]
    return selection


def plot_grouped_bar(df, ax=None, width=0.8, colors=None, ylabel=None, title=None,
                     show_values=True, value_fmt="{:.2f}", err=None, figsize=(8,4), label_fontsize=6):
    """
    Draw a grouped bar chart: one cluster of bars per row of ``df``, one bar per column.

    df: pandas DataFrame where index are the x groups and columns are the categories.
    ax: Axes to draw on; when None a new figure of size ``figsize`` is created.
    width: total width of one cluster of bars (shared evenly between the columns).
    colors: None (default matplotlib color cycle), a sequence of colors (cycled when
        shorter than the number of columns) or a dict mapping column -> color
        (columns missing from the dict are drawn in gray).
    ylabel, title: optional y-axis label and axes title.
    show_values: if True, write each bar's value next to the bar end.
    value_fmt: format string applied to each value label.
    err: None or DataFrame/2D array with same shape as df (for error bars).
    figsize: figure size, only used when ``ax`` is None.
    label_fontsize: font size of the value labels.

    Returns the Axes that was drawn on.
    """
    if ax is None:
        _fig, ax = plt.subplots(figsize=figsize)
    n_groups = len(df.index)
    n_bars = len(df.columns)
    ind = np.arange(n_groups)
    total_width = width
    # A frame without columns has nothing to draw; avoid dividing by zero.
    bar_width = total_width / n_bars if n_bars else total_width
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color'][:n_bars]
    if isinstance(colors, dict):
        colors = [colors.get(col, 'gray') for col in df.columns]

    for i, col in enumerate(df.columns):
        # Centre of the i-th bar inside each cluster.
        x = ind - total_width/2 + i * bar_width + bar_width/2
        heights = df[col].values
        yerr = None
        if err is not None:
            yerr = np.array(err)[:, i] if isinstance(err, (list, np.ndarray)) else err[col].values
        ax.bar(x, heights, bar_width, label=str(col), color=colors[i % len(colors)], yerr=yerr, capsize=3)

        if show_values:
            for xi, h in zip(x, heights):
                # Missing cells (e.g. a group/category pair with no data after a pivot)
                # have no bar, so they get no label either.
                if pd.isna(h):
                    continue
                # Anchor the label outside the bar: above positive bars, below negative ones.
                valign = 'bottom' if h >= 0 else 'top'
                ax.text(xi, h, value_fmt.format(h), ha='center', va=valign,
                        fontsize=label_fontsize, rotation=0)

    ax.set_xticks(ind)
    ax.set_xticklabels(df.index.astype(str))
    if ylabel:
        ax.set_ylabel(ylabel)
    if title:
        ax.set_title(title)
    if n_bars:
        # Only build a legend when at least one labelled bar series exists.
        ax.legend()
    ax.margins(x=0.02)
    return ax

## Transfer info

In [None]:
# Open the AnnData file in backed read-only mode; only the palettes stored in
# `.uns` and a copy of `.obs` are kept for the rest of the notebook.
adata = ad.read_h5ad("/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.h5ad", backed='r')
try:
    donor_palette = adata.uns['donor_palette']
    lab_palette = adata.uns['replicate_palette']
    brain_region_palette = adata.uns['brain_region_palette']
    subclass_palette = adata.uns['Subclass_palette']
    group_palette = adata.uns['Group_palette']
    obs = adata.obs.copy()
finally:
    # Close the backing file explicitly: `del` only drops the name and does not
    # by itself guarantee the file handle is released.
    adata.file.close()
    del adata

In [None]:
# Columns copied from `snm3c_annot` onto `impute_df` and then averaged per MERFISH cell into `obs`.
add_col = ['slab_distance', 'mCGFrac', 'mCHFrac']

In [None]:
# Copy the `add_col` columns from the snm3C annotations onto the imputation table
# for the cells present in both tables.
common_cells = impute_df.index.intersection(snm3c_annot.index)
impute_df.loc[common_cells, add_col] = snm3c_annot.loc[common_cells, add_col]
# Average the copied values per MERFISH cell, then align explicitly on `obs`
# (cells of `obs` without any imputed value get NaN).
obs[add_col] = impute_df.groupby("merfish_cell")[add_col].mean().reindex(obs.index)

In [None]:
# Mean slab distance for every (brain region, donor) pair, reshaped to a
# brain_region x donor table for the grouped bar plot.
toplot = (
    obs.groupby(["brain_region", 'donor'], observed=False)['slab_distance']
    .mean()
    .reset_index()
    .pivot(index='brain_region', columns='donor', values='slab_distance')
)

In [None]:
# Grouped bars: one cluster per brain region, one bar per donor.
ax = plot_grouped_bar(
    toplot,
    colors=donor_palette,
    ylabel="slab_distance",
    title="Average Slab Distance by Brain Region and Donor",
    value_fmt="{:.1f}",
    figsize=(10,4),
)
plt.tight_layout()
plt.show()

In [None]:
# Region order used to lay the slabs out side by side, one list per donor;
# `order_dict` maps the donor ID to its list.
order_2424 = [
    "CAH", "NAC", "PU", "CAB",
    "GP", "SUBTH", "MGM1", "CAT",
]
order_4723 = [
    "CAH", "NAC", "PU", "CAB",
    "GP", "SUBTH", "MGM1", "CAT",
]
order_7648 = [
    "CAH", "PU", "NAC", "CAB",
    "GP", "SUBTH", "MGM1",
]
order_5224 = [
    "NAC", "CAH", "CAB", "PU",
    "GP", "SUBTH", "MGM1", "CAT",
]
order_dict = {
    "UCI2424": order_2424,
    "UCI4723": order_4723,
    "UWA7648": order_7648,
    "UCI5224": order_5224,
}

In [None]:
def orient_slabs(df, order, order_by="brain_region", orient_along="CENTER_X"):
    """
    Lay the groups of ``df`` out one after another along one coordinate axis.

    Walking through ``order``, every group that has not been placed yet is shifted
    along ``orient_along`` by the maximum (pre-shift) coordinate of the group being
    placed, so each group ends up offset by the summed maxima of the groups before it.
    Groups of ``df`` that never appear in ``order`` receive every shift.

    Parameters
    ----------
    df : DataFrame with the ``order_by`` and ``orient_along`` columns. Not modified.
    order : iterable of ``order_by`` values, in the desired layout order. Entries
        that do not occur in ``df`` (or repeat an earlier entry) are skipped.
    order_by : column holding the group label.
    orient_along : coordinate column that is shifted.

    Returns
    -------
    (df_ordered, region_means) : the shifted copy of ``df`` and the per-group mean of
        ``orient_along`` after shifting (NaN for unobserved categories).
    """
    df_ordered = df.copy()
    # Maxima are taken once, before any shift, on the axis that is actually shifted.
    region_maxes = df_ordered.groupby(order_by, observed=False)[orient_along].max()
    tomove = list(df_ordered[order_by].unique())
    for _slab in order:
        if _slab not in tomove:
            # Group absent from this subset (or already placed): nothing to shift by.
            continue
        move_by = region_maxes[_slab]
        tomove.remove(_slab)
        df_ordered.loc[df_ordered[order_by].isin(tomove), orient_along] += move_by
    region_means = df_ordered.groupby(order_by, observed=False)[orient_along].mean()
    return df_ordered, region_means

In [None]:
# for did, df in obs.groupby(["donor", "replicate"], observed=False): 
#     donor = did[0]
#     lab = did[1]
#     print(donor, lab)
#     df_cp = orient_slabs(df, order_dict[donor])
#     fig, ax = plt.subplots(figsize=(20,4), dpi=200)
#     sc = ax.scatter(
#         df_cp['CENTER_X'], df_cp['CENTER_Y'], c=df_cp['slab_distance'], cmap='coolwarm', s=5)
#     ax.set_title(f"Donor: {donor}, Lab: {lab}")
#     plt.colorbar(sc, ax=ax, label='slab_distance')
#     plt.show()
#     # break

In [None]:
# Mean mCG fraction per (brain region, donor), reshaped to brain_region x donor.
toplot = (
    obs.groupby(["brain_region", 'donor'], observed=False)['mCGFrac']
    .mean()
    .reset_index()
    .pivot(index='brain_region', columns='donor', values='mCGFrac')
)

# Grouped bars: one cluster per brain region, one bar per donor.
ax = plot_grouped_bar(
    toplot,
    colors=donor_palette,
    ylabel="mCGFrac",
    title="Average mCG Fraction by Brain Region and Donor",
    value_fmt="{:.2f}",
    figsize=(10,4),
)
plt.tight_layout()
plt.show()

In [None]:
# Mean mCH fraction per (brain region, donor), reshaped to brain_region x donor.
toplot = (
    obs.groupby(["brain_region", 'donor'], observed=False)['mCHFrac']
    .mean()
    .reset_index()
    .pivot(index='brain_region', columns='donor', values='mCHFrac')
)

# Grouped bars: one cluster per brain region, one bar per donor.
ax = plot_grouped_bar(
    toplot,
    colors=donor_palette,
    ylabel="mCHFrac",
    title="Average mCH Fraction by Brain Region and Donor",
    value_fmt="{:.3f}",
    figsize=(10,4),
)
plt.tight_layout()
plt.show()

In [None]:
# One spatial scatter per donor / replicate: regions are laid out side by side
# along x (per-donor order from `order_dict`) and cells are coloured by mCHFrac.
for (donor, lab), df in obs.groupby(["donor", "replicate"], observed=False):
    if df.empty:
        # With observed=False a donor/replicate combination may come back without cells.
        continue
    print(donor, lab)
    order = order_dict[donor]
    df_cp, tick_locs = orient_slabs(df, order=order)
    # Regions without cells in this subset have a NaN mean position: no tick for them.
    tick_locs = tick_locs.dropna()
    fig, ax = plt.subplots(figsize=(25,4), dpi=200)
    sc = ax.scatter(
        df_cp['CENTER_X'], df_cp['CENTER_Y'], c=df_cp['mCHFrac'], cmap='coolwarm', s=5, vmin=0, vmax=0.1)
    ax.set_xticks(tick_locs.values)
    ax.set_xticklabels(tick_locs.index)
    ax.set_title(f"Donor: {donor}, Lab: {lab}")
    plt.colorbar(sc, ax=ax, label='mCHFrac')
    plt.show()
    # Free the figure so the loop does not accumulate open figures.
    plt.close(fig)

## Annotation Entropy

In [None]:
from scipy.stats import entropy

# Copy the methylation-derived labels and their scores onto `obs`, looked up by cell ID.
# Each entry: (label column in obs, label column in meth_annot,
#              score column in obs, score column in meth_annot).
_meth_transfers = [
    ('meth_subclass', 'subclass', 'meth_subclass_score', 'subclass_score'),
    ('meth_group', 'group', 'meth_group_score', 'group_score'),
]
for _label_dst, _label_src, _score_dst, _score_src in _meth_transfers:
    # Labels are stored as categoricals, scores as plain values.
    obs[_label_dst] = meth_annot.loc[obs.index, _label_src].astype('category')
    obs[_score_dst] = meth_annot.loc[obs.index, _score_src].values

In [None]:
# Entropy of the (rounded) transfer-score distribution for every Group, computed
# separately for each donor x brain region. Result: {(donor, region): {group: entropy}}.
# NOTE(review): this cell used to iterate over `df_obs`, a name that is never
# defined in this notebook. `obs` is assumed to be the intended table — confirm it
# carries the 'Group' and 'allcools_Group_transfer_score' columns.
entropies = {}
# observed=True: only donor/region combinations that actually contain cells.
for (_donor, _region), _df in obs.groupby(['donor', 'brain_region'], observed=True):
    group_entropies = {}
    for _class in _df['Group'].cat.remove_unused_categories().cat.categories:
        probs = _df.loc[_df['Group'] == _class, 'allcools_Group_transfer_score']
        # Counts of each rounded score value; scipy normalises them to probabilities.
        ent = entropy(probs.round(3).value_counts().sort_index())
        # Groups whose scores all round to one value (entropy 0) are left out.
        if ent > 0:
            group_entropies[_class] = ent
    entropies[(_donor, _region)] = group_entropies

In [None]:
# Reorganize the data for plotting:
# one record per (donor, region, group) combination with its entropy value
plot_data = [
    {'donor': donor, 'brain_region': region, 'group': group, 'entropy': entropy_val}
    for (donor, region), group_entropies in entropies.items()
    for group, entropy_val in group_entropies.items()
]
entropy_df = pd.DataFrame(plot_data)

# Get unique regions and groups for consistent ordering
regions = sorted(entropy_df['brain_region'].unique())
all_groups = sorted(entropy_df['group'].unique())

# Calculate mean and std (across donors) for each brain region and group combination
stats_df = entropy_df.groupby(['brain_region', 'group'])['entropy'].agg(['mean', 'std']).reset_index()
stats_df['std'] = stats_df['std'].fillna(0)  # Fill NaN std with 0 for single donor cases

# Create subplots - one for each brain region, stacked vertically
fig, axes = plt.subplots(nrows=len(regions), ncols=1, figsize=(12, 4*len(regions)),
                        sharex=True, squeeze=False)
axes = axes.flatten()  # Make it easier to index

# Reuse the palette extracted from the AnnData file in the "Transfer info" section;
# the AnnData object itself is no longer available at this point.
palette = brain_region_palette

# Every panel shows the same groups in the same order.
x_positions = list(range(len(all_groups)))

for i, region in enumerate(regions):
    ax = axes[i]
    color = palette.get(region, 'gray')

    # Stats of this region aligned on the full group list; groups without an
    # entropy value for this region are drawn as zero-height bars.
    region_stats = (
        stats_df[stats_df['brain_region'] == region]
        .set_index('group')
        .reindex(all_groups)
    )
    means_to_plot = region_stats['mean'].fillna(0).tolist()
    stds_to_plot = region_stats['std'].fillna(0).tolist()

    # Create the bar plot
    ax.bar(x_positions, means_to_plot,
           yerr=stds_to_plot, capsize=3,
           color=color, alpha=0.8, edgecolor='black', linewidth=0.5)

    # Set the x-tick labels
    ax.set_xticks(x_positions)
    ax.set_xticklabels(all_groups, rotation=45, ha='right', fontsize=8)

    # Formatting
    ax.set_ylabel(f'{region}\nEntropy', fontsize=10, rotation=0, ha='right', va='center')
    ax.grid(axis='y', linestyle='--', alpha=0.3)
    ax.set_ylim(bottom=0)

    # Only show x-axis labels on the bottom plot
    if i < len(regions) - 1:
        ax.set_xticklabels([])

# Set the x-label only for the bottom plot
axes[-1].set_xlabel('Group', fontsize=12)

# Overall title
fig.suptitle('Entropy by Brain Region and Group\n(Mean ± Std Dev across donors)',
             fontsize=14, y=0.98)

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()