In [None]:
import os
from pathlib import Path
import itertools 
from tqdm import tqdm

import numpy as np
import pandas as pd
import anndata as ad
from scipy.stats import norm 
from statsmodels.stats.multitest import multipletests

import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text  # pip install adjustText

In [None]:
# Notebook-wide matplotlib defaults, applied in one call.
plt.rcParams.update({
    # Resolution for on-screen and saved figures.
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'figure.figsize': (4, 4),
    # Font type 42 (TrueType) for PDF / PostScript output.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 14,
    'axes.facecolor': 'white',
})

In [None]:
# Image folder for this analysis; created (with parents) if it does not exist yet.
# NOTE(review): presumably where figures get saved — it is not referenced by the
# cells visible here; confirm. The absolute path is machine-specific.
image_path = Path("/home/x-aklein2/projects/aklein/BICAN/BG/images/CPS/ccc_gr")
image_path.mkdir(parents=True, exist_ok=True)

## Functions

In [None]:
### From ChatGPT for the SPARKX meta-analysis: 

def stouffer_meta(pvals, signs, weights):
    """Combine two-sided p-values into one signed, weighted Stouffer Z.

    Parameters
    ----------
    pvals : array-like of float
        Two-sided p-values, one per study. Values are clipped to
        [1e-300, 1] so that p == 0 does not yield an infinite z-score.
    signs : array-like
        Direction of each study's effect; only the sign is used.
    weights : array-like of float
        Per-study weights.

    Returns
    -------
    Z : float
        Weighted combined z-score, ``sum(w * z) / sqrt(sum(w ** 2))``.
    p : float
        Two-sided p-value of ``Z`` under the standard normal.
    """
    # Coerce everything to float arrays so plain Python lists work as well
    # (a list has no ``**`` operator, which used to raise a TypeError).
    pvals = np.clip(np.asarray(pvals, dtype=float), 1e-300, 1.0)
    signs = np.asarray(signs, dtype=float)
    weights = np.asarray(weights, dtype=float)

    # Two-sided p -> |z|, then re-attach the direction of the effect.
    z = norm.isf(pvals / 2.0)
    z_signed = np.sign(signs) * z
    Z = np.sum(weights * z_signed) / np.sqrt(np.sum(weights ** 2))
    p = 2 * norm.sf(abs(Z))
    return Z, p

def i_squared(effect_sizes, weights):
    """Return the I² heterogeneity statistic (in percent) for a set of effects.

    Parameters
    ----------
    effect_sizes : array-like of float
        One effect size per study.
    weights : array-like of float
        Per-study weights used for both the weighted mean and Q.

    Returns
    -------
    float
        ``max(0, (Q - df) / Q) * 100`` with ``df = k - 1``; ``0.0`` when Q is
        not positive; ``nan`` when there are fewer than two studies.
    """
    # Accept lists as well as arrays (list - float is not defined).
    effect_sizes = np.asarray(effect_sizes, dtype=float)
    weights = np.asarray(weights, dtype=float)

    k = len(effect_sizes)
    if k <= 1:
        return np.nan
    mean_eff = np.average(effect_sizes, weights=weights)
    Q = np.sum(weights * (effect_sizes - mean_eff) ** 2)
    df = k - 1
    if Q > 0:
        return max(0.0, (Q - df) / Q) * 100
    return 0.0

def meta_per_gene(df, cell_type):
    """Run the Stouffer meta-analysis separately for every gene in ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``gene``, ``p_sparkx``, ``rho_axis`` and
        ``n_cells``; each row is one study's result for one gene.
    cell_type : str
        Label copied into the ``cell_type`` column of the output.

    Returns
    -------
    pd.DataFrame
        One row per gene with columns ``cell_type``, ``gene``, ``meta_Z``,
        ``meta_p``, ``fdr`` (Benjamini–Hochberg across genes), ``I2`` and
        ``direction`` ("up" if ``meta_Z > 0`` else "down"). An empty input
        yields an empty frame with these columns.
    """
    columns = ["cell_type", "gene", "meta_Z", "meta_p", "fdr", "I2", "direction"]
    out = []
    for g, gdf in df.groupby("gene"):
        # Clip once, with the same bounds stouffer_meta uses, so p == 0 does not
        # turn into an infinite per-study effect in the I² computation.
        pvals = np.clip(gdf["p_sparkx"].values.astype(float), 1e-300, 1.0)
        signs = np.sign(gdf["rho_axis"].values)
        weights = np.sqrt(gdf["n_cells"].values)
        Z, p_meta = stouffer_meta(pvals, signs, weights)
        effs = signs * norm.isf(pvals / 2.0)
        I2 = i_squared(effs, weights)
        out.append({
            "cell_type": cell_type, "gene": g, "meta_Z": Z, "meta_p": p_meta,
            "fdr": None, "I2": I2, "direction": "up" if Z > 0 else "down"
        })
    res = pd.DataFrame(out, columns=columns)
    if res.empty:
        # Nothing to correct; avoids a KeyError on the missing "meta_p" column.
        return res
    res["fdr"] = multipletests(res["meta_p"], method="fdr_bh")[1]
    return res
    
def dl_tau2(yi, vi):
    """
    Method-of-moments (DerSimonian–Laird) estimate of the between-study variance τ².

    ``yi`` are the effect sizes and ``vi`` their sampling variances.
    Returns ``(tau2, Q)``, where ``Q`` is the inverse-variance weighted sum of
    squared deviations from the weighted mean; ``tau2`` is floored at 0.
    """
    inv_var = 1.0 / vi
    weight_total = np.sum(inv_var)
    pooled_mean = np.sum(inv_var * yi) / weight_total
    Q = np.sum(inv_var * (yi - pooled_mean) ** 2)
    n_studies = len(yi)
    scale = weight_total - np.sum(inv_var ** 2) / weight_total
    if scale > 0:
        tau2 = max(0.0, (Q - (n_studies - 1)) / scale)
    else:
        tau2 = 0.0
    return tau2, Q

def re_meta(yi, vi):
    """
    Random-effects pooling of effect sizes ``yi`` with sampling variances ``vi``.

    τ² comes from ``dl_tau2``; each study is then weighted by ``1 / (vi + τ²)``.
    Returns a dict with the pooled mean (``mu``), its standard error (``se``),
    z-statistic, two-sided p-value, ``mu ± 1.96·se`` interval bounds, τ², Q,
    the number of studies ``k`` and I² (percent).
    """
    tau2, Q = dl_tau2(yi, vi)
    k = len(yi)

    re_weights = 1.0 / (vi + tau2)
    mu = np.sum(re_weights * yi) / np.sum(re_weights)
    se = np.sqrt(1.0 / np.sum(re_weights))

    if se > 0:
        z = mu / se
    else:
        z = np.nan
    if np.isfinite(z):
        p = 2 * norm.sf(abs(z))
    else:
        p = np.nan

    ci_lb = mu - 1.96 * se
    ci_ub = mu + 1.96 * se

    if k > 1 and Q > 0:
        I2 = max(0.0, (Q - (k - 1)) / Q) * 100
    else:
        I2 = 0.0

    return {
        "mu": mu, "se": se, "z": z, "p": p,
        "ci_lb": ci_lb, "ci_ub": ci_ub,
        "tau2": tau2, "Q": Q, "k": k, "I2": I2,
    }

In [None]:
# functions from xingjiepan 2023 mouse atlas paper
def adjust_p_value_matrix_by_BH(p_val_mtx):
    '''Benjamini/Hochberg-adjust the p-values of a symmetric matrix.

    Only the upper triangle (diagonal included) is read and corrected; every
    adjusted value is written to both [i, j] and [j, i] of the result.
    '''
    size = p_val_mtx.shape[0]

    # Row-major walk over the upper triangle, diagonal included.
    upper_cells = [(row, col) for row in range(size) for col in range(row, size)]
    raw_p = [p_val_mtx[row, col] for row, col in upper_cells]

    corrected_p = multipletests(raw_p, method='fdr_bh')[1]

    adjusted = np.zeros((size, size))
    for (row, col), value in zip(upper_cells, corrected_p):
        adjusted[row, col] = value
        adjusted[col, row] = value

    return adjusted

def get_data_frame_from_metrices(cell_types, mtx_dict):
    """Flatten the upper triangle of several matrices into one long table.

    For every index pair ``i <= j`` the output has one row holding
    ``cell_types[i]``, ``cell_types[j]`` and, for each ``name -> matrix``
    entry of ``mtx_dict``, the value ``matrix[i, j]`` in a column ``name``.
    """
    n_types = len(cell_types)
    upper_cells = [(i, j) for i in range(n_types) for j in range(i, n_types)]

    columns = {
        'cell_type1': [cell_types[i] for i, _ in upper_cells],
        'cell_type2': [cell_types[j] for _, j in upper_cells],
    }
    for name, matrix in mtx_dict.items():
        columns[name] = [matrix[i, j] for i, j in upper_cells]

    return pd.DataFrame(columns)
    

def sort_cell_type_contact_p_values(p_val_mtx, cell_types):
    '''List the upper-triangle entries as (cell_type1, cell_type2, p_value), smallest p first.'''
    size = p_val_mtx.shape[0]
    triples = [
        (cell_types[row], cell_types[col], p_val_mtx[row, col])
        for row in range(size)
        for col in range(row, size)
    ]
    triples.sort(key=lambda triple: triple[2])
    return triples

## Continue 
**Run this section before the meta-analysis:** it compiles the input table (`contacts_meta_input.csv`) that the meta-analysis reads.

In [None]:
# Load the AnnData file whose .obs metadata (donor, brain_region, replicate, Group)
# is used by the cells below.
ad_path = "/home/x-aklein2/projects/aklein/BICAN/BG/data/BICAN_BG_CPS.h5ad"
adata = ad.read_h5ad(ad_path)
adata  # bare expression: shows the object summary as the cell output

In [None]:
# Distinct values of each experiment-defining column in the cell metadata.
donors = adata.obs['donor'].unique().tolist()
brain_regions = adata.obs['brain_region'].unique().tolist()
replicates = adata.obs['replicate'].unique().tolist()
# (donor, brain_region, replicate) combinations that the per-experiment loop skips.
skip = [("UWA7648", "CAT", "ucsd"), ("UWA7648", "CAT", "salk")]

In [None]:
# Pick one example experiment (first donor, fifth brain region, first replicate)
# and print it. These three names are reassigned by the per-experiment loop below.
_donor = donors[0]
_brain_region = brain_regions[4]
_replicate = replicates[0]
print(_donor, _brain_region, _replicate)

In [None]:
# r appears as "{r}um" in every contact folder / file name.
# NOTE(review): presumably the contact radius in µm — confirm.
r=50
# Folder holding the per-experiment contact tables and real / permuted count arrays for this r.
cell_contact_root = Path(f"/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/cell_contacts_gr_{r}um")

In [None]:
# df_contacts

In [None]:
# Build one long table of contact-enrichment statistics across all experiments.
# For every (donor, brain_region, replicate) combination not listed in `skip`,
# the observed contact counts are compared with the permutation null (mean / std)
# stored under `cell_contact_root`.
all_res = []
# Materialise the product so tqdm knows how many experiments there are.
experiments = list(itertools.product(donors, brain_regions, replicates))
pbar = tqdm(experiments)
for _i in pbar:
    if _i in skip:
        print(f"Skipping {_i}")
        continue
    _donor, _brain_region, _replicate = _i
    pbar.set_description(f"Processing {_donor} | {_brain_region} | {_replicate}")

    # Cells of this experiment, without the "unknown" group.
    adata_sub = adata[(adata.obs['donor'] == _donor) &
                    (adata.obs['brain_region'] == _brain_region) &
                    (adata.obs['replicate'] == _replicate)]
    adata_sub = adata_sub[(adata_sub.obs['Group'] != "unknown")].copy()
    # Sorted unique group labels; used to label matrix rows / columns by position.
    cell_types = np.unique(adata_sub.obs['Group'])

    # df_contacts and null_contacts are not used inside this loop; they are kept
    # because the commented-out inspection cells further down refer to them.
    df_contacts = pd.read_csv(cell_contact_root / f"cell_contacts_{_donor}_{_brain_region}_{_replicate}_{r}um.csv")
    real_contacts = np.load(cell_contact_root / f"contact_counts_real_{_donor}_{_brain_region}_{_replicate}_{r}um.npy")
    null_contacts = np.load(cell_contact_root / f"contact_counts_permuted_{_donor}_{_brain_region}_{_replicate}_{r}um.npy")
    null_contacts_mean = np.load(cell_contact_root / f"contact_counts_permuted_mean_{_donor}_{_brain_region}_{_replicate}_{r}um.npy")
    null_contacts_std = np.load(cell_contact_root / f"contact_counts_permuted_std_{_donor}_{_brain_region}_{_replicate}_{r}um.npy")

    # The matrices are labelled purely by position, so a size mismatch would
    # silently attach the wrong cell-type names to the statistics. Fail loudly.
    _n_types = len(cell_types)
    for _name, _mtx in (("real", real_contacts),
                        ("permuted mean", null_contacts_mean),
                        ("permuted std", null_contacts_std)):
        if _mtx.shape != (_n_types, _n_types):
            raise ValueError(
                f"{_name} contact matrix for {_i} has shape {_mtx.shape}, "
                f"expected ({_n_types}, {_n_types}) to match the cell types in adata"
            )

    # Floor the null std so that a zero-variance null does not divide by zero.
    null_contacts_std = np.maximum(null_contacts_std, np.sqrt(1/1000))
    permuted_z_score = (real_contacts - null_contacts_mean) / null_contacts_std
    # Upper-tail normal probability of |z| (not doubled).
    local_p_values = norm.sf(np.abs(permuted_z_score))
    adjusted_local_p_value = adjust_p_value_matrix_by_BH(local_p_values)
    # Small constant keeps the ratio finite when the null mean is 0.
    fold_changes = real_contacts / (null_contacts_mean + 1e-6)
    # Gather all results into a data frame
    contact_result_df = get_data_frame_from_metrices(cell_types,
                                                    {'pval-adjusted': adjusted_local_p_value,
                                                    'pval': local_p_values,
                                                    'z_score': permuted_z_score,
                                                    'contact_count': real_contacts,
                                                    'permutation_mean': null_contacts_mean,
                                                    'permutation_std': null_contacts_std,
                                                    'fold-change' : fold_changes,
                                            }).sort_values('z_score', ascending=False)
    contact_result_df['donor'] = _donor
    contact_result_df['brain_region'] = _brain_region
    contact_result_df['replicate'] = _replicate
    all_res.append(contact_result_df)
all_results_df = pd.concat(all_res, axis=0)

In [None]:
# all_results_df

In [None]:
# Output folder for this r; created (with parents) if it does not exist.
DIR = Path(f"/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2")
DIR.mkdir(parents=True, exist_ok=True)
INFILE = DIR / "contacts_meta_input.csv"   # combined table of all donor/region/replicate results
# Written with the default index, so reading it back with pd.read_csv yields an
# extra unnamed index column.
all_results_df.to_csv(INFILE)

In [None]:
# contact_result_df

In [None]:
# contact_result_df = contact_result_df[contact_result_df['pval-adjusted'] < 0.05]
# contact_result_df = contact_result_df[contact_result_df['contact_count'] > 5]
# contact_result_df

In [None]:
# _ct2 = "Astrocyte"
# _ct1 = "CN ST18 GABA"
# i = np.where(cell_types == _ct1)[0][0]
# j = np.where(cell_types == _ct2)[0][0]
# null_dist = null_contacts[:, i, j]
# real_count = real_contacts[i, j]

In [None]:
# fig, ax = plt.subplots(figsize=(4, 4))
# ax.hist(null_dist, bins=15, color='lightgrey', density=True)
# ax.axvline(real_count, color='red', linestyle='--', label='Real Count')
# ax.set_xlabel(f'Contact Counts between {_ct1} and {_ct2}')
# ax.set_ylabel('Density')
# ax.set_title(f'Contact Counts Distribution\n{_donor}, {_brain_region}, {_replicate}')
# ax.legend(loc='upper right', bbox_to_anchor=(1.6, 1), fontsize=12)
# plt.show()

## Run Meta Analysis

In [None]:
### Written with ChatGPT help ###
## To make this callable from the command line maybe add some argparse / click functionality later ## 

"""
Meta-analysis of cell–cell contact z-scores across spatial transcriptomics experiments.
Performs:
  1. Direction-specific random-effects meta-analysis within each brain region
  2. Region-wise moderator tests (do effects differ across regions?)
  3. Multilevel heterogeneity partitioning (donor vs experiment)
  4. Directional asymmetry summary (A→B vs B→A)

Author: eckerlab
"""

# -------------------------------------------------------------------------
#  Imports
# -------------------------------------------------------------------------
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import norm 
import statsmodels.api as sm
from statsmodels.regression.mixed_linear_model import MixedLM
from itertools import combinations
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including any raised by the model fits in the cells below.
warnings.filterwarnings("ignore")

# Same value as the r set before the contact-loading loop; selects the gr_{r}um_2 folder.
r = 50

In [None]:
# -------------------------------------------------------------------------
#  Load input data
# -------------------------------------------------------------------------

# Read the combined per-experiment contact table written by the compilation section.
DIR = Path(f"/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2")
INFILE = DIR / "contacts_meta_input.csv"   # combined table of all donor/region/replicate results
df = pd.read_csv(INFILE)

# Fail early if any column that the analysis relies on is absent.
req = {
    "cell_type1", "cell_type2", "z_score", "pval",
    "permutation_mean", "permutation_std",
    "contact_count", "pval-adjusted", "fold-change"
}
missing = req - set(df.columns)
if missing:
    raise ValueError(f"Missing columns: {missing}")

# Ensure donor / brain_region / replicate exist; fill with the string "NA" if not.
for col in ["donor", "brain_region", "replicate"]:
    if col not in df.columns:
        df[col] = "NA"

# -------------------------------------------------------------------------
#  Preprocessing
# -------------------------------------------------------------------------

# (1) Construct an *ordered* interaction ID "cell_type1|cell_type2".
# As a string, "A|B" is distinct from "B|A".
# NOTE(review): the input table is produced by get_data_frame_from_metrices, which
# only emits index pairs with j >= i, so for A != B the reverse label "B|A" is
# not expected to occur in this file — confirm that a directional reading is intended.
df["pair"] = (
    df["cell_type1"].astype(str).str.strip()
    + "|"
    + df["cell_type2"].astype(str).str.strip()
)

# (2) Optional filter for unstable null distributions (currently disabled).
# If permutation_mean ≈ 0, fold-changes blow up and z-scores aren't meaningful.
# df = df[df["permutation_mean"] > 1.0].copy()

# (3) Define effect size (yi) and its sampling variance (vi).
# yi is the permutation z-score; vi is fixed at 1.0 for every row
# (the variance of a z-score under the null is taken to be ≈ 1).
df = df.assign(yi=df["z_score"].astype(float), vi=1.0)

# Unique ID for each experiment: "donor|brain_region|replicate".
df["study_id"] = (
    df["donor"].astype(str) + "|" +
    df["brain_region"].astype(str) + "|" +
    df["replicate"].astype(str)
)

In [None]:
# -------------------------------------------------------------------------
#  (1) Random-effects meta-analysis within each region
# -------------------------------------------------------------------------
# Pool the z-scores of every pair within each brain region (random-effects model).
region_rows = []
for pair, dfg in df.groupby("pair"):
    for region, dfr in dfg.groupby("brain_region"):
        # Need ≥2 experiments to estimate heterogeneity
        if len(dfr) >= 2:
            res = re_meta(dfr["yi"].values, dfr["vi"].values)
            res.update(pair=pair, brain_region=region)
            region_rows.append(res)

per_region = pd.DataFrame(region_rows)
if len(per_region):
    # Benjamini–Hochberg FDR across all pair × region tests.
    # NaN p-values are left out of the correction: a single NaN passed to the
    # BH procedure would turn every adjusted value into NaN.
    tested = per_region["p"].notna()
    per_region["p_fdr"] = np.nan
    if tested.any():
        per_region.loc[tested, "p_fdr"] = sm.stats.multipletests(
            per_region.loc[tested, "p"], method="fdr_bh"
        )[1]
    per_region.to_csv(DIR / "meta_region_pooled.csv", index=False)
    print("Wrote meta_region_pooled.csv (pooled z per pair × region)")
else:
    # Report honestly instead of claiming a file was written.
    print("No pair × region group has ≥2 experiments; meta_region_pooled.csv was not written.")

In [None]:
# -------------------------------------------------------------------------
#  (2) Region moderator analysis: do effects differ across regions?
# -------------------------------------------------------------------------
# For every pair observed in ≥2 regions: an omnibus test of "region matters"
# plus all pairwise region-vs-region contrasts.
mod_rows, contrast_rows = [], []
for pair, dfg in df.groupby("pair"):
    regs = sorted(dfg["brain_region"].unique())
    if len(regs) < 2:
        continue
    # Design matrix: one indicator column per region (cell-means coding).
    # dtype=float keeps the matrix numeric on every pandas version
    # (pandas >= 2.0 would otherwise return boolean dummies).
    X = pd.get_dummies(dfg["brain_region"], drop_first=False, dtype=float)
    y = dfg["yi"].values
    w = 1.0 / dfg["vi"].values  # inverse-variance weights
    model = sm.WLS(y, X, weights=w).fit()

    # Compare to an intercept-only model (grand mean) → omnibus F-test
    X0 = np.ones((len(dfg), 1))
    model0 = sm.WLS(y, X0, weights=w).fit()
    anov = sm.stats.anova_lm(model0, model)
    p_between = float(anov["Pr(>F)"].iloc[-1])
    mod_rows.append({"pair": pair, "p_between": p_between})

    # Pairwise region contrasts: estimate = coef[a] - coef[b], tested with a z-test.
    coefs, cov = model.params.values, model.cov_params().values
    cols = X.columns.tolist()
    for (idx_a, a), (idx_b, b) in combinations(enumerate(cols), 2):
        L = np.zeros(len(cols))
        L[idx_a], L[idx_b] = 1, -1
        est = float(L @ coefs)
        se = float(np.sqrt(L @ cov @ L))
        z = est / se if se > 0 else np.nan
        p = 2 * norm.sf(abs(z)) if np.isfinite(z) else np.nan
        contrast_rows.append({
            "pair": pair, "region1": a, "region2": b,
            "contrast_est": est, "contrast_se": se, "z": z, "p_contrast": p
        })

mod = pd.DataFrame(mod_rows)
if len(mod):
    # BH correction over the non-NaN omnibus p-values only.
    tested = mod["p_between"].notna()
    mod["p_between_fdr"] = np.nan
    if tested.any():
        mod.loc[tested, "p_between_fdr"] = sm.stats.multipletests(
            mod.loc[tested, "p_between"], method="fdr_bh"
        )[1]
    mod.to_csv(DIR / "meta_region_moderator.csv", index=False)
    print("Wrote meta_region_moderator.csv (omnibus region tests)")
else:
    print("No pair spans ≥2 regions; meta_region_moderator.csv was not written.")

contr = pd.DataFrame(contrast_rows)
if len(contr):
    # BH correction over the non-NaN contrast p-values only.
    tested = contr["p_contrast"].notna()
    contr["p_contrast_fdr"] = np.nan
    if tested.any():
        contr.loc[tested, "p_contrast_fdr"] = sm.stats.multipletests(
            contr.loc[tested, "p_contrast"], method="fdr_bh"
        )[1]
    contr.to_csv(DIR / "meta_region_pairwise_contrasts.csv", index=False)
    print("Wrote meta_region_pairwise_contrasts.csv (pairwise region contrasts)")
else:
    print("No region contrasts computed; meta_region_pairwise_contrasts.csv was not written.")

In [None]:
# === Combined heterogeneity analysis ===
# Two separate models per pair:
# (1) donor random intercept controlling for region
# (2) replicate random intercept controlling for region

# import statsmodels.api as sm
# import numpy as np
# import pandas as pd

def _random_intercept_variance(data, group_col):
    """Fit ``yi ~ C(region)`` with a random intercept per ``group_col`` level.

    Returns ``(variance, error)``: the estimated random-intercept variance
    (0.0 if the fit exposes no random-effects covariance) and ``None``, or
    ``(nan, message)`` when the fit raises.
    """
    try:
        model = sm.MixedLM.from_formula("yi ~ C(region)", groups=group_col, data=data)
        fit = model.fit(reml=True, method="lbfgs", maxiter=500, disp=False)
        if hasattr(fit, "cov_re") and fit.cov_re.shape[0] > 0:
            return float(fit.cov_re.iloc[0, 0]), None
        return 0.0, None
    except Exception as err:
        # Best effort: one failing pair must not abort the whole loop,
        # but the failure is reported back instead of being dropped.
        return np.nan, f"{group_col} model failed ({type(err).__name__})"


def _tau2_and_i2(variance, sigma2_within=1.0):
    """Turn a variance estimate into ``(tau2, I2 in percent)``; NaN stays NaN."""
    # max(0.0, nan) evaluates to 0.0 in Python, which would disguise a failed
    # fit as "no heterogeneity" — so NaN is handled explicitly.
    if np.isnan(variance):
        return np.nan, np.nan
    tau2 = max(0.0, variance)
    return tau2, 100 * tau2 / (tau2 + sigma2_within)


het_rows = []

for pair, dfg in df.groupby("pair"):
    if dfg["donor"].nunique() < 2 or dfg["study_id"].nunique() < 2:
        het_rows.append({
            "pair": pair,
            "tau2_donor": np.nan,
            "I2_donor": np.nan,
            "tau2_replicate": np.nan,
            "I2_replicate": np.nan,
            "n_donors": dfg["donor"].nunique(),
            "n_replicates": dfg["study_id"].nunique(),
            "note": "insufficient levels"
        })
        continue

    tmp = dfg.copy()
    tmp["yi"] = tmp["yi"].astype(float)
    tmp["donor"] = tmp["donor"].astype(str)
    tmp["study_id"] = tmp["study_id"].astype(str)
    tmp["region"] = tmp["brain_region"].astype(str)

    # (1) donor random intercept, (2) replicate (study_id) random intercept;
    # both adjust for region through the fixed effect C(region).
    var_donor, err_donor = _random_intercept_variance(tmp, "donor")
    var_replicate, err_replicate = _random_intercept_variance(tmp, "study_id")

    # Heterogeneity measures; the within-study variance is fixed at 1.0.
    tau2_donor, I2_donor = _tau2_and_i2(var_donor)
    tau2_replicate, I2_replicate = _tau2_and_i2(var_replicate)

    row = {
        "pair": pair,
        "tau2_donor": tau2_donor,
        "I2_donor": I2_donor,
        "tau2_replicate": tau2_replicate,
        "I2_replicate": I2_replicate,
        "n_donors": tmp["donor"].nunique(),
        "n_replicates": tmp["study_id"].nunique()
    }
    failures = [msg for msg in (err_donor, err_replicate) if msg]
    if failures:
        row["note"] = "; ".join(failures)
    het_rows.append(row)

# Compile and write results
het_combined = pd.DataFrame(het_rows)
het_combined.to_csv(DIR / "meta_multilevel_heterogeneity.csv", index=False)
print("Wrote meta_multilevel_heterogeneity.csv (donor + replicate τ², region-adjusted).")

In [None]:
# -------------------------------------------------------------------------
#  (4) Directional asymmetry summary  A→B  vs  B→A
# -------------------------------------------------------------------------
def split_pair(pair):
    """Cut an 'A|B' label at its first '|' and return ['A', 'B']; without a '|' return [pair, '']."""
    if "|" not in pair:
        return [pair, ""]
    left, _, right = pair.partition("|")
    return [left, right]

def find_reciprocal(pair):
    """Swap the two halves of an 'A|B' label, giving 'B|A'."""
    first, second = split_pair(pair)
    return "|".join((second, first))

def classify_asym(row, threshold=0.5):
    """Heuristic asymmetry label based on ``row["z_diff"]``.

    Parameters
    ----------
    row : mapping
        Must provide ``"z_diff"`` (z of A|B minus z of B|A).
    threshold : float, default 0.5
        ``|z_diff|`` below this value counts as symmetric.

    Returns
    -------
    str
        "missing" when ``z_diff`` is NaN, "Both (symmetric)" when
        ``|z_diff| < threshold``, otherwise the dominating direction
        according to the sign of ``z_diff``.
    """
    z_diff = row["z_diff"]
    if pd.isna(z_diff):
        return "missing"
    if abs(z_diff) < threshold:
        return "Both (symmetric)"
    # Decide by sign, so that a value exactly at +threshold is not
    # mislabelled as the opposite direction.
    if z_diff > 0:
        return "Directional (A|B dominates)"
    return "Directional (B|A dominates)"

# Compare every pooled pair with its reciprocal label within each region.
# NOTE(review): the input table is built from the upper triangle only
# (get_data_frame_from_metrices emits j >= i), so for A != B the reciprocal is
# expected to be absent and the row ends up as "A|B only" with NaN differences.
meta = pd.read_csv(DIR / "meta_region_pooled.csv")
asym_rows = []
_no_stats = {"mu": np.nan, "p": np.nan, "I2": np.nan}

for region, dfreg in meta.groupby("brain_region"):
    # First row per pair as a dict, so each lookup is O(1) instead of a
    # boolean-mask scan of the whole region table.
    stats_by_pair = (
        dfreg.drop_duplicates(subset="pair")
        .set_index("pair")[["mu", "p", "I2"]]
        .to_dict("index")
    )
    # Sorted iteration makes the row order reproducible between runs.
    for pair in sorted(stats_by_pair):
        recip = find_reciprocal(pair)
        a, b = split_pair(pair)
        fwd = stats_by_pair[pair]
        has_reciprocal = recip in stats_by_pair
        rev = stats_by_pair[recip] if has_reciprocal else _no_stats

        row = {
            "brain_region": region,
            "pair": pair,
            "reciprocal": recip,
            "cell_type1": a,
            "cell_type2": b,
            "z_AB": fwd["mu"],
            "z_BA": rev["mu"],
            "p_AB": fwd["p"],
            "p_BA": rev["p"],
            "I2_AB": fwd["I2"],
            "I2_BA": rev["I2"],
        }

        # Differences (NaN unless both directions are available)
        row["z_diff"] = (
            row["z_AB"] - row["z_BA"]
            if pd.notna(row["z_AB"]) and pd.notna(row["z_BA"]) else np.nan
        )
        if pd.notna(row["p_AB"]) and pd.notna(row["p_BA"]) and row["p_AB"] > 0 and row["p_BA"] > 0:
            row["log10_p_ratio"] = np.log10(row["p_BA"] / row["p_AB"])
        else:
            row["log10_p_ratio"] = np.nan

        # Classification. `pair` always comes from this region's own table, so
        # only the reciprocal can be missing.
        row["asym_class"] = classify_asym(row) if has_reciprocal else "A|B only"

        asym_rows.append(row)

asym = pd.DataFrame(asym_rows)
if len(asym):
    asym["abs_z_diff"] = asym["z_diff"].abs()
    asym["dominant_direction"] = np.where(
        asym["z_diff"] > 0, "A|B",
        np.where(asym["z_diff"] < 0, "B|A", "none")
    )
    asym = asym.sort_values(["brain_region", "abs_z_diff"], ascending=[True, False])
    asym.to_csv(DIR / "meta_directional_asymmetry.csv", index=False)
    print("Wrote meta_directional_asymmetry.csv (A|B vs B|A)")
else:
    print("No reciprocal pairs found for asymmetry summary.")


## Analyzing Results from Meta Analysis

### Looking at files

In [None]:
# Base folder and file names of the meta-analysis outputs.
# NOTE(review): this DIR is the parent "meta_contacts" folder, whereas the
# meta-analysis cells above write into its "gr_{r}um_2" sub-folder. Reading
# DIR / <file> therefore looks in the parent folder — confirm this is intended.
DIR = Path("/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts")
region_diff = "meta_region_moderator.csv"  # omnibus region tests
region_heterogeneity = "meta_multilevel_heterogeneity.csv"  # donor / replicate τ² and I²
region_pairwise_contrasts = "meta_region_pairwise_contrasts.csv"  # region-vs-region contrasts
region_pooled_effects = "meta_region_pooled.csv"  # pooled z per pair × region
asymmetry = "meta_directional_asymmetry.csv"  # A|B vs B|A summary

In [None]:
# df = pd.read_csv(DIR / region_pooled_effects)
# df.head()

In [None]:
# df = pd.read_csv(DIR / region_heterogeneity)
# df['tau2_donor'].isna().sum() / df.shape[0] # all NA? 

In [None]:
# df = pd.read_csv(DIR / region_pairwise_contrasts)
# df.head() # FDR is NAN

In [None]:
# Peek at the omnibus region-moderator table. This rebinds the name `df`.
df = pd.read_csv(DIR / region_diff)
df.head() # author's note: the FDR column was NaN here as well

In [None]:
# Peek at the asymmetry rows that are not labelled symmetric. This rebinds the name `df`.
df = pd.read_csv(DIR / asymmetry)
df.loc[df['asym_class'] != 'Both (symmetric)'].head() # author's note: the directional columns contain NaNs

### plots

#### functions

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from adjustText import adjust_text
import numpy as np

def plot_labeled_scatter(
    df,
    x_col,
    y_col,
    label_col="pair",
    label_top=5,
    diff_quantile=0.95,
    figsize=(7, 6),
    title=None,
    color="steelblue",
    label_color="darkred",
    diagonal=True,
    save=None
):
    """
    Scatter two numeric columns against each other and annotate the extremes.

    A point is annotated when it is among the ``label_top`` largest values of
    either column, or when its |y - x| exceeds the ``diff_quantile`` quantile
    of that difference over all rows.

    Parameters
    ----------
    df : pd.DataFrame
        Table holding ``x_col``, ``y_col`` and ``label_col``.
    x_col, y_col : str
        Columns plotted on the x and y axis; also used as axis labels.
    label_col : str
        Column providing the annotation text (one annotation per distinct value).
    label_top : int
        How many of the largest points per axis get annotated.
    diff_quantile : float
        Quantile (0–1) of |y - x| above which a point is annotated.
    figsize : tuple
        Figure size (width, height).
    title : str
        Plot title; defaults to "<y_col> vs <x_col>".
    color : str
        Marker colour.
    label_color : str
        Annotation text colour.
    diagonal : bool
        Draw the dashed y = x line through the origin.
    save : str or None
        File to save the figure to; when falsy the figure is shown instead.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.scatterplot(x=x_col, y=y_col, data=df, alpha=0.7, color=color, edgecolor="none", ax=ax)

    if diagonal:
        ax.axline((0, 0), slope=1, color="k", ls="--", lw=1)

    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(title or f"{y_col} vs {x_col}")

    # --- Choose which points to annotate ---
    scored = df.copy()
    scored["diff"] = np.abs(scored[y_col] - scored[x_col])
    cutoff = scored["diff"].quantile(diff_quantile)
    candidates = [
        scored.nlargest(label_top, x_col),
        scored.nlargest(label_top, y_col),
        scored[scored["diff"] > cutoff],
    ]
    to_label = pd.concat(candidates).drop_duplicates(subset=[label_col])

    texts = [
        ax.text(
            point[x_col],
            point[y_col],
            str(point[label_col]),
            fontsize=8,
            color=label_color,
            weight="bold"
        )
        for _, point in to_label.iterrows()
    ]

    # Nudge the annotations apart and connect them to their points.
    adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray", lw=0.5))

    fig.tight_layout()
    if save:
        fig.savefig(save, dpi=300)
        print(f"Saved plot to: {save}")
    else:
        plt.show()


#### plots

In [None]:
# df = pd.read_csv(DIR / f"gr_{r}um_2" / "contacts_meta_input.csv")
# meta = pd.read_csv(DIR / f"gr_{r}um_2" / "meta_region_pooled.csv")

# _ct2 = "SN SOX6 Dopa"
# _ct1 = "Astrocyte"
# pair = f"{_ct1}|{_ct2}"
# region = "MGM1"

# # subset for that pair/region
# d = df[(df["cell_type1"] == _ct1) & (df["cell_type2"] == _ct2) & (df["brain_region"] == region)]
# m = meta[(meta["pair"] == pair) & (meta["brain_region"] == region)]

# fig, ax = plt.subplots(figsize=(6, 4))
# ax.scatter(d["z_score"], d["donor"], color="purple", s=50, marker='x')
# ax.axvline(0, color="k", lw=1)
# ax.axvline(m["mu"].values[0], color="red", lw=2, label="pooled z")
# ax.set_title(f"{pair} in {region}")
# ax.set_xlabel("z-score vs null")
# ax.legend()
# plt.show()


In [None]:
# meta = pd.read_csv(DIR / f"gr_{r}um_2" / "meta_region_pooled.csv")
# pivot = meta.pivot(index="pair", columns="brain_region", values="mu")
# # pivot = pivot[['CAH', 'CAB', 'PU', 'NAC', 'GP']].dropna(how='any')
# pivot = pivot[['MGM1', 'SUBTH']].dropna(how='all')

# plt.figure(figsize=(10, max(4, len(pivot)/4)))
# sns.heatmap(pivot, cmap="coolwarm", center=0, vmax=5, vmin=-5)
# plt.title("Pooled contact enrichment (z) per region")
# plt.xlabel("Region")
# plt.ylabel("Cell–cell pair (directional)")
# plt.show()


In [None]:
# mod = pd.read_csv(DIR / f"{r}um_2" / "meta_region_moderator.csv")

# plt.figure(figsize=(6,5))
# sns.scatterplot(
#     x=-np.log10(mod["p_between_fdr"]),
#     y=-np.log10(mod["p_between"]),
#     data=mod,
#     alpha=0.7
# )
# plt.axhline(-np.log10(0.05), color="red", ls="--", label="FDR 0.05")
# plt.xlabel("−log10(FDR-corrected p_between)")
# plt.ylabel("−log10(raw p_between)")
# plt.title("Omnibus region heterogeneity across cell–cell pairs")
# plt.legend()
# plt.tight_layout()
# plt.show()


In [None]:
# contr = pd.read_csv(DIR / f"{r}um_2" / "meta_region_pairwise_contrasts.csv")
# pair = "Astrocyte|STR D1 MSN"

# dfp = contr[contr["pair"] == pair].pivot(
#     index="region1", columns="region2", values="contrast_est"
# )

# plt.figure(figsize=(6,5))
# sns.heatmap(dfp, cmap="vlag", center=0, annot=True, fmt=".2f")
# plt.title(f"Pairwise region contrasts for {pair}")
# plt.xlabel("Region 2")
# plt.ylabel("Region 1")
# plt.tight_layout()
# plt.show()


In [None]:
# fig, ax = plt.subplots(figsize=(7,5))
# ax.scatter(contr['contrast_est'], -np.log10(contr["p_contrast_fdr"]), alpha=0.6)
# # sns.scatterplot(
# #     x="contrast_est",
# #     y=-np.log10(contr["p_contrast_fdr"]),
# #     hue="pair",
# #     data=contr,
# #     alpha=0.6,
# #     ax=ax
# # )
# ax.axvline(0, color="k", lw=1)
# ax.axhline(-np.log10(0.05), color="red", ls="--")
# ax.set_xlabel("Contrast estimate (region1 − region2)")
# ax.set_ylabel("−log10(FDR-corrected p)")
# ax.set_title("Pairwise region contrasts across cell–cell pairs")
# # ax.legend(None)
# plt.tight_layout()
# plt.show()


In [None]:
# agg = (contr.groupby(["pair","region1"])
#              .agg(mean_diff=("contrast_est","mean"),
#                   sig_frac=("p_contrast_fdr", lambda x: (x<0.05).mean()))
#              .reset_index())

# pair = "Astrocyte|STR D1 MSN"
# dfp = agg[agg["pair"] == pair]

# plt.figure(figsize=(6,4))
# sns.barplot(y="region1", x="mean_diff", data=dfp, orient="h", palette="vlag")
# plt.axvline(0, color="k", lw=1)
# plt.title(f"Mean regional differences for {pair}")
# plt.xlabel("Average contrast estimate")
# plt.ylabel("Region")
# plt.tight_layout()
# plt.show()


In [None]:
# asym = pd.read_csv(DIR / f"{r}um_2" / "meta_directional_asymmetry.csv")

# plt.figure(figsize=(6,4))
# sns.histplot(asym["z_diff"], bins=50, kde=True)
# plt.axvline(0, color="k", lw=1)
# plt.xlabel("Δz = z(A→B) − z(B→A)")
# plt.title("Distribution of directional asymmetry across regions")
# plt.show()


In [None]:
# het = pd.read_csv(DIR / "meta_multilevel_heterogeneity.csv")

# plot_labeled_scatter(
#     het,
#     x_col="I2_replicate",
#     y_col="I2_donor",
#     label_col="pair",
#     label_top=3,
#     diff_quantile=0.99,
#     title="Donor vs Replicate Heterogeneity (I², region-adjusted)",
#     color="royalblue"
# )


In [None]:
# # # TOP RIGHT = Strong Enrichments of contacts
# # # TOP LEFT = Strong depletion of contacts
# meta = pd.read_csv(DIR / f"{r}um_2" / "meta_region_pooled.csv")
# region = "CAH"
# d = meta[meta["brain_region"] == region]

# plt.figure(figsize=(6,4))
# plt.scatter(d["mu"], -np.log10(d["p_fdr"]), alpha=0.6)
# plt.axvline(0, color="k", lw=1)
# plt.axhline(-np.log10(0.05), color="red", ls="--")
# plt.xlabel("Pooled z")
# plt.ylabel("−log10(FDR)")
# plt.title(f"Significant contacts in {region}")
# plt.show()


## Analyzing Contacts

#### functions

In [None]:
# Chat GPT
def combine_results(meta_df, cell_types_to_combine, new_category_name, comparison_ct):
    """
    Average the pooled effects of several cell types against one comparison type.

    Within each brain region, all rows pairing ``comparison_ct`` with any of
    ``cell_types_to_combine`` (in either order) are collected and their ``mu``
    values are averaged without weighting. This is an AVERAGE over different
    cell-type pairs, not a meta-analysis of one and the same effect.

    Parameters:
    -----------
    meta_df : pd.DataFrame
        Needs the columns ``pair``, ``brain_region``, ``mu`` and ``se``.
        ``ct1`` / ``ct2`` are used when present; otherwise they are derived
        from ``pair`` by cutting at the first ``'|'``. The input is not modified.
    cell_types_to_combine : list
        Cell types whose rows are combined.
    new_category_name : str
        Value written to the ``category`` column of the output.
    comparison_ct : str
        The cell type on the other side of each pair.

    Returns:
    --------
    pd.DataFrame
        One row per brain region with at least one matching pair; columns
        ``brain_region``, ``category``, ``mu``, ``se``, ``n_combined``,
        ``original_pairs``, ``individual_mus``, ``individual_ses``.
    """
    if not {"ct1", "ct2"}.issubset(meta_df.columns):
        # Derive the two sides from the pair label; .assign works on a copy.
        pair_labels = meta_df["pair"].astype(str)
        meta_df = meta_df.assign(
            ct1=[label.partition("|")[0] for label in pair_labels],
            ct2=[label.partition("|")[2] for label in pair_labels],
        )

    combined_results = []

    for region, region_df in meta_df.groupby('brain_region'):
        # Keep pairs of (one of the combined types, comparison type), either order.
        subset = region_df[
            (region_df['ct1'].isin(cell_types_to_combine) & (region_df['ct2'] == comparison_ct)) |
            (region_df['ct2'].isin(cell_types_to_combine) & (region_df['ct1'] == comparison_ct))
        ].copy()

        if subset.empty:
            continue

        mus = subset['mu'].values
        ses = subset['se'].values
        n = len(subset)

        # Simple (unweighted) average of the pooled means.
        combined_mu = np.mean(mus)

        # Standard error of that average: sqrt(sum(SE_i^2)) / n.
        combined_se = np.sqrt(np.sum(ses**2)) / n

        # Alternative: precision-weighted combination
        # weights = 1 / (ses ** 2)
        # combined_mu = np.sum(weights * mus) / np.sum(weights)
        # combined_se = 1 / np.sqrt(np.sum(weights))

        combined_results.append({
            'brain_region': region,
            'category': new_category_name,
            'mu': combined_mu,
            'se': combined_se,
            'n_combined': n,
            'original_pairs': subset['pair'].tolist(),
            'individual_mus': mus.tolist(),
            'individual_ses': ses.tolist()
        })

    return pd.DataFrame(combined_results)

# Test the function
# MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
# combined_MSN_corrected = combine_results(meta, MSN_types, "All_MSN_corrected")

def plot_pair(
    meta,
    pair,
    pair_col = 'pair',
    order=None,
    ax=None,
    color='blue',
    label=None,
    rasterized=False,
    opacity=1.0,
    region_col = "brain_region",
):
    """Plot the pooled z-score (``mu`` +/- ``se``) of one pair across regions.

    Parameters
    ----------
    meta : pd.DataFrame
        Must contain ``pair_col``, ``region_col``, ``mu`` and ``se``.
    pair : str
        Value of ``pair_col`` selecting the rows to plot.
    pair_col : str
        Column holding the pair / category label.
    order : sequence of str, optional
        Desired left-to-right region order. Regions absent from the selected
        rows are not plotted. When None, rows are sorted by ``mu`` instead.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 6x4 figure is created when None.
    color, label, rasterized, opacity
        Forwarded to the errorbar / line artists (``opacity`` as ``alpha``).
    region_col : str
        Column used for the x axis.

    Returns
    -------
    The Axes that was drawn on.
    """
    meta_pair = meta[meta[pair_col] == pair]

    if ax is None:
        _, ax = plt.subplots(figsize=(6, 4))

    if order is not None:
        full_order = list(order)
        if full_order:
            # Matplotlib assigns string categories to x positions in first-seen order.
            # When several series share `ax` and the first one lacks a region, the axis
            # would otherwise end up in a different order than `order`. Registering the
            # full order up front pins every region to its intended slot.
            ax.xaxis.update_units(full_order)
        present = set(meta_pair[region_col].values)
        kept = [region for region in full_order if region in present]
        meta_pair = meta_pair.set_index(region_col).loc[kept].reset_index()
    else:
        meta_pair = meta_pair.sort_values(by='mu')

    ax.errorbar(x=meta_pair[region_col], y=meta_pair['mu'], yerr=meta_pair['se'], fmt='o', color=color, capsize=5, label=label, alpha=opacity, rasterized=rasterized)
    # Dashed connector between consecutive regions (no legend entry of its own).
    ax.plot(meta_pair[region_col], meta_pair['mu'], color=color, linestyle='--', alpha=opacity, rasterized=rasterized)
    # Reference line at z = 0.
    ax.axhline(0, color='black', linestyle='--', linewidth=1, rasterized=rasterized)
    ax.set_xlabel('Brain Region')
    ax.set_ylabel('Pooled Z-score')
    ax.set_title(f'Pooled Contact Enrichment Z-scores for {pair} Across Brain Regions')
    return ax

In [None]:
# Lookup from each raw `brain_region` label to its `brain_region_corr` label.
# De-duplicate the (region, corrected) pairs *before* indexing: DataFrame.drop_duplicates
# ignores the index, so de-duplicating after set_index would compare only the corrected
# labels and drop every raw region but the first that shares one, which makes
# `.map(br_to_brc_map)` return NaN for the dropped regions.
br_to_brc_map = (
    adata.obs[['brain_region', 'brain_region_corr']]
    .drop_duplicates()
    .set_index('brain_region')['brain_region_corr']
    .to_dict()
)

#### Dopaminergic Interactions

In [None]:
# Dopaminergic subtypes vs. Astrocyte: one figure per r in `rs` (the '{r}um' datasets),
# showing each subtype plus the combination returned by combine_results.
rs = [15, 30, 50]
dopa_types = ["SN SOX6 Dopa", "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
ORDER = ['MGM1', 'STH'] # ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_Dopa = combine_results(meta, dopa_types, "All_Dopa", "Astrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _dopa_type in dopa_types:
        # Try "Astrocyte|<type>" first, then the reversed orientation.
        pair = "Astrocyte|" + _dopa_type
        sub = meta[(meta['ct1'] == 'Astrocyte') & (meta['ct2'] == _dopa_type)]
        if sub.empty:
            pair = _dopa_type + "|Astrocyte"
            sub = meta[(meta['ct2'] == 'Astrocyte') & (meta['ct1'] == _dopa_type)]
        if sub.empty:
            print(f"No data for Astrocyte and {_dopa_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_dopa_type, 'blue'), label=_dopa_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_Dopa.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Astrocyte and All_Dopa, skipping.")
    else:
        combined_Dopa['brain_region_corr'] = combined_Dopa['brain_region'].map(br_to_brc_map)
        plot_pair(combined_Dopa, 'All_Dopa', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined Dopamine', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for Dopamine - Astrocytes for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'astrocyte_dopa_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'astrocyte_dopa_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


In [None]:
# Dopaminergic subtypes vs. Microglia: one figure per r in `rs` (the '{r}um' datasets),
# showing each subtype plus the combination returned by combine_results.
rs = [15, 30, 50]
dopa_types = ["SN SOX6 Dopa", "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
ORDER = ['MGM1', 'STH'] # ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_Dopa = combine_results(meta, dopa_types, "All_Dopa", "Microglia")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _dopa_type in dopa_types:
        # Try "Microglia|<type>" first, then the reversed orientation.
        pair = "Microglia|" + _dopa_type
        sub = meta[(meta['ct1'] == 'Microglia') & (meta['ct2'] == _dopa_type)]
        if sub.empty:
            pair = _dopa_type + "|Microglia"
            sub = meta[(meta['ct2'] == 'Microglia') & (meta['ct1'] == _dopa_type)]
        if sub.empty:
            print(f"No data for Microglia and {_dopa_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_dopa_type, 'blue'), label=_dopa_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_Dopa.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Microglia and All_Dopa, skipping.")
    else:
        combined_Dopa['brain_region_corr'] = combined_Dopa['brain_region'].map(br_to_brc_map)
        plot_pair(combined_Dopa, 'All_Dopa', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined Dopamine', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for Dopamine - Microglia for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'microglia_dopa_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'microglia_dopa_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


In [None]:
# Dopaminergic subtypes vs. Oligo OPALIN: one figure per r in `rs` (the '{r}um' datasets),
# showing each subtype plus the combination returned by combine_results.
rs = [15, 30, 50]
dopa_types = ["SN SOX6 Dopa", "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
ORDER = ['MGM1', 'STH'] # ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_Dopa = combine_results(meta, dopa_types, "All_Dopa", "Oligo OPALIN")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _dopa_type in dopa_types:
        # Try "Oligo OPALIN|<type>" first, then the reversed orientation.
        pair = "Oligo OPALIN|" + _dopa_type
        sub = meta[(meta['ct1'] == 'Oligo OPALIN') & (meta['ct2'] == _dopa_type)]
        if sub.empty:
            pair = _dopa_type + "|Oligo OPALIN"
            sub = meta[(meta['ct2'] == 'Oligo OPALIN') & (meta['ct1'] == _dopa_type)]
        if sub.empty:
            print(f"No data for Oligo OPALIN and {_dopa_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_dopa_type, 'blue'), label=_dopa_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_Dopa.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Oligo OPALIN and All_Dopa, skipping.")
    else:
        combined_Dopa['brain_region_corr'] = combined_Dopa['brain_region'].map(br_to_brc_map)
        plot_pair(combined_Dopa, 'All_Dopa', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined Dopamine', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for Dopamine - Oligo OPALIN for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'oligo_opalin_dopa_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'oligo_opalin_dopa_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


In [None]:
# Dopaminergic subtypes vs. Oligo PLEKHG1: one figure per r in `rs` (the '{r}um' datasets),
# showing each subtype plus the combination returned by combine_results.
rs = [15, 30, 50]
dopa_types = ["SN SOX6 Dopa", "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
ORDER = ['MGM1', 'STH'] # ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_Dopa = combine_results(meta, dopa_types, "All_Dopa", "Oligo PLEKHG1")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _dopa_type in dopa_types:
        # Try "Oligo PLEKHG1|<type>" first, then the reversed orientation.
        pair = "Oligo PLEKHG1|" + _dopa_type
        sub = meta[(meta['ct1'] == 'Oligo PLEKHG1') & (meta['ct2'] == _dopa_type)]
        if sub.empty:
            pair = _dopa_type + "|Oligo PLEKHG1"
            sub = meta[(meta['ct2'] == 'Oligo PLEKHG1') & (meta['ct1'] == _dopa_type)]
        if sub.empty:
            print(f"No data for Oligo PLEKHG1 and {_dopa_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_dopa_type, 'blue'), label=_dopa_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_Dopa.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Oligo PLEKHG1 and All_Dopa, skipping.")
    else:
        combined_Dopa['brain_region_corr'] = combined_Dopa['brain_region'].map(br_to_brc_map)
        plot_pair(combined_Dopa, 'All_Dopa', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined Dopamine', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for Dopamine - Oligo PLEKHG1 for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'oligo_plekhg1_dopa_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'oligo_plekhg1_dopa_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


#### Astrocyte Interactions

In [None]:
# Cell-type groups used by the Astrocyte interaction cells below.
MSN_types = [
    "STRv D1 MSN",
    "STRv D1 NUDAP MSN",
    "STR D1D2 Hybrid MSN",
    "OT D1 ICj",
    "STRd D1 Striosome MSN",
    "STRv D2 MSN",
    "STRd D2 StrioMat Hybrid MSN",
    "STRd D2 Matrix MSN",
    "STRd D1 Matrix MSN",
    "STRd D2 Striosome MSN",
]
GP_types = [
    "GPi Core",
    "GPe SOX6-CTXND1 GABA",
    "GPi Shell",
    "GPin-BF Cholinergic GABA",
    "GPe-NDB-SI LHX6-LHX8-GBX1 GABA",
    "GPe MEIS2-SOX6 GABA",
]
INT_types = [
    "VIP GABA",
    "STR TAC3-PLPP4 GABA",
    "STR SST-CHODL GABA",
    "STR FS PTHLH-PVALB GABA",
    "LAMP5-CXCL14 GABA",
    "STRd Cholinergic GABA",
    "LAMP5-LHX6 GABA",
    "STR Cholinergic GABA",
]

In [None]:
# MSN subtypes (notebook-level MSN_types) vs. Astrocyte: one figure per r in `rs`
# (the '{r}um' datasets), showing each subtype plus the combine_results combination.
rs = [15, 30, 50]
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Astrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _msn_type in MSN_types:
        # Try "Astrocyte|<type>" first, then the reversed orientation.
        pair = "Astrocyte|" + _msn_type
        sub = meta[(meta['ct1'] == 'Astrocyte') & (meta['ct2'] == _msn_type)]
        if sub.empty:
            pair = _msn_type + "|Astrocyte"
            sub = meta[(meta['ct2'] == 'Astrocyte') & (meta['ct1'] == _msn_type)]
        if sub.empty:
            print(f"No data for Astrocyte and {_msn_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['MSN_Groups_palette'].get(_msn_type, 'blue'), label=_msn_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_MSN.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Astrocyte and All_MSN, skipping.")
    else:
        combined_MSN['brain_region_corr'] = combined_MSN['brain_region'].map(br_to_brc_map)
        plot_pair(combined_MSN, 'All_MSN', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined MSNs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for MSNs - Astrocytes for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'astrocyte_msn_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'astrocyte_msn_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

In [None]:
# GP subtypes (notebook-level GP_types) vs. Astrocyte: one figure per r in `rs`
# (the '{r}um' datasets), showing each subtype plus the combine_results combination.
rs = [15, 30, 50]
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    # GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
    # GP_types = ["CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]

    combined_GP = combine_results(meta, GP_types, "All_GP", "Astrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _gp_type in GP_types:
        # Try "Astrocyte|<type>" first, then the reversed orientation.
        pair = "Astrocyte|" + _gp_type
        sub = meta[(meta['ct1'] == 'Astrocyte') & (meta['ct2'] == _gp_type)]
        if sub.empty:
            pair = _gp_type + "|Astrocyte"
            sub = meta[(meta['ct2'] == 'Astrocyte') & (meta['ct1'] == _gp_type)]
        if sub.empty:
            print(f"No data for Astrocyte and {_gp_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_gp_type, 'blue'), label=_gp_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_GP.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Astrocyte and All_GP, skipping.")
    else:
        combined_GP['brain_region_corr'] = combined_GP['brain_region'].map(br_to_brc_map)
        plot_pair(combined_GP, 'All_GP', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined GPs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for GPs - Astrocytes for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'astrocyte_gp_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'astrocyte_gp_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

In [None]:
# Interneuron subtypes (notebook-level INT_types) vs. Astrocyte: one figure per r in `rs`
# (the '{r}um' datasets), showing each subtype plus the combine_results combination.
rs = [15, 30, 50]
# ORDER = ['CAH', 'CAB', 'PU', 'GP', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC', 'MGM1']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    # IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
    combined_IT = combine_results(meta, INT_types, "All_IT", "Astrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _it_type in INT_types:
        # Try "Astrocyte|<type>" first, then the reversed orientation.
        pair = "Astrocyte|" + _it_type
        sub = meta[(meta['ct1'] == 'Astrocyte') & (meta['ct2'] == _it_type)]
        if sub.empty:
            pair = _it_type + "|Astrocyte"
            sub = meta[(meta['ct2'] == 'Astrocyte') & (meta['ct1'] == _it_type)]
        if sub.empty:
            print(f"No data for Astrocyte and {_it_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Group_palette'].get(_it_type, 'blue'), label=_it_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_IT.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Astrocyte and All_IT, skipping.")
    else:
        combined_IT['brain_region_corr'] = combined_IT['brain_region'].map(br_to_brc_map)
        plot_pair(combined_IT, 'All_IT', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined ITs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for ITs - Astrocytes for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    # plt.savefig(image_path / f'astrocyte_it_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    # plt.savefig(image_path / f'astrocyte_it_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

### Microglia Interactions

In [None]:
# MSN subtypes vs. Microglia: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
# NOTE(review): rebinds the notebook-level MSN_types list defined in the Astrocyte section,
# so re-running that section afterwards uses these three types instead.
rs = [15, 30, 50]
MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
# ORDER = ['CAH', 'CAB', 'PU', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Microglia")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _msn_type in MSN_types:
        # Try "Microglia|<type>" first, then the reversed orientation.
        pair = "Microglia|" + _msn_type
        sub = meta[(meta['ct1'] == 'Microglia') & (meta['ct2'] == _msn_type)]
        if sub.empty:
            pair = _msn_type + "|Microglia"
            sub = meta[(meta['ct2'] == 'Microglia') & (meta['ct1'] == _msn_type)]
        if sub.empty:
            print(f"No data for Microglia and {_msn_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_msn_type, 'blue'), label=_msn_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_MSN.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Microglia and All_MSN, skipping.")
    else:
        combined_MSN['brain_region_corr'] = combined_MSN['brain_region'].map(br_to_brc_map)
        plot_pair(combined_MSN, 'All_MSN', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined MSNs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for MSNs - Microglia for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'microglia_msn_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'microglia_msn_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


In [None]:
# GP subtypes vs. Microglia: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
# NOTE(review): rebinds the notebook-level GP_types list defined in the Astrocyte section,
# so re-running that section afterwards uses these four types instead.
rs = [15, 30, 50]
# GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
GP_types = ["CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
# ORDER = ['CAH', 'CAB', 'PU', 'GP', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_GP = combine_results(meta, GP_types, "All_GP", "Microglia")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _gp_type in GP_types:
        # Try "Microglia|<type>" first, then the reversed orientation.
        pair = "Microglia|" + _gp_type
        sub = meta[(meta['ct1'] == 'Microglia') & (meta['ct2'] == _gp_type)]
        if sub.empty:
            pair = _gp_type + "|Microglia"
            sub = meta[(meta['ct2'] == 'Microglia') & (meta['ct1'] == _gp_type)]
        if sub.empty:
            print(f"No data for Microglia and {_gp_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_gp_type, 'blue'), label=_gp_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_GP.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Microglia and All_GP, skipping.")
    else:
        combined_GP['brain_region_corr'] = combined_GP['brain_region'].map(br_to_brc_map)
        plot_pair(combined_GP, 'All_GP', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined GPs', rasterized=True, region_col="brain_region_corr")

    # axs.set_xticks(ORDER)
    # axs.set_xticklabels(axs.get_xticklabels(), rotation=45)
    axs.set_title(f'Pooled Contact Enrichment Z-scores for GPs - Microglia for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'microglia_gp_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'microglia_gp_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

In [None]:
# Display the brain_region -> brain_region_corr lookup that the plotting cells pass to .map().
br_to_brc_map

In [None]:
# Interneuron subtypes (IT_types) vs. Microglia: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
rs = [15, 30, 50]
IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
# ORDER = ['CAH', 'CAB', 'PU', 'GP', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC', 'MGM1']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_IT = combine_results(meta, IT_types, "All_IT", "Microglia")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _it_type in IT_types:
        # Try "Microglia|<type>" first, then the reversed orientation.
        pair = "Microglia|" + _it_type
        sub = meta[(meta['ct1'] == 'Microglia') & (meta['ct2'] == _it_type)]
        if sub.empty:
            pair = _it_type + "|Microglia"
            sub = meta[(meta['ct2'] == 'Microglia') & (meta['ct1'] == _it_type)]
        if sub.empty:
            print(f"No data for Microglia and {_it_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_it_type, 'blue'), label=_it_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_IT.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Microglia and All_IT, skipping.")
    else:
        combined_IT['brain_region_corr'] = combined_IT['brain_region'].map(br_to_brc_map)
        plot_pair(combined_IT, 'All_IT', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined ITs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for ITs - Microglia for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'microglia_it_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'microglia_it_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

### Oligodendrocytes

In [None]:
# MSN subtypes vs. Oligodendrocyte: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
# NOTE(review): rebinds the notebook-level MSN_types list defined in the Astrocyte section,
# so re-running that section afterwards uses these three types instead.
rs = [15, 30, 50]
MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
# ORDER = ['CAH', 'CAB', 'PU', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Oligodendrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _msn_type in MSN_types:
        # Try "Oligodendrocyte|<type>" first, then the reversed orientation.
        pair = "Oligodendrocyte|" + _msn_type
        sub = meta[(meta['ct1'] == 'Oligodendrocyte') & (meta['ct2'] == _msn_type)]
        if sub.empty:
            pair = _msn_type + "|Oligodendrocyte"
            sub = meta[(meta['ct2'] == 'Oligodendrocyte') & (meta['ct1'] == _msn_type)]
        if sub.empty:
            print(f"No data for Oligodendrocyte and {_msn_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_msn_type, 'blue'), label=_msn_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_MSN.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Oligodendrocyte and All_MSN, skipping.")
    else:
        combined_MSN['brain_region_corr'] = combined_MSN['brain_region'].map(br_to_brc_map)
        plot_pair(combined_MSN, 'All_MSN', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined MSNs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for MSNs - Oligodendrocyte for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'oligodendrocyte_msn_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'oligodendrocyte_msn_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)


In [None]:
# GP subtypes vs. Oligodendrocyte: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
# NOTE(review): rebinds the notebook-level GP_types list defined in the Astrocyte section,
# so re-running that section afterwards uses these four types instead.
rs = [15, 30, 50]
# GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
GP_types = ["CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
# ORDER = ['CAH', 'CAB', 'PU', 'GP', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_GP = combine_results(meta, GP_types, "All_GP", "Oligodendrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _gp_type in GP_types:
        # Try "Oligodendrocyte|<type>" first, then the reversed orientation.
        pair = "Oligodendrocyte|" + _gp_type
        sub = meta[(meta['ct1'] == 'Oligodendrocyte') & (meta['ct2'] == _gp_type)]
        if sub.empty:
            pair = _gp_type + "|Oligodendrocyte"
            sub = meta[(meta['ct2'] == 'Oligodendrocyte') & (meta['ct1'] == _gp_type)]
        if sub.empty:
            print(f"No data for Oligodendrocyte and {_gp_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_gp_type, 'blue'), label=_gp_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_GP.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Oligodendrocyte and All_GP, skipping.")
    else:
        combined_GP['brain_region_corr'] = combined_GP['brain_region'].map(br_to_brc_map)
        plot_pair(combined_GP, 'All_GP', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined GPs', rasterized=True, region_col="brain_region_corr")

    # axs.set_xticks(ORDER)
    # axs.set_xticklabels(axs.get_xticklabels(), rotation=45)
    axs.set_title(f'Pooled Contact Enrichment Z-scores for GPs - Oligodendrocyte for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'oligodendrocyte_gp_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'oligodendrocyte_gp_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

In [None]:
# Interneuron subtypes (IT_types) vs. Oligodendrocyte: one figure per r in `rs`, saved as PNG and SVG.
# NOTE(review): reads from '{r}um' (not 'gr_{r}um_2' like the Astrocyte cells) and colours
# from 'Subclass_palette' — confirm this older input/labelling is intended.
rs = [15, 30, 50]
IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
# ORDER = ['CAH', 'CAB', 'PU', 'GP', 'NAC']
ORDER = ['CaH', 'CaB', 'CaT', 'Pu', 'GP', 'NAC', 'MGM1']
for r in rs:
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")  # NOTE(review): loaded but not used in this cell
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")
    meta['brain_region_corr'] = meta['brain_region'].map(br_to_brc_map)

    # Split 'pair' on '|' into its two cell-type names.
    meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
    meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

    combined_IT = combine_results(meta, IT_types, "All_IT", "Oligodendrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    for _it_type in IT_types:
        # Try "Oligodendrocyte|<type>" first, then the reversed orientation.
        pair = "Oligodendrocyte|" + _it_type
        sub = meta[(meta['ct1'] == 'Oligodendrocyte') & (meta['ct2'] == _it_type)]
        if sub.empty:
            pair = _it_type + "|Oligodendrocyte"
            sub = meta[(meta['ct2'] == 'Oligodendrocyte') & (meta['ct1'] == _it_type)]
        if sub.empty:
            print(f"No data for Oligodendrocyte and {_it_type}, skipping.")
            continue
        plot_pair(sub, pair, order=ORDER, ax=axs, color=adata.uns['Subclass_palette'].get(_it_type, 'blue'), label=_it_type, opacity=0.5, rasterized=True, region_col="brain_region_corr")

    if combined_IT.empty:
        # combine_results returns a column-less frame when no pair matched in any region,
        # so looking up 'brain_region' on it would raise KeyError.
        print("No combined data for Oligodendrocyte and All_IT, skipping.")
    else:
        combined_IT['brain_region_corr'] = combined_IT['brain_region'].map(br_to_brc_map)
        plot_pair(combined_IT, 'All_IT', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined ITs', rasterized=True, region_col="brain_region_corr")

    axs.set_title(f'Pooled Contact Enrichment Z-scores for ITs - Oligodendrocyte for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'oligodendrocyte_it_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.savefig(image_path / f'oligodendrocyte_it_contact_enrichment_{r}um.svg', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close(fig)

## Combine Regions

In [None]:
def combine_regions(df_meta, col_pair='pair', regions=None): 
    """
    Collapse per-region rows into one combined row per value of `col_pair`.

    Parameters
    ----------
    df_meta : pandas.DataFrame
        Must provide the columns `col_pair`, 'brain_region', 'mu' and 'se'.
    col_pair : str
        Column used as the grouping key (one output row per distinct value).
    regions : str, list or None
        If given, only rows whose 'brain_region' is in `regions` are kept;
        a single string is treated as a one-element list. Groups left with
        no rows after filtering are dropped from the output.

    Returns
    -------
    pandas.DataFrame
        Columns: `col_pair`, 'mu' (unweighted mean of the group's mu values),
        'se' (sqrt(sum(se_i^2)) / n), 'n_combined', plus the per-row inputs
        kept as lists ('original_pairs', 'individual_mus', 'individual_ses',
        'regions').
    """
    wanted_regions = [regions] if isinstance(regions, str) else regions

    records = []
    for key, group in df_meta.groupby(col_pair):
        if wanted_regions is not None:
            group = group.loc[group['brain_region'].isin(wanted_regions)]
            if group.empty:
                continue

        mu_values = group['mu'].values
        se_values = group['se'].values
        count = len(group)

        # Unweighted mean of the per-region estimates; its standard error
        # is sqrt(sum(SE_i^2)) / n.
        # (A precision-weighted alternative would use weights = 1 / SE_i^2,
        #  mu = sum(w * mu) / sum(w) and se = 1 / sqrt(sum(w)).)
        records.append({
            col_pair: key,
            'mu': mu_values.mean(),
            'se': np.sqrt((se_values ** 2).sum()) / count,
            'n_combined': count,
            'original_pairs': group[col_pair].tolist(),
            'individual_mus': mu_values.tolist(),
            'individual_ses': se_values.tolist(),
            'regions': group['brain_region'].values.tolist(),
        })

    return pd.DataFrame(records)

### Astrocytes

In [None]:
# Pool the three dopaminergic subclasses against "Oligo PLEKHG1" using combine_results
# (not defined in this part of the notebook) and add the corrected region labels.
# All statements are at column 0: the previous version indented everything after the
# first line, which makes the cell fail with "IndentationError: unexpected indent".
dopa_types = ["SN SOX6 Dopa", "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
combined_Dopa = combine_results(meta, dopa_types, "All_Dopa", "Oligo PLEKHG1")
combined_Dopa['brain_region_corr'] = combined_Dopa['brain_region'].map(br_to_brc_map)

# NOTE(review): nothing is drawn on this figure within this cell — looks like the start
# of an unfinished plotting cell; confirm whether it is still needed.
fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
ORDER = ['MGM1', 'STH'] # ['CaH', 'CaB', 'CaT', 'Pu', 'NAC']

In [None]:
# Display the combined dopaminergic table computed in the previous cell.
combined_Dopa

In [None]:
# Astrocyte vs. each dopaminergic subclass, one combined row per (cell type, radius).
rs = [15, 30, 50]
dopa_types = ["SN SOX6 Dopa" , "SN-VTR CALB1 Dopa", "SN-VTR GAD2 Dopa"]
astro_dopa_all = []
for _ct in dopa_types:
    print(f"{_ct}:")
    astro_dopa = []
    for r in rs: 
        DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/gr_{r}um_2')
        df = pd.read_csv(DIR / "contacts_meta_input.csv")
        meta = pd.read_csv(DIR / "meta_region_pooled.csv")

        # Split 'pair' on '|' into its first and second component.
        _pair_parts = meta['pair'].astype(str).str.split('|')
        meta['ct1'] = _pair_parts.str[0]
        meta['ct2'] = _pair_parts.str[1]

        # Single-subclass "combination" against Astrocyte, then collapse over regions.
        combined_Dopa = combine_results(meta, [_ct], _ct, "Astrocyte")
        rc_Dopa = combine_regions(combined_Dopa, col_pair='category')
        rc_Dopa['radius_um'] = r
        astro_dopa.append(rc_Dopa)

    # Stack the radii for this subclass and attach the text labels.
    astro_dopa = pd.concat(astro_dopa)
    _radius_text = astro_dopa['radius_um'].astype(str)
    astro_dopa['id'] = _radius_text + "um, Dopa"
    astro_dopa['rid'] = _radius_text + "um"
    astro_dopa_all.append(astro_dopa)
astro_dopa_all = pd.concat(astro_dopa_all)

In [None]:
# Display the stacked per-cell-type, per-radius table built in the previous cell.
astro_dopa_all

In [None]:
# Each 'regions' entry is a list (combine_regions stores regs.tolist()), and lists are
# unhashable, so Series.unique() cannot be applied to the column directly.
# Flatten to one region per row first, then take the distinct region names.
astro_dopa_all['regions'].explode().unique()

In [None]:
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)

# Bar slots: three groups (one per entry of dopa_types) separated by a one-slot gap.
# The group size is taken from astro_dopa, the frame left over from the last
# iteration of the loader loop.
n_per_type = len(astro_dopa)
xpos = np.arange(astro_dopa_all.shape[0] + 2)
x1 = xpos[:n_per_type]
x2 = xpos[n_per_type + 1:2 * n_per_type + 1]
x3 = xpos[2 * n_per_type + 2:3 * n_per_type + 2]
# xlabels = astro_msn['id'].tolist() + astro_gp['id'].tolist() + astro_it['id'].tolist()
xlabels = astro_dopa_all['radius_um'].tolist()

for (xp, dt) in zip([x1, x2, x3], dopa_types):
    # Deliberately not named `ad`: that name is the `anndata` module alias imported
    # at the top of the notebook and must not be overwritten by a DataFrame.
    dt_rows = astro_dopa_all[astro_dopa_all['category'] == dt]
    ax.bar(x=xp, height=dt_rows['mu'], yerr=dt_rows['se'], capsize=5, color='purple', alpha=0.7, width=0.9, edgecolor='black')
    # One dashed line per region, following that region's mu across the rows of this group.
    # The region order is read from the first row and reused to index every row's
    # 'individual_mus' list.
    for _i, _reg in enumerate(dt_rows['regions'].iloc[0]): 
        ax.plot(
            xp,
            [dt_rows['individual_mus'].iloc[i][_i] for i in range(len(dt_rows))],
            marker='o',
            linestyle='--',
            linewidth=1,
            markersize=5,
            color=adata.uns['brain_region_palette'].get(_reg, 'gray')
        )

ax.axhline(0, color='black', linestyle='--', linewidth=1)

# making legend: 
# 'regions' holds lists, which are unhashable; explode() flattens the column so that
# unique() yields individual region names usable as palette / label-map keys.
handles = []
for _reg in astro_dopa_all['regions'].explode().unique(): 
    handles.append(ax.plot([], [], color=adata.uns['brain_region_palette'].get(_reg, 'gray'), marker='o', linestyle='--', label=br_to_brc_map[_reg]))
ax.legend(handles=[h[0] for h in handles], loc='upper left', bbox_to_anchor=(1, 1))

ax.set_ylim((-4, 4))
ax.set_xticks(np.concatenate((x1, x2, x3)))
# ax.set_xticklabels(xlabels, rotation=45, ha='right')
ax.set_xticklabels(xlabels)
# Second axis: cell-type name under the middle bar of each group.
sec = ax.secondary_xaxis(location=0)
sec.set_xticks([x1[1], x2[1], x3[1]])
sec.set_xticklabels([f'\n{c}' for c in dopa_types])

# Third axis: long unlabeled ticks at the edges of each group act as separators.
sec2 = ax.secondary_xaxis(location=0)
sec2.set_xticks([x1[0]-0.5, x1[-1]+0.5, x2[0]-0.5, x2[-1]+0.5, x3[0]-0.5, x3[-1]+0.5], labels=[])
sec2.tick_params('x', length=40, width=1.5)

# ax.set_xlabel('Radius and Cell Type Combination', y=-1)
ax.set_ylabel('Combined Pooled Z-score', fontsize=14)

ax.set_title('Contact Enrichment Z-scores for Astrocyte - Neuronal Type Pairs, By Radius', y=1.05)
# plt.savefig(image_path / f'astrocyte_combined_contact_enrichment_allradii.png', bbox_inches='tight', dpi=300)
# plt.savefig(image_path / f'astrocyte_combined_contact_enrichment_allradii.svg', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# astro_gp = []
# rs = [15, 30, 50]
# for r in rs: 
#     DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
#     df = pd.read_csv(DIR / "contacts_meta_input.csv")
#     meta = pd.read_csv(DIR / "meta_region_pooled.csv")

#     meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
#     meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

#     GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
#     combined_GP = combine_results(meta, GP_types, "All_GP", "Astrocyte")
#     rc_GP = combine_regions(combined_GP, col_pair='category', regions=['GP'])
#     rc_GP['radius_um'] = r
#     astro_gp.append(rc_GP)

# astro_gp = pd.concat(astro_gp)
# astro_gp['id'] = astro_gp['radius_um'].astype(str) + "um, GP"
# astro_gp['rid'] = astro_gp['radius_um'].astype(str) + "um"

In [None]:
# astro_it = []
# rs = [15, 30, 50]
# for r in rs: 
#     DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
#     df = pd.read_csv(DIR / "contacts_meta_input.csv")
#     meta = pd.read_csv(DIR / "meta_region_pooled.csv")

#     meta['ct1'] = meta['pair'].astype(str).str.split('|').str[0]
#     meta['ct2'] = meta['pair'].astype(str).str.split('|').str[1]

#     IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
#     combined_IT = combine_results(meta, IT_types, "All_IT", "Astrocyte")

#     rc_IT = combine_regions(combined_IT, col_pair='category', regions=['CAH', 'CAB', 'PU', 'NAC', 'CAT', 'GP', 'MGM1'])
#     rc_IT['radius_um'] = r
#     astro_it.append(rc_IT)

# astro_it = pd.concat(astro_it)
# astro_it['id'] = astro_it['radius_um'].astype(str) + "um, IT"
# astro_it['rid'] = astro_it['radius_um'].astype(str) + "um"

In [None]:
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)

# Bar slots: MSN block, one-slot gap, GP block, one-slot gap, IT block.
_n_msn, _n_gp, _n_it = len(astro_msn), len(astro_gp), len(astro_it)
xpos = np.arange(_n_msn + _n_gp + _n_it + 2)
xmsn = xpos[:_n_msn]
xgp = xpos[_n_msn + 1:_n_msn + _n_gp + 1]
xit = xpos[_n_msn + _n_gp + 2:_n_msn + _n_gp + _n_it + 2]
# xlabels = astro_msn['id'].tolist() + astro_gp['id'].tolist() + astro_it['id'].tolist()
xlabels = [*astro_msn['radius_um'].tolist(), *astro_gp['radius_um'].tolist(), *astro_it['radius_um'].tolist()]

# Same drawing recipe for each group: bars with SE whiskers, then one dashed line per
# region (region order read from the group's first row).
_groups = (
    (xmsn, astro_msn, 'purple'),
    (xgp, astro_gp, 'green'),
    (xit, astro_it, 'blue'),
)
for _xs, _frame, _bar_color in _groups:
    ax.bar(x=_xs, height=_frame['mu'], yerr=_frame['se'], capsize=5, color=_bar_color, alpha=0.7, width=0.9, edgecolor='black')
    for _i, _reg in enumerate(_frame['regions'].iloc[0]): 
        _region_mus = [_frame['individual_mus'].iloc[i][_i] for i in range(len(_frame))]
        ax.plot(
            _xs,
            _region_mus,
            marker='o',
            linestyle='--',
            linewidth=1,
            markersize=5,
            color=adata.uns['brain_region_palette'].get(_reg, 'gray')
        )

ax.axhline(0, color='black', linestyle='--', linewidth=1)

# Legend entries come from the regions listed in the first IT row.
handles = [
    ax.plot([], [], color=adata.uns['brain_region_palette'].get(_reg, 'gray'), marker='o', linestyle='--', label=br_to_brc_map[_reg])
    for _reg in astro_it['regions'].iloc[0]
]
ax.legend(handles=[h[0] for h in handles], loc='upper left', bbox_to_anchor=(1, 1))

ax.set_ylim((-4, 4))
ax.set_xticks(np.concatenate((xmsn, xgp, xit)))
# ax.set_xticklabels(xlabels, rotation=45, ha='right')
ax.set_xticklabels(xlabels)

# Group names under the second bar of each block.
sec = ax.secondary_xaxis(location=0)
sec.set_xticks([xmsn[1], xgp[1], xit[1]])
sec.set_xticklabels(['\nMSN', '\nGP', '\nIT'])

# Long unlabeled ticks at the block edges serve as visual separators.
sec2 = ax.secondary_xaxis(location=0)
_edges = [xmsn[0]-0.5, xmsn[-1]+0.5, xgp[0]-0.5, xgp[-1]+0.5, xit[0]-0.5, xit[-1]+0.5]
sec2.set_xticks(_edges, labels=[])
sec2.tick_params('x', length=40, width=1.5)

# ax.set_xlabel('Radius and Cell Type Combination', y=-1)
ax.set_ylabel('Combined Pooled Z-score', fontsize=14)

ax.set_title('Contact Enrichment Z-scores for Astrocyte - Neuronal Type Pairs, By Radius', y=1.05)
# plt.savefig(image_path / f'astrocyte_combined_contact_enrichment_allradii.png', bbox_inches='tight', dpi=300)
# plt.savefig(image_path / f'astrocyte_combined_contact_enrichment_allradii.svg', bbox_inches='tight', dpi=300)
plt.show()

### Microglia

In [None]:
# Microglia vs. pooled MSN subclasses: one region-combined row per radius.
rs = [15, 30, 50]
ct_msn = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Microglia")
    # No region filter: every region present for the category is combined.
    rc_MSN = combine_regions(combined_MSN, col_pair='category')
    rc_MSN['radius_um'] = r
    ct_msn.append(rc_MSN)

ct_msn = pd.concat(ct_msn)
_radius_text = ct_msn['radius_um'].astype(str)
ct_msn['id'] = _radius_text + "um, MSN"

In [None]:
# Microglia vs. pooled GP-associated subclasses, restricted to the 'GP' region.
rs = [15, 30, 50]
ct_gp = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
    combined_GP = combine_results(meta, GP_types, "All_GP", "Microglia")
    rc_GP = combine_regions(combined_GP, col_pair='category', regions=['GP'])
    rc_GP['radius_um'] = r
    ct_gp.append(rc_GP)

ct_gp = pd.concat(ct_gp)
_radius_text = ct_gp['radius_um'].astype(str)
ct_gp['id'] = _radius_text + "um, GP"

In [None]:
# Microglia vs. pooled interneuron ("IT") subclasses over the listed regions.
rs = [15, 30, 50]
ct_it = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
    combined_IT = combine_results(meta, IT_types, "All_IT", "Microglia")

    _it_regions = ['CAH', 'CAB', 'PU', 'NAC', 'CAT', "GP", 'MGM1']
    rc_IT = combine_regions(combined_IT, col_pair='category', regions=_it_regions)
    rc_IT['radius_um'] = r
    ct_it.append(rc_IT)

ct_it = pd.concat(ct_it)
_radius_text = ct_it['radius_um'].astype(str)
ct_it['id'] = _radius_text + "um, IT"

In [None]:
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)

# Bar slots: MSN block, one-slot gap, GP block, one-slot gap, IT block.
_n_msn, _n_gp, _n_it = len(ct_msn), len(ct_gp), len(ct_it)
xpos = np.arange(_n_msn + _n_gp + _n_it + 2)
xmsn = xpos[:_n_msn]
xgp = xpos[_n_msn + 1:_n_msn + _n_gp + 1]
xit = xpos[_n_msn + _n_gp + 2:_n_msn + _n_gp + _n_it + 2]
# xlabels = ct_msn['id'].tolist() + ct_gp['id'].tolist() + ct_it['id'].tolist()
xlabels = [*ct_msn['radius_um'].tolist(), *ct_gp['radius_um'].tolist(), *ct_it['radius_um'].tolist()]

# Same drawing recipe for each group: bars with SE whiskers, then one dashed line per
# region (region order read from the group's first row).
_groups = (
    (xmsn, ct_msn, 'purple'),
    (xgp, ct_gp, 'green'),
    (xit, ct_it, 'blue'),
)
for _xs, _frame, _bar_color in _groups:
    ax.bar(x=_xs, height=_frame['mu'], yerr=_frame['se'], capsize=5, color=_bar_color, alpha=0.7, width=0.9, edgecolor='black')
    for _i, _reg in enumerate(_frame['regions'].iloc[0]): 
        _region_mus = [_frame['individual_mus'].iloc[i][_i] for i in range(len(_frame))]
        ax.plot(
            _xs,
            _region_mus,
            marker='o',
            linestyle='--',
            linewidth=1,
            markersize=5,
            color=adata.uns['brain_region_palette'].get(_reg, 'gray')
        )

ax.axhline(0, color='black', linestyle='--', linewidth=1)

# Legend entries come from the regions listed in the first IT row.
handles = [
    ax.plot([], [], color=adata.uns['brain_region_palette'].get(_reg, 'gray'), marker='o', linestyle='--', label=br_to_brc_map[_reg])
    for _reg in ct_it['regions'].iloc[0]
]
ax.legend(handles=[h[0] for h in handles], loc='upper left', bbox_to_anchor=(1, 1))

ax.set_ylim((-2, 3))
ax.set_xticks(np.concatenate((xmsn, xgp, xit)))
# ax.set_xticklabels(xlabels, rotation=45, ha='right')
ax.set_xticklabels(xlabels)

# Group names under the second bar of each block.
sec = ax.secondary_xaxis(location=0)
sec.set_xticks([xmsn[1], xgp[1], xit[1]])
sec.set_xticklabels(['\nMSN', '\nGP', '\nIT'])

# Long unlabeled ticks at the block edges serve as visual separators.
sec2 = ax.secondary_xaxis(location=0)
_edges = [xmsn[0]-0.5, xmsn[-1]+0.5, xgp[0]-0.5, xgp[-1]+0.5, xit[0]-0.5, xit[-1]+0.5]
sec2.set_xticks(_edges, labels=[])
sec2.tick_params('x', length=40, width=1.5)

# ax.set_xlabel('Radius and Cell Type Combination', y=-1)
ax.set_ylabel('Combined Pooled Z-score', fontsize=14)

ax.set_title('Contact Enrichment Z-scores for Microglia - Neuronal Type Pairs, By Radius', y=1.05)
plt.savefig(image_path / f'microglia_combined_contact_enrichment_allradii.png', bbox_inches='tight', dpi=300)
plt.savefig(image_path / f'microglia_combined_contact_enrichment_allradii.svg', bbox_inches='tight', dpi=300)
plt.show()

### Oligos

In [None]:
# Oligodendrocyte vs. pooled MSN subclasses: one region-combined row per radius.
rs = [15, 30, 50]
ct_msn = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Oligodendrocyte")
    # No region filter: every region present for the category is combined.
    rc_MSN = combine_regions(combined_MSN, col_pair='category')
    rc_MSN['radius_um'] = r
    ct_msn.append(rc_MSN)

ct_msn = pd.concat(ct_msn)
_radius_text = ct_msn['radius_um'].astype(str)
ct_msn['id'] = _radius_text + "um, MSN"

In [None]:
# Oligodendrocyte vs. pooled GP-associated subclasses, restricted to the 'GP' region.
rs = [15, 30, 50]
ct_gp = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    GP_types = ["CN Cholinergic GABA", "CN LHX8 GABA", "CN MEIS2 GABA", "CN ONECUT1 GABA", "CN GABA-Glut"]
    combined_GP = combine_results(meta, GP_types, "All_GP", "Oligodendrocyte")
    rc_GP = combine_regions(combined_GP, col_pair='category', regions=['GP'])
    rc_GP['radius_um'] = r
    ct_gp.append(rc_GP)

ct_gp = pd.concat(ct_gp)
_radius_text = ct_gp['radius_um'].astype(str)
ct_gp['id'] = _radius_text + "um, GP"

In [None]:
# Oligodendrocyte vs. pooled interneuron ("IT") subclasses over the listed regions.
rs = [15, 30, 50]
ct_it = []
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    IT_types = ["CN ST18 GABA", "STR RSPO2 GABA", "CN LAMP5-CXCL14 GABA", "CN LAMP5-LHX6 GABA", "CN VIP GABA"]
    combined_IT = combine_results(meta, IT_types, "All_IT", "Oligodendrocyte")

    _it_regions = ['CAH', 'CAB', 'PU', 'NAC', 'CAT', 'GP', 'MGM1']
    rc_IT = combine_regions(combined_IT, col_pair='category', regions=_it_regions)
    rc_IT['radius_um'] = r
    ct_it.append(rc_IT)

ct_it = pd.concat(ct_it)
_radius_text = ct_it['radius_um'].astype(str)
ct_it['id'] = _radius_text + "um, IT"

In [None]:
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)

# Bar slots: MSN block, one-slot gap, GP block, one-slot gap, IT block.
_n_msn, _n_gp, _n_it = len(ct_msn), len(ct_gp), len(ct_it)
xpos = np.arange(_n_msn + _n_gp + _n_it + 2)
xmsn = xpos[:_n_msn]
xgp = xpos[_n_msn + 1:_n_msn + _n_gp + 1]
xit = xpos[_n_msn + _n_gp + 2:_n_msn + _n_gp + _n_it + 2]
# xlabels = ct_msn['id'].tolist() + ct_gp['id'].tolist() + ct_it['id'].tolist()
xlabels = [*ct_msn['radius_um'].tolist(), *ct_gp['radius_um'].tolist(), *ct_it['radius_um'].tolist()]

# Same drawing recipe for each group: bars with SE whiskers, then one dashed line per
# region (region order read from the group's first row).
_groups = (
    (xmsn, ct_msn, 'purple'),
    (xgp, ct_gp, 'green'),
    (xit, ct_it, 'blue'),
)
for _xs, _frame, _bar_color in _groups:
    ax.bar(x=_xs, height=_frame['mu'], yerr=_frame['se'], capsize=5, color=_bar_color, alpha=0.7, width=0.9, edgecolor='black')
    for _i, _reg in enumerate(_frame['regions'].iloc[0]): 
        _region_mus = [_frame['individual_mus'].iloc[i][_i] for i in range(len(_frame))]
        ax.plot(
            _xs,
            _region_mus,
            marker='o',
            linestyle='--',
            linewidth=1,
            markersize=5,
            color=adata.uns['brain_region_palette'].get(_reg, 'gray')
        )

ax.axhline(0, color='black', linestyle='--', linewidth=1)

# Legend entries come from the regions listed in the first IT row.
handles = [
    ax.plot([], [], color=adata.uns['brain_region_palette'].get(_reg, 'gray'), marker='o', linestyle='--', label=br_to_brc_map[_reg])
    for _reg in ct_it['regions'].iloc[0]
]
ax.legend(handles=[h[0] for h in handles], loc='upper left', bbox_to_anchor=(1, 1))

ax.set_ylim((-10,5))
ax.set_xticks(np.concatenate((xmsn, xgp, xit)))
# ax.set_xticklabels(xlabels, rotation=45, ha='right')
ax.set_xticklabels(xlabels)

# Group names under the second bar of each block.
sec = ax.secondary_xaxis(location=0)
sec.set_xticks([xmsn[1], xgp[1], xit[1]])
sec.set_xticklabels(['\nMSN', '\nGP', '\nIT'])

# Long unlabeled ticks at the block edges serve as visual separators.
sec2 = ax.secondary_xaxis(location=0)
_edges = [xmsn[0]-0.5, xmsn[-1]+0.5, xgp[0]-0.5, xgp[-1]+0.5, xit[0]-0.5, xit[-1]+0.5]
sec2.set_xticks(_edges, labels=[])
sec2.tick_params('x', length=40, width=1.5)

# ax.set_xlabel('Radius and Cell Type Combination', y=-1)
ax.set_ylabel('Combined Pooled Z-score', fontsize=14)

ax.set_title('Contact Enrichment Z-scores for Oligodendrocyte - Neuronal Type Pairs, By Radius', y=1.05)
plt.savefig(image_path / f'oligodendrocyte_combined_contact_enrichment_allradii.png', bbox_inches='tight', dpi=300)
plt.savefig(image_path / f'oligodendrocyte_combined_contact_enrichment_allradii.svg', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# One figure per radius: each MSN subclass vs. Astrocyte across regions, plus the
# combined "All_MSN" curve in red.
rs = [15, 30, 50]
for r in rs: 
    DIR = Path(f'/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/{r}um')
    df = pd.read_csv(DIR / "contacts_meta_input.csv")
    meta = pd.read_csv(DIR / "meta_region_pooled.csv")

    # Split 'pair' on '|' into its first and second component.
    _pair_parts = meta['pair'].astype(str).str.split('|')
    meta['ct1'] = _pair_parts.str[0]
    meta['ct2'] = _pair_parts.str[1]

    MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
    combined_MSN = combine_results(meta, MSN_types, "All_MSN", "Astrocyte")

    fig, axs = plt.subplots(1, 1, figsize=(8, 5), dpi=100)
    ORDER = ['CAH', 'CAB', 'PU', 'NAC']
    for _msn_type in MSN_types:
        # The pair may be stored in either orientation; try "Astrocyte|<type>" first,
        # then "<type>|Astrocyte".
        _candidates = (
            ("Astrocyte|" + _msn_type, 'Astrocyte', _msn_type),
            (_msn_type + "|Astrocyte", _msn_type, 'Astrocyte'),
        )
        for pair, _first, _second in _candidates:
            sub = meta[(meta['ct1'] == _first) & (meta['ct2'] == _second)]
            if not sub.empty:
                break
        else:
            print(f"No data for Astrocyte and {_msn_type}, skipping.")
            continue
        # print(f"{_msn_type}:")
        # print(sub[['brain_region', 'mu', 'p_fdr']])
        _line_color = adata.uns['Subclass_palette'].get(_msn_type, 'blue')
        plot_pair(sub, pair, order=ORDER, ax=axs, color=_line_color, label=_msn_type, opacity=0.5)

    plot_pair(combined_MSN, 'All_MSN', pair_col="category", order=ORDER, ax=axs, color='red', label='Combined MSNs')

    axs.set_title(f'Pooled Contact Enrichment Z-scores for MSNs - Astrocytes for {r}um')
    axs.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.savefig(image_path / f'astrocyte_msn_contact_enrichment_{r}um.png', bbox_inches='tight', dpi=300)
    plt.show()
    plt.close()


## Sandbox

In [None]:
# Sandbox: load the 15um inputs for ad-hoc exploration.
# `df` holds contacts_meta_input.csv and `meta` holds meta_region_pooled.csv from DIR.
DIR = Path('/home/x-aklein2/projects/aklein/BICAN/BG/data/CPS/meta_contacts/15um')
df = pd.read_csv(DIR / "contacts_meta_input.csv")
meta = pd.read_csv(DIR / "meta_region_pooled.csv")

In [None]:
# Split 'pair' on '|' once and keep the first two components as ct1 / ct2.
_pair_parts = meta['pair'].astype(str).str.split('|')
meta['ct1'] = _pair_parts.str[0]
meta['ct2'] = _pair_parts.str[1]
meta.head()

In [None]:
# Combine the three MSN subclasses into a single "All_MSN" category.
# NOTE(review): this calls `combine_meta_results` with three arguments, whereas the
# analysis cells above call `combine_results` with a fourth (partner cell type)
# argument — presumably an older helper; confirm it is still defined and intended.
MSN_types = ["STR D1 MSN", "STR D2 MSN", "STR Hybrid MSN"]
combined_MSN = combine_meta_results(meta, MSN_types, "All_MSN")

In [None]:
# Sandbox version of the per-radius figure: each MSN subclass vs. Astrocyte, with the
# per-region table printed for inspection, plus the combined "All_MSN" curve.
fig, axs = plt.subplots(1, 1, figsize=(8, 5))
ORDER = ['CAH', 'CAB', 'PU', 'NAC']
for _msn_type in MSN_types:
    # The pair may be stored in either orientation; try "Astrocyte|<type>" first,
    # then "<type>|Astrocyte".
    _candidates = (
        ("Astrocyte|" + _msn_type, 'Astrocyte', _msn_type),
        (_msn_type + "|Astrocyte", _msn_type, 'Astrocyte'),
    )
    for pair, _first, _second in _candidates:
        sub = meta[(meta['ct1'] == _first) & (meta['ct2'] == _second)]
        if not sub.empty:
            break
    else:
        print(f"No data for Astrocyte and {_msn_type}, skipping.")
        continue
    print(f"{_msn_type}:")
    print(sub[['brain_region', 'mu', 'p_fdr']])
    _line_color = adata.uns['Subclass_palette'].get(_msn_type, 'blue')
    plot_pair(sub, pair, order=ORDER, ax=axs, color=_line_color, label=_msn_type, opacity=0.5)

plot_pair(combined_MSN, 'All_MSN', pair_col="category", order=ORDER, ax=axs, color='red', label='All_MSN')

axs.legend()
plt.show()

In [None]:
# Display the combined "All_MSN" table.
combined_MSN

In [None]:
# NOTE(review): the only top-level assignment of `meta_pair` visible in this part of the
# notebook is in a later cell (`meta_pair = meta[meta['pair'] == pair].sort_values(by='mu')`),
# so on a fresh top-to-bottom run this would presumably raise NameError unless an
# earlier cell defines it — confirm, or move this cell below that assignment.
meta_pair

In [None]:
def plot_pair_updated(meta, pair, pair_col='pair', order=None, ax=None, color='blue', label=None, opacity=1.0):
    """
    Plot the pooled z-score ('mu' +/- 'se') of one pair/category across brain regions.

    Works with either the 'pair' or the 'category' column via `pair_col`.

    Parameters
    ----------
    meta : pandas.DataFrame
        Must provide `pair_col`, 'brain_region', 'mu' and 'se'.
    pair : str
        Value of `pair_col` to select.
    pair_col : str
        Column holding the pair / category label.
    order : list or None
        Region order for the x axis; regions missing from the selected rows
        are skipped. If None, rows are sorted by 'mu'.
    ax : matplotlib Axes or None
        Axes to draw on; a new 6x4 figure is created when None.
    color, label, opacity
        Passed through to the errorbar / line artists (opacity as alpha).

    Returns
    -------
    The Axes that was drawn on.
    """
    meta_pair = meta[meta[pair_col] == pair]
    if order is not None: 
        # Keep only the requested regions that actually occur for this pair.
        present = set(meta_pair['brain_region'].values)
        order = [region for region in order if region in present]
        meta_pair = meta_pair.set_index('brain_region').loc[order].reset_index()
    else:
        meta_pair = meta_pair.sort_values(by='mu')

    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    
    ax.errorbar(x=meta_pair['brain_region'], y=meta_pair['mu'], yerr=meta_pair['se'], 
                fmt='o', color=color, capsize=5, label=label, alpha=opacity)
    ax.plot(meta_pair['brain_region'], meta_pair['mu'], color=color, linestyle='--', alpha=opacity)
    ax.axhline(0, color='black', linestyle='--', linewidth=1)
    ax.set_xlabel('Brain Region')
    ax.set_ylabel('Pooled Z-score')
    ax.set_title(f'Pooled Contact Enrichment Z-scores for {pair} Across Brain Regions')
    # Rotate via tick_params instead of set_xticklabels(get_xticklabels()): the latter
    # pins a fixed list of label strings on the axis, which goes stale when this
    # function is called again on the same axes with additional regions.
    ax.tick_params(axis='x', labelrotation=45)
    return ax

In [None]:
# Pick one pair at random (unseeded, so the choice changes between runs) and plot its
# pooled z-score per brain region, ordered by increasing mu.
pair = meta['pair'].sample(1).values[0]
meta_pair = meta.loc[meta['pair'] == pair].sort_values(by='mu')

fig, ax = plt.subplots(figsize=(6, 4))
_regions = meta_pair['brain_region']
_mus = meta_pair['mu']
ax.errorbar(x=_regions, y=_mus, yerr=meta_pair['se'], fmt='o', color='blue', capsize=5)
ax.plot(_regions, _mus, color='blue', linestyle='--')
ax.axhline(0, color='black', linestyle='--', linewidth=1)
ax.set(
    xlabel='Brain Region',
    ylabel='Pooled Z-score',
    title=f'Pooled Contact Enrichment Z-scores for {pair} Across Brain Regions',
)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()