In [None]:
import os
from pathlib import Path
from typing import Annotated

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
import tifffile

from sklearn.cluster import KMeans
from skimage.color import label2rgb
from sklearn.neighbors import radius_neighbors_graph
from sklearn.neighbors import NearestNeighbors

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests
from scipy.stats import entropy, chi2_contingency
from matplotlib.backends.backend_pdf import PdfPages
from statannotations.Annotator import Annotator

from scipy import sparse

# Export settings so that text in saved SVG/PDF figures remains editable text.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,  # make text editable in pdf
})

# Every relative output path written by this notebook resolves against this directory.
os.chdir('/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/lineage_specific_analysis/B_interaction')
os.getcwd()


In [None]:
# Load the merged radial-neighborhood (RN) object; every later cell subsets or
# summarizes `merged` (columns used below include 'ct', 'UPN', 'Collection', 'rn').
merged = sc.read_h5ad("/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/radial_neighborhoods/Output/merged_RN.h5ad")

In [None]:
# Display order of the Collection groups on every plot axis / hue legend.
collection_order = ["NBM", "NDMM", "PT"] 
# Fill colour (hex) used for each Collection group in the box plots.
timecols = {"NBM": "#0C7515", "NDMM": "#E619B9", "PT": "#CF99C3"} 

In [None]:
# Number of cells per annotated cell type ('ct') across the whole merged object.
merged.obs['ct'].value_counts()

In [None]:
# Per-cell sample identifier "<UPN>_<Collection>"; a missing component is
# written as an empty string rather than propagating NA.
upn_txt = merged.obs['UPN'].astype('string')
collection_txt = merged.obs['Collection'].astype('string')
merged.obs['UPN_Collection'] = upn_txt.str.cat(collection_txt, sep='_', na_rep='')

# Lookup from sample identifier back to its Collection label
# (when an identifier repeats, the last row seen wins).
uc_to_collection = (
    merged.obs
    .set_index('UPN_Collection')['Collection']
    .to_dict()
)

In [None]:
# Cell-type labels that define the two compartments analysed below.
b_lineage_labels = ['Early B', 'Mature B', 'PC']
myeloid_labels = ['GMP', 'Late Myeloid', 'Neutrophil', 'cDC', 'Ba/Eo/Ma', 'Monocyte', 'Macrophage']

# Independent copies so that later .obs edits do not touch `merged`.
is_b_lineage = merged.obs['ct'].isin(b_lineage_labels)
bpc = merged[is_b_lineage].copy()

is_myeloid = merged.obs['ct'].isin(myeloid_labels)
mye = merged[is_myeloid].copy()

In [None]:
def frac_ct_gene_pos(
    adata,
    gene,
    cell_type,
    sample_key="UPN_Collection",   
    ct_key="ct",         
    threshold=0.0         
):
    """Per-sample fraction of ALL cells that are `cell_type` and express `gene`.

    Parameters
    ----------
    adata : AnnData-like
        Must support ``adata[:, gene].X`` and expose ``adata.obs`` containing
        the `sample_key` and `ct_key` columns.
    gene : str
        Variable name used to slice the expression matrix.
    cell_type : value compared for equality against ``adata.obs[ct_key]``.
    sample_key : str
        obs column that identifies a sample; one output row per value.
    ct_key : str
        obs column holding the cell-type label.
    threshold : float
        A cell is gene-positive when its expression is strictly greater.

    Returns
    -------
    pandas.DataFrame
        Columns: `sample_key`, ``gene``, ``cell_type``, ``n_ct_pos``
        (cells that are `cell_type` and gene-positive), ``n_total`` (all cells
        of the sample in `adata`, regardless of type) and
        ``frac_ct_pos_of_all_cells`` = n_ct_pos / n_total.
    """
    x = adata[:, gene].X  # (ncells, 1)
    # .X can be a scipy sparse matrix or already dense; only sparse matrices
    # have (and need) .toarray().
    if sparse.issparse(x):
        x = x.toarray()
    x = np.asarray(x).ravel()

    df = adata.obs[[sample_key, ct_key]].copy()
    df["expr"] = x

    df["is_ct"] = (df[ct_key] == cell_type)
    df["is_pos"] = df["expr"] > threshold
    df["is_ct_pos"] = df["is_ct"] & df["is_pos"]

    summary = (
        df.groupby(sample_key, observed=True)
          .agg(
              n_total=("expr", "size"),                 # all cells in sample
              n_ct_pos=("is_ct_pos", "sum"),            # ct & gene+
          )
          .reset_index()
    )

    summary["gene"] = gene
    summary["cell_type"] = cell_type
    # Denominator is every cell of the sample, not only cells of `cell_type`.
    summary["frac_ct_pos_of_all_cells"] = summary["n_ct_pos"] / summary["n_total"]

    summary = summary[[sample_key, "gene", "cell_type", "n_ct_pos", "n_total",
                       "frac_ct_pos_of_all_cells"]]

    return summary


In [None]:
# Receptor genes, evaluated for every cell type present in the B/PC subset.
genes = ["TNFRSF13B", "TNFRSF13C", "TNFRSF17"]
cts = bpc.obs['ct'].unique()

# One per-sample summary per (gene, cell type). `merged` is passed so the
# denominator is all cells of the sample, not only B/PC cells.
all_tables_rec = [
    frac_ct_gene_pos(
        merged,
        gene=g,
        cell_type=ct,
        sample_key="UPN_Collection",
        ct_key="ct",
        threshold=0,
    )
    for g in genes
    for ct in cts
]

big_table_rec = pd.concat(all_tables_rec, ignore_index=True)
# Re-attach the Collection label of each sample for grouping in the plots.
big_table_rec['Collection'] = big_table_rec['UPN_Collection'].map(uc_to_collection)
big_table_rec


In [None]:
# One PDF page per (receptor gene, B/PC cell type): per-sample fraction of all
# cells that are gene-positive cells of that type, grouped by Collection.
pdf_path = "aprilbaffReceptor_meanfracposCT_boxplot_xenium.pdf"
genes = ["TNFRSF13B", "TNFRSF13C", "TNFRSF17"]
cts = ['Early B', 'Mature B', 'PC']
collection_order = ["NBM", "NDMM", "PT"]

with PdfPages(pdf_path) as pdf:
    for gene in genes:
        for ct in cts:
            is_gene = big_table_rec["gene"] == gene
            is_ct = big_table_rec["cell_type"] == ct
            sub = big_table_rec[is_gene & is_ct].copy()

            # Arguments shared by the box layer, the point layer and the annotator.
            common = dict(
                data=sub,
                order=collection_order,
                x="Collection",
                y="frac_ct_pos_of_all_cells",
            )

            fig, ax = plt.subplots(figsize=(2, 4))
            sns.boxplot(palette=timecols, fliersize=0, ax=ax, **common)
            sns.stripplot(color='black', alpha=1, size=2, ax=ax, **common)

            # All pairwise Collection comparisons, in collection_order order.
            pairs = [
                (first, second)
                for idx, first in enumerate(collection_order)
                for second in collection_order[idx + 1:]
            ]

            annot = Annotator(ax, pairs, **common)
            annot.configure(
                test="Mann-Whitney",
                text_format="star",
                comparisons_correction=None,   # raw p-values, no multiple-testing correction
                loc="inside",
                line_height=0.03,
                line_offset=0.02,
            )
            annot.apply_and_annotate()

            ax.set_title(f'{gene} : {ct}')
            sns.despine()
            fig.tight_layout()
            pdf.savefig(fig)
            plt.close(fig)

In [None]:
# Ligand genes, evaluated for every cell type present in the myeloid subset.
genes = ["TNFSF13", "TNFSF13B"]
cts = mye.obs['ct'].unique()

# One per-sample summary per (gene, cell type); the denominator is all cells
# of the sample in `merged`.
all_tables_lig = [
    frac_ct_gene_pos(
        merged,
        gene=g,
        cell_type=ct,
        sample_key="UPN_Collection",
        ct_key="ct",
        threshold=0,
    )
    for g in genes
    for ct in cts
]

big_table_lig = pd.concat(all_tables_lig, ignore_index=True)
# Re-attach the Collection label of each sample.
big_table_lig['Collection'] = big_table_lig['UPN_Collection'].map(uc_to_collection)
big_table_lig


In [None]:
# Stack the receptor and ligand summaries and export them as one tab-separated
# file (written relative to the working directory set at the top).
big_table = pd.concat([big_table_rec, big_table_lig], ignore_index=True)
big_table.to_csv("fraction_ct_gene_positive.tsv", sep="\t", index=False)


In [None]:
def remove_outliers_iqr(df, col='frac_ct', k=1.5):
    """Return the rows of `df` whose `col` value lies within the Tukey fences.

    The fences are ``Q1 - k*IQR`` and ``Q3 + k*IQR`` (both inclusive), computed
    over the whole column at once, not per group. Rows where `col` is NaN fail
    the comparison and are therefore dropped as well.
    """
    first_q, third_q = df[col].quantile(0.25), df[col].quantile(0.75)
    spread = third_q - first_q
    keep = df[col].between(first_q - k * spread, third_q + k * spread)
    return df[keep]

In [None]:
# TNFRSF13B-positive fraction of B/PC cells per UPN x Collection x radial
# neighborhood (rn), drawn as a grouped box plot and saved to PDF.
gene = "TNFRSF13B"
threshold = 0
adata = bpc.copy()

# Flat dense expression vector for the gene, then a 0/1 positivity flag per cell.
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(n_cells=("gene_pos", "size"), n_pos=("gene_pos", "sum"))
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]

collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "TNFRSF13B_bpc_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    # Only the drawn data are IQR-trimmed; the tests below run on the full `df`.
    df_plot = remove_outliers_iqr(df, col='frac_pos')
    layout = dict(x="rn", y="frac_pos", hue="Collection", hue_order=collection_order)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=df_plot, dodge=True, alpha=0.7, size=2, palette="dark:black",
        ax=ax, **layout
    )

    # Keep one legend entry per Collection (presumably the box-plot handles;
    # the strip plot contributes a second set).
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each RN, compare every pair of Collections.
    pairs = [
        ((rn, first), (rn, second))
        for rn in rns
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=df, order=rns, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_title(f'{gene}+ of fraction of total B/PC cells in radial neighborhoods')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# TNFRSF17-positive fraction of B/PC cells per UPN x Collection x radial
# neighborhood (rn), drawn as a grouped box plot and saved to PDF.
gene = "TNFRSF17"
threshold = 0
adata = bpc.copy()

# Flat dense expression vector for the gene, then a 0/1 positivity flag per cell.
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(n_cells=("gene_pos", "size"), n_pos=("gene_pos", "sum"))
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]

collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "TNFRSF17_bpc_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    # Only the drawn data are IQR-trimmed; the tests below run on the full `df`.
    df_plot = remove_outliers_iqr(df, col='frac_pos')
    layout = dict(x="rn", y="frac_pos", hue="Collection", hue_order=collection_order)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=df_plot, dodge=True, alpha=0.7, size=2, palette="dark:black",
        ax=ax, **layout
    )

    # Keep one legend entry per Collection (presumably the box-plot handles;
    # the strip plot contributes a second set).
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each RN, compare every pair of Collections.
    pairs = [
        ((rn, first), (rn, second))
        for rn in rns
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=df, order=rns, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_title(f'{gene}+ of fraction of total B/PC cells in radial neighborhoods')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# NOTE(review): this cell is a line-for-line duplicate of the preceding
# TNFRSF17 cell and writes to the same PDF path, so it only regenerates that
# file. Possibly intended for a different gene (e.g. TNFRSF13C, which is in the
# receptor gene list but has no RN plot) — TODO confirm, then change or delete.
adata = bpc.copy()
gene = "TNFRSF17"
threshold = 0

# Dense, flat expression vector for the selected gene.
Xg = adata[:, gene].X
if sparse.issparse(Xg):
    Xg = Xg.toarray()
expr = np.ravel(Xg)

# 1 where the cell's expression exceeds the threshold, else 0.
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(
        n_cells=("gene_pos", "size"),
        n_pos=("gene_pos", "sum")
    )
    .reset_index()
)

df["frac_pos"] = df["n_pos"] / df["n_cells"]


collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "TNFRSF17_bpc_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(10,5))
    # Plotted data are IQR-trimmed (globally, over all groups); the statistical
    # annotation further down is computed on the untrimmed `df`.
    ax = sns.boxplot(
        data= remove_outliers_iqr(df, col='frac_pos'),
        x="rn",
        y="frac_pos",
        hue="Collection",
        hue_order=collection_order,
        palette=timecols,
        fliersize=0
    )
    sns.stripplot(
        data=remove_outliers_iqr(df, col='frac_pos'),
        x="rn",
        y="frac_pos",
        hue="Collection",
        hue_order=collection_order,
        dodge=True,
        alpha=0.7,
        size=2,
        palette="dark:black"
    )

    # Keep only the first three legend entries (one per Collection).
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # Within each RN, every pair of Collections is compared.
    pairs = []
    for rn in rns:
        for i in range(len(collection_order)):
            for j in range(i+1, len(collection_order)):
                pairs.append(((rn, collection_order[i]), (rn, collection_order[j])))

    annot = Annotator(
        ax,
        pairs,
        data=df,
        x="rn",
        y="frac_pos",
        hue="Collection",
        order=rns,
        hue_order=collection_order
    )
    # Mann-Whitney per pair, no multiple-testing correction; only significant
    # comparisons are drawn.
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02
    )
    annot.apply_and_annotate()

    plt.title(f'{gene}+ of fraction of total B/PC cells in radial neighborhoods')
    plt.tight_layout()
    pdf.savefig()
    plt.close()


In [None]:
# TNFSF13B-positive fraction of myeloid cells (BYGXJ6_hMulti panel only) per
# UPN x Collection x radial neighborhood (rn), as a grouped box plot PDF.
gene = "TNFSF13B"
threshold = 0
on_panel = mye.obs['Panel'] == 'BYGXJ6_hMulti'
adata = mye[on_panel].copy()

# Flat dense expression vector for the gene, then a 0/1 positivity flag per cell.
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(n_cells=("gene_pos", "size"), n_pos=("gene_pos", "sum"))
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]

collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "TNFSF13B_mye_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    # Only the drawn data are IQR-trimmed; the tests below run on the full `df`.
    df_plot = remove_outliers_iqr(df, col='frac_pos')
    layout = dict(x="rn", y="frac_pos", hue="Collection", hue_order=collection_order)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=df_plot, dodge=True, alpha=0.7, size=2, palette="dark:black",
        ax=ax, **layout
    )

    # Keep one legend entry per Collection (presumably the box-plot handles;
    # the strip plot contributes a second set).
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each RN, compare every pair of Collections.
    pairs = [
        ((rn, first), (rn, second))
        for rn in rns
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=df, order=rns, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction='BH',   # unlike the sibling RN plots, p-values are BH-corrected here
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_title(f'{gene}+ of fraction of total myeloid cells in radial neighborhoods')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# List the distinct Panel values among myeloid cells (the ligand cells around
# this one are restricted to the 'BYGXJ6_hMulti' panel).
mye.obs['Panel'].unique()

In [None]:
# TNFSF13-positive fraction of myeloid cells (BYGXJ6_hMulti panel only) per
# UPN x Collection x radial neighborhood (rn), as a grouped box plot PDF.
gene = "TNFSF13"
threshold = 0
on_panel = mye.obs['Panel'] == 'BYGXJ6_hMulti'
adata = mye[on_panel].copy()

# Flat dense expression vector for the gene, then a 0/1 positivity flag per cell.
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(n_cells=("gene_pos", "size"), n_pos=("gene_pos", "sum"))
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]

collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "TNFSF13_mye_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    # Only the drawn data are IQR-trimmed; the tests below run on the full `df`.
    df_plot = remove_outliers_iqr(df, col='frac_pos')
    layout = dict(x="rn", y="frac_pos", hue="Collection", hue_order=collection_order)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=df_plot, dodge=True, alpha=0.7, size=2, palette="dark:black",
        ax=ax, **layout
    )

    # Keep one legend entry per Collection (presumably the box-plot handles;
    # the strip plot contributes a second set).
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each RN, compare every pair of Collections.
    pairs = [
        ((rn, first), (rn, second))
        for rn in rns
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=df, order=rns, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_title(f'{gene}+ of fraction of total myeloid cells in radial neighborhoods')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# check MSCs expressing CXCL12 vs B/PC expressing CXCR4
# (this cell covers the MSC / CXCL12 side only: CXCL12-positive fraction of
# MSCs per UPN x Collection x radial neighborhood)
gene = "CXCL12"
threshold = 0
is_msc = merged.obs['ct'].isin(['MSC'])
adata = merged[is_msc].copy()

# Flat dense expression vector for the gene, then a 0/1 positivity flag per cell.
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(n_cells=("gene_pos", "size"), n_pos=("gene_pos", "sum"))
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]

collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "CXCL12_msc_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    # Only the drawn data are IQR-trimmed; the tests below run on the full `df`.
    df_plot = remove_outliers_iqr(df, col='frac_pos')
    layout = dict(x="rn", y="frac_pos", hue="Collection", hue_order=collection_order)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=df_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=df_plot, dodge=True, alpha=0.7, size=2, palette="dark:black",
        ax=ax, **layout
    )

    # Keep one legend entry per Collection (presumably the box-plot handles;
    # the strip plot contributes a second set).
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each RN, compare every pair of Collections.
    pairs = [
        ((rn, first), (rn, second))
        for rn in rns
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=df, order=rns, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_title(f'{gene}+ of fraction of total MSCs in radial neighborhoods')
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)

In [None]:
# Inspect the per-cell metadata of the myeloid subset (the distance cells below
# read its 'Sample', 'Collection', 'x_centroid' and 'y_centroid' columns).
mye.obs

In [None]:
from sklearn.neighbors import KDTree

# For every Sample x Collection and every B/PC cell type: distance from each
# myeloid ("ligand") cell to its nearest B/PC ("receptor") cell of that type,
# summarized as min / mean / median over the myeloid cells.

# NOTE(review): not used in this cell — grouping below is by Sample + Collection.
group_keys = ["UPN", "Collection"]
receptor_types = ["Early B", "Mature B", "PC"]

results = []

lig_adata=mye.copy()
rec_adata=bpc.copy()

# all Sample+Collection combos seen in either ligands or receptors
lig_groups = set(zip(lig_adata.obs["Sample"], lig_adata.obs["Collection"]))
rec_groups = set(zip(rec_adata.obs["Sample"], rec_adata.obs["Collection"]))
all_groups = sorted(lig_groups.union(rec_groups))

for sample, coll in all_groups:
    lig_mask = (lig_adata.obs["Sample"] == sample) & (lig_adata.obs["Collection"] == coll)
    n_lig = int(lig_mask.sum())

    # Query points: (x, y) centroids of the ligand cells, when there are any.
    lig_coords = None
    if n_lig > 0:
        lig_coords = np.c_[
            lig_adata.obs.loc[lig_mask, "x_centroid"].to_numpy(),
            lig_adata.obs.loc[lig_mask, "y_centroid"].to_numpy()
        ]

    for rtype in receptor_types:
        rec_mask = (
            (rec_adata.obs["Sample"] == sample) &
            (rec_adata.obs["Collection"] == coll) &
            (rec_adata.obs["ct"] == rtype)
        )
        n_rec = int(rec_mask.sum())

        # Distances default to NaN; both counts are always the true counts,
        # including when the other side is empty.
        row = {
            "Sample": sample,
            "Collection": coll,
            "receptor_type": rtype,
            "min_dist": np.nan,
            "mean_dist": np.nan,
            "median_dist": np.nan,
            "n_ligand_cells": n_lig,
            "n_receptor_cells": n_rec,
        }

        if n_lig > 0 and n_rec > 0:
            rec_coords = np.c_[
                rec_adata.obs.loc[rec_mask, "x_centroid"].to_numpy(),
                rec_adata.obs.loc[rec_mask, "y_centroid"].to_numpy()
            ]

            # KDTree of this receptor type
            tree = KDTree(rec_coords, leaf_size=40)
            dists, _ = tree.query(lig_coords, k=1)  # nearest receptor of this type for each ligand cell
            dists = dists.ravel()

            row["min_dist"] = float(dists.min())
            row["mean_dist"] = float(dists.mean())
            row["median_dist"] = float(np.median(dists))

        results.append(row)

dist_df = pd.DataFrame(results)


In [None]:
# Inspect the per-sample, per-receptor-type nearest-neighbour distance table.
dist_df

In [None]:
# Median myeloid -> nearest B/PC distance per sample, grouped by receptor cell
# type and Collection.
pdf_path = 'median_distance_myeToBPC.pdf'
with PdfPages(pdf_path) as pdf:
    # Drawn data are IQR-trimmed (NaN rows drop out too); tests use the full dist_df.
    dist_plot = remove_outliers_iqr(dist_df, col='median_dist')
    layout = dict(
        order=receptor_types,
        x="receptor_type",
        y="median_dist",
        hue="Collection",
        hue_order=collection_order,
    )

    fig, ax = plt.subplots(figsize=(3, 5))
    sns.boxplot(data=dist_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=dist_plot, dodge=True, color="black", size=3, alpha=0.7,
        ax=ax, **layout
    )

    # Keep one legend entry per Collection.
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each receptor type, compare every pair of Collections.
    pairs = [
        ((r, first), (r, second))
        for r in receptor_types
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=dist_df, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction='BH',
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_ylabel("Median distance (myeloid to B/PC)")
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# Minimum myeloid -> nearest B/PC distance per sample, grouped by receptor cell
# type and Collection.
pdf_path = 'min_distance_myeToBPC.pdf'
with PdfPages(pdf_path) as pdf:
    # Drawn data are IQR-trimmed (NaN rows drop out too); tests use the full dist_df.
    dist_plot = remove_outliers_iqr(dist_df, col='min_dist')
    layout = dict(
        order=receptor_types,
        x="receptor_type",
        y="min_dist",
        hue="Collection",
        hue_order=collection_order,
    )

    fig, ax = plt.subplots(figsize=(3, 5))
    sns.boxplot(data=dist_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=dist_plot, dodge=True, color="black", size=3, alpha=0.7,
        ax=ax, **layout
    )

    # Keep one legend entry per Collection.
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each receptor type, compare every pair of Collections.
    pairs = [
        ((r, first), (r, second))
        for r in receptor_types
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=dist_df, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_ylabel("Min distance (myeloid to B/PC)")
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


In [None]:
# Mean myeloid -> nearest B/PC distance per sample, grouped by receptor cell
# type and Collection.
pdf_path = 'mean_distance_myeToBPC.pdf'
with PdfPages(pdf_path) as pdf:
    # Drawn data are IQR-trimmed on mean_dist (NaN rows drop out too); the
    # statistical tests below use the full dist_df.
    dist_plot = remove_outliers_iqr(dist_df, col='mean_dist')

    plt.figure(figsize=(3,5))
    ax = sns.boxplot(
        data=dist_plot,
        order=receptor_types,
        x="receptor_type",
        # Fixed: boxes previously drew "min_dist" while the points, the
        # annotation and the axis label all refer to the mean distance.
        y="mean_dist",
        hue="Collection",
        palette=timecols,
        hue_order=collection_order,
        fliersize=0
    )
    sns.stripplot(
        data=dist_plot,
        order=receptor_types,
        x="receptor_type",
        y="mean_dist",
        hue="Collection",
        hue_order=collection_order,
        dodge=True,
        color="black",
        size=3,
        alpha=0.7
    )

    # Keep one legend entry per Collection.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:len(collection_order)], labels[:len(collection_order)], title="Collection")

    # Within each receptor type, compare every pair of Collections.
    pairs = []
    for r in receptor_types:
        for i in range(len(collection_order)):
            for j in range(i+1, len(collection_order)):
                pairs.append(((r, collection_order[i]), (r, collection_order[j])))

    annot = Annotator(
        ax,
        pairs,
        data=dist_df,
        x="receptor_type",
        y="mean_dist",
        hue="Collection",
        order=receptor_types,
        hue_order=collection_order
    )
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02
    )
    annot.apply_and_annotate()

    plt.ylabel("Mean distance (myeloid to B/PC)")
    plt.tight_layout()
    pdf.savefig()
    plt.close()


In [None]:
# Ligand/receptor pair analysed in this cell and the receptor cell types of interest.
lig_gene = "TNFSF13"
rec_gene = "TNFRSF13B"
receptor_types = ["Mature B", "PC"]
threshold = 0             # > threshold = positive

# Restrict both sides to the BYGXJ6_hMulti panel. This narrows the existing
# lig_adata / rec_adata objects, which must already be defined by an earlier cell.
lig_on_panel = lig_adata.obs['Panel'] == 'BYGXJ6_hMulti'
lig_adata = lig_adata[lig_on_panel].copy()

rec_on_panel = rec_adata.obs['Panel'] == 'BYGXJ6_hMulti'
rec_adata = rec_adata[rec_on_panel].copy()

def get_expr(adata, gene):
    """Return the expression of `gene` across all cells as a flat 1-D array.

    Reads ``adata[:, gene].X``; a scipy sparse result is densified first.
    """
    column = adata[:, gene].X
    dense = column.toarray() if sparse.issparse(column) else column
    return np.ravel(dense)


# ligand+ in mye (lig_adata)
lig_expr = get_expr(lig_adata, lig_gene)
lig_adata.obs["lig_pos"] = lig_expr > threshold

# receptor+ in bpc (rec_adata)
rec_expr = get_expr(rec_adata, rec_gene)
rec_adata.obs["rec_pos"] = rec_expr > threshold

# Keep only ligand-positive / receptor-positive cells for the distance search.
lig_adata_pos = lig_adata[lig_adata.obs["lig_pos"]].copy()
rec_adata_pos = rec_adata[rec_adata.obs["rec_pos"]].copy()


results = []

# use *_pos instead of original: Sample+Collection combos seen on either side
lig_groups = set(zip(lig_adata_pos.obs["Sample"], lig_adata_pos.obs["Collection"]))
rec_groups = set(zip(rec_adata_pos.obs["Sample"], rec_adata_pos.obs["Collection"]))
all_groups = sorted(lig_groups.union(rec_groups))

for sample, coll in all_groups:
    lig_mask = ((lig_adata_pos.obs["Sample"] == sample) &
                (lig_adata_pos.obs["Collection"] == coll))
    n_lig = int(lig_mask.sum())

    # Query points: (x, y) centroids of the ligand+ cells, when there are any.
    lig_coords = None
    if n_lig > 0:
        lig_coords = np.c_[
            lig_adata_pos.obs.loc[lig_mask, "x_centroid"].to_numpy(),
            lig_adata_pos.obs.loc[lig_mask, "y_centroid"].to_numpy()
        ]

    for rtype in receptor_types:
        rec_mask = (
            (rec_adata_pos.obs["Sample"] == sample) &
            (rec_adata_pos.obs["Collection"] == coll) &
            (rec_adata_pos.obs["ct"] == rtype)
        )
        n_rec = int(rec_mask.sum())

        # Distances default to NaN; both counts are always the true counts,
        # including when the other side is empty.
        row = {
            "Sample": sample,
            "Collection": coll,
            "receptor_type": rtype,
            "min_dist": np.nan,
            "mean_dist": np.nan,
            "median_dist": np.nan,
            "n_ligand_cells": n_lig,
            "n_receptor_cells": n_rec,
        }

        if n_lig > 0 and n_rec > 0:
            rec_coords = np.c_[
                rec_adata_pos.obs.loc[rec_mask, "x_centroid"].to_numpy(),
                rec_adata_pos.obs.loc[rec_mask, "y_centroid"].to_numpy()
            ]

            # Nearest receptor+ cell of this type for each ligand+ cell.
            tree = KDTree(rec_coords, leaf_size=40)
            dists, _ = tree.query(lig_coords, k=1)
            dists = dists.ravel()

            row["min_dist"] = float(dists.min())
            row["mean_dist"] = float(dists.mean())
            row["median_dist"] = float(np.median(dists))

        results.append(row)

dist_df = pd.DataFrame(results)

# Minimum TNFSF13+ myeloid -> nearest TNFRSF13B+ B/PC distance per sample.
pdf_path = 'min_distance_TNFSF13_TNFRSF13B_pos_myeBPC.pdf'
with PdfPages(pdf_path) as pdf:
    # Drawn data are IQR-trimmed (NaN rows drop out too); tests use the full dist_df.
    dist_plot = remove_outliers_iqr(dist_df, col='min_dist')
    layout = dict(
        order=receptor_types,
        x="receptor_type",
        y="min_dist",
        hue="Collection",
        hue_order=collection_order,
    )

    fig, ax = plt.subplots(figsize=(2.5, 5))
    sns.boxplot(data=dist_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=dist_plot, dodge=True, color="black", size=3, alpha=0.7,
        ax=ax, **layout
    )

    # Keep one legend entry per Collection.
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each receptor type, compare every pair of Collections.
    pairs = [
        ((r, first), (r, second))
        for r in receptor_types
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=dist_df, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_ylabel("Min distance (TNFSF13+ Mye to TNFRSF13B+ B/PC)")
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)


# Median TNFSF13+ myeloid -> nearest TNFRSF13B+ B/PC distance per sample.
pdf_path = 'median_distance_TNFSF13_TNFRSF13B_pos_myeBPC.pdf'
with PdfPages(pdf_path) as pdf:
    # Drawn data are IQR-trimmed (NaN rows drop out too); tests use the full dist_df.
    dist_plot = remove_outliers_iqr(dist_df, col='median_dist')
    layout = dict(
        order=receptor_types,
        x="receptor_type",
        y="median_dist",
        hue="Collection",
        hue_order=collection_order,
    )

    fig, ax = plt.subplots(figsize=(2.5, 5))
    sns.boxplot(data=dist_plot, palette=timecols, fliersize=0, ax=ax, **layout)
    sns.stripplot(
        data=dist_plot, dodge=True, color="black", size=3, alpha=0.7,
        ax=ax, **layout
    )

    # Keep one legend entry per Collection.
    handles, labels = ax.get_legend_handles_labels()
    n_groups = len(collection_order)
    ax.legend(handles[:n_groups], labels[:n_groups], title="Collection")

    # Within each receptor type, compare every pair of Collections.
    pairs = [
        ((r, first), (r, second))
        for r in receptor_types
        for idx, first in enumerate(collection_order)
        for second in collection_order[idx + 1:]
    ]

    annot = Annotator(ax, pairs, data=dist_df, **layout)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction='BH',
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    ax.set_ylabel("Median distance (TNFSF13+ Mye to TNFRSF13B+ B/PC)")
    fig.tight_layout()
    pdf.savefig(fig)
    plt.close(fig)

In [None]:
# Second ligand/receptor pair: same workflow as the TNFSF13/TNFRSF13B cell,
# with a different gene pair and receptor cell types.
receptor_types = ["Early B", "Mature B"]

lig_gene = "TNFSF13B"  
rec_gene = "TNFRSF13C"    
threshold = 0             # > threshold = positive

# NOTE(review): lig_adata / rec_adata are not created in this cell; they are
# re-filtered from whatever an earlier cell left in them (hidden-state
# dependency). They also still carry the lig_pos / rec_pos columns of the
# previous pair until those are overwritten below.
lig_adata = lig_adata[lig_adata.obs['Panel']=='BYGXJ6_hMulti'].copy()
rec_adata = rec_adata[rec_adata.obs['Panel']=='BYGXJ6_hMulti'].copy()

def get_expr(adata, gene):
    """Return the values of `gene` for every cell in `adata` as a flat 1-D array.

    `adata[:, gene].X` may be a scipy sparse matrix or a dense array; sparse
    input is densified before flattening.
    """
    column = adata[:, gene].X
    dense = column.toarray() if sparse.issparse(column) else column
    return np.ravel(dense)


# Flag ligand+ cells in lig_adata and receptor+ cells in rec_adata
# (positive means expression > threshold), then keep only the positive cells.
lig_expr = get_expr(lig_adata, lig_gene)
lig_adata.obs["lig_pos"] = lig_expr > threshold

rec_expr = get_expr(rec_adata, rec_gene)
rec_adata.obs["rec_pos"] = rec_expr > threshold

lig_adata_pos = lig_adata[lig_adata.obs["lig_pos"]].copy()
rec_adata_pos = rec_adata[rec_adata.obs["rec_pos"]].copy()


# One output row per (Sample, Collection, receptor_type): summary statistics of
# the distance from each ligand+ cell to its nearest receptor+ cell of that type.
results = []

# every (Sample, Collection) that has at least one ligand+ or receptor+ cell
lig_groups = set(zip(lig_adata_pos.obs["Sample"], lig_adata_pos.obs["Collection"]))
rec_groups = set(zip(rec_adata_pos.obs["Sample"], rec_adata_pos.obs["Collection"]))
all_groups = sorted(lig_groups.union(rec_groups))

for sample, coll in all_groups:
    lig_mask = ((lig_adata_pos.obs["Sample"] == sample) &
                (lig_adata_pos.obs["Collection"] == coll))
    n_lig = int(lig_mask.sum())
    lig_coords = lig_adata_pos.obs.loc[lig_mask, ["x_centroid", "y_centroid"]].to_numpy()

    for rtype in receptor_types:
        rec_mask = (
            (rec_adata_pos.obs["Sample"] == sample) &
            (rec_adata_pos.obs["Collection"] == coll) &
            (rec_adata_pos.obs["ct"] == rtype)
        )
        n_rec = int(rec_mask.sum())

        # Distances default to NaN; both cell counts are always the true counts
        # (previously n_receptor_cells was forced to 0 whenever n_lig was 0).
        row = {
            "Sample": sample,
            "Collection": coll,
            "receptor_type": rtype,
            "min_dist": np.nan,
            "mean_dist": np.nan,
            "median_dist": np.nan,
            "n_ligand_cells": n_lig,
            "n_receptor_cells": n_rec,
        }

        # a distance exists only when both populations are present in this sample
        if n_lig > 0 and n_rec > 0:
            rec_coords = rec_adata_pos.obs.loc[rec_mask, ["x_centroid", "y_centroid"]].to_numpy()

            # NOTE(review): KDTree is not in the notebook's first import cell; the
            # leaf_size keyword matches sklearn.neighbors.KDTree -- confirm it is
            # imported in an earlier cell.
            tree = KDTree(rec_coords, leaf_size=40)
            dists, _ = tree.query(lig_coords, k=1)  # nearest receptor+ cell per ligand+ cell
            dists = dists.ravel()

            row.update(
                min_dist=float(dists.min()),
                mean_dist=float(dists.mean()),
                median_dist=float(np.median(dists)),
            )

        results.append(row)

dist_df = pd.DataFrame(results)

# One-page PDF: grouped box + strip plot of dist_df["median_dist"] per receptor
# cell type, coloured by Collection, with BH-corrected Mann-Whitney annotations.
pdf_path = 'median_distance_TNFSF13B_TNFRSF13C_pos_myeBPC.pdf'
with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(2.5, 5))

    # axis / grouping arguments shared by both seaborn layers and the annotator
    layout_kws = dict(
        x="receptor_type",
        y="median_dist",
        hue="Collection",
        order=receptor_types,
        hue_order=collection_order,
    )

    # remove_outliers_iqr is defined elsewhere in the notebook
    # (presumably an IQR-based row filter on `col` -- verify against its definition)
    ax = sns.boxplot(
        data=remove_outliers_iqr(dist_df, col='median_dist'),
        palette=timecols,
        fliersize=0,
        **layout_kws,
    )
    sns.stripplot(
        data=remove_outliers_iqr(dist_df, col='median_dist'),
        dodge=True,
        color="black",
        size=3,
        alpha=0.7,
        **layout_kws,
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each receptor type
    n_coll = len(collection_order)
    pairs = [
        ((r, collection_order[i]), (r, collection_order[j]))
        for r in receptor_types
        for i in range(n_coll)
        for j in range(i + 1, n_coll)
    ]

    # NOTE: the tests run on the full dist_df, not on the filtered frame that is drawn
    annot = Annotator(ax, pairs, data=dist_df, **layout_kws)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction='BH',
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    plt.ylabel("Median distance (TNFSF13B+ Mye to TNFRSF13C+ B)")
    plt.tight_layout()
    pdf.savefig()
    plt.close()


# One-page PDF: grouped box + strip plot of dist_df["min_dist"] per receptor
# cell type, coloured by Collection, with uncorrected Mann-Whitney annotations.
pdf_path = 'min_distance_TNFSF13B_TNFRSF13C_pos_myeBPC.pdf'
with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(2.5, 5))

    # axis / grouping arguments shared by both seaborn layers and the annotator
    layout_kws = dict(
        x="receptor_type",
        y="min_dist",
        hue="Collection",
        order=receptor_types,
        hue_order=collection_order,
    )

    # remove_outliers_iqr is defined elsewhere in the notebook
    # (presumably an IQR-based row filter on `col` -- verify against its definition)
    ax = sns.boxplot(
        data=remove_outliers_iqr(dist_df, col='min_dist'),
        palette=timecols,
        fliersize=0,
        **layout_kws,
    )
    sns.stripplot(
        data=remove_outliers_iqr(dist_df, col='min_dist'),
        dodge=True,
        color="black",
        size=3,
        alpha=0.7,
        **layout_kws,
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each receptor type
    n_coll = len(collection_order)
    pairs = [
        ((r, collection_order[i]), (r, collection_order[j]))
        for r in receptor_types
        for i in range(n_coll)
        for j in range(i + 1, n_coll)
    ]

    # NOTE: the tests run on the full dist_df, not on the filtered frame that is drawn.
    # NOTE(review): no multiple-testing correction here, while the median plot uses 'BH'.
    annot = Annotator(ax, pairs, data=dist_df, **layout_kws)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    plt.ylabel("Min distance (TNFSF13B+ Mye to TNFRSF13C+ B)")
    plt.tight_layout()
    pdf.savefig()
    plt.close()

In [None]:
# Parameters for the TNFSF13 (ligand) -> TNFRSF17 (receptor) distance analysis.
receptor_types = ["PC"]
lig_gene, rec_gene = "TNFSF13", "TNFRSF17"
threshold = 0             # > threshold = positive

# Keep only cells whose obs['Panel'] is 'BYGXJ6_hMulti'.
# lig_adata / rec_adata come from an earlier cell and are overwritten here.
lig_panel_mask = lig_adata.obs['Panel'] == 'BYGXJ6_hMulti'
rec_panel_mask = rec_adata.obs['Panel'] == 'BYGXJ6_hMulti'
lig_adata = lig_adata[lig_panel_mask].copy()
rec_adata = rec_adata[rec_panel_mask].copy()

def get_expr(adata, gene):
    """Return the values of `gene` for every cell in `adata` as a flat 1-D array.

    `adata[:, gene].X` may be a scipy sparse matrix or a dense array; sparse
    input is densified before flattening.
    """
    column = adata[:, gene].X
    dense = column.toarray() if sparse.issparse(column) else column
    return np.ravel(dense)


# Flag ligand+ cells in lig_adata and receptor+ cells in rec_adata
# (positive means expression > threshold), then keep only the positive cells.
lig_expr = get_expr(lig_adata, lig_gene)
lig_adata.obs["lig_pos"] = lig_expr > threshold

rec_expr = get_expr(rec_adata, rec_gene)
rec_adata.obs["rec_pos"] = rec_expr > threshold

lig_adata_pos = lig_adata[lig_adata.obs["lig_pos"]].copy()
rec_adata_pos = rec_adata[rec_adata.obs["rec_pos"]].copy()


# One output row per (Sample, Collection, receptor_type): summary statistics of
# the distance from each ligand+ cell to its nearest receptor+ cell of that type.
results = []

# every (Sample, Collection) that has at least one ligand+ or receptor+ cell
lig_groups = set(zip(lig_adata_pos.obs["Sample"], lig_adata_pos.obs["Collection"]))
rec_groups = set(zip(rec_adata_pos.obs["Sample"], rec_adata_pos.obs["Collection"]))
all_groups = sorted(lig_groups.union(rec_groups))

for sample, coll in all_groups:
    lig_mask = ((lig_adata_pos.obs["Sample"] == sample) &
                (lig_adata_pos.obs["Collection"] == coll))
    n_lig = int(lig_mask.sum())
    lig_coords = lig_adata_pos.obs.loc[lig_mask, ["x_centroid", "y_centroid"]].to_numpy()

    for rtype in receptor_types:
        rec_mask = (
            (rec_adata_pos.obs["Sample"] == sample) &
            (rec_adata_pos.obs["Collection"] == coll) &
            (rec_adata_pos.obs["ct"] == rtype)
        )
        n_rec = int(rec_mask.sum())

        # Distances default to NaN; both cell counts are always the true counts
        # (previously n_receptor_cells was forced to 0 whenever n_lig was 0).
        row = {
            "Sample": sample,
            "Collection": coll,
            "receptor_type": rtype,
            "min_dist": np.nan,
            "mean_dist": np.nan,
            "median_dist": np.nan,
            "n_ligand_cells": n_lig,
            "n_receptor_cells": n_rec,
        }

        # a distance exists only when both populations are present in this sample
        if n_lig > 0 and n_rec > 0:
            rec_coords = rec_adata_pos.obs.loc[rec_mask, ["x_centroid", "y_centroid"]].to_numpy()

            # NOTE(review): KDTree is not in the notebook's first import cell; the
            # leaf_size keyword matches sklearn.neighbors.KDTree -- confirm it is
            # imported in an earlier cell.
            tree = KDTree(rec_coords, leaf_size=40)
            dists, _ = tree.query(lig_coords, k=1)  # nearest receptor+ cell per ligand+ cell
            dists = dists.ravel()

            row.update(
                min_dist=float(dists.min()),
                mean_dist=float(dists.mean()),
                median_dist=float(np.median(dists)),
            )

        results.append(row)

dist_df = pd.DataFrame(results)

# One-page PDF: grouped box + strip plot of dist_df["median_dist"] per receptor
# cell type, coloured by Collection, with uncorrected Mann-Whitney annotations.
pdf_path = 'median_distance_TNFSF13_TNFRSF17_pos_myePC.pdf'
with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(2, 5))

    # axis / grouping arguments shared by both seaborn layers and the annotator
    layout_kws = dict(
        x="receptor_type",
        y="median_dist",
        hue="Collection",
        order=receptor_types,
        hue_order=collection_order,
    )

    # remove_outliers_iqr is defined elsewhere in the notebook
    # (presumably an IQR-based row filter on `col` -- verify against its definition)
    ax = sns.boxplot(
        data=remove_outliers_iqr(dist_df, col='median_dist'),
        palette=timecols,
        fliersize=0,
        **layout_kws,
    )
    sns.stripplot(
        data=remove_outliers_iqr(dist_df, col='median_dist'),
        dodge=True,
        color="black",
        size=3,
        alpha=0.7,
        **layout_kws,
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each receptor type
    n_coll = len(collection_order)
    pairs = [
        ((r, collection_order[i]), (r, collection_order[j]))
        for r in receptor_types
        for i in range(n_coll)
        for j in range(i + 1, n_coll)
    ]

    # NOTE: the tests run on the full dist_df, not on the filtered frame that is drawn
    annot = Annotator(ax, pairs, data=dist_df, **layout_kws)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    plt.ylabel("Median distance (TNFSF13+ Mye to TNFRSF17+ PC)")
    plt.tight_layout()
    pdf.savefig()
    plt.close()


# One-page PDF: grouped box + strip plot of dist_df["min_dist"] per receptor
# cell type, coloured by Collection, with uncorrected Mann-Whitney annotations.
pdf_path = 'min_distance_TNFSF13_TNFRSF17_pos_myePC.pdf'
with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(2, 5))

    # axis / grouping arguments shared by both seaborn layers and the annotator
    layout_kws = dict(
        x="receptor_type",
        y="min_dist",
        hue="Collection",
        order=receptor_types,
        hue_order=collection_order,
    )

    # remove_outliers_iqr is defined elsewhere in the notebook
    # (presumably an IQR-based row filter on `col` -- verify against its definition)
    ax = sns.boxplot(
        data=remove_outliers_iqr(dist_df, col='min_dist'),
        palette=timecols,
        fliersize=0,
        **layout_kws,
    )
    sns.stripplot(
        data=remove_outliers_iqr(dist_df, col='min_dist'),
        dodge=True,
        color="black",
        size=3,
        alpha=0.7,
        **layout_kws,
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each receptor type
    n_coll = len(collection_order)
    pairs = [
        ((r, collection_order[i]), (r, collection_order[j]))
        for r in receptor_types
        for i in range(n_coll)
        for j in range(i + 1, n_coll)
    ]

    # NOTE: the tests run on the full dist_df, not on the filtered frame that is drawn
    annot = Annotator(ax, pairs, data=dist_df, **layout_kws)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    # The receptor population in this cell is "PC" (see receptor_types); the label
    # previously read "TNFRSF17+ B", disagreeing with the median-distance figure.
    plt.ylabel("Min distance (TNFSF13+ Mye to TNFRSF17+ PC)")
    plt.tight_layout()
    pdf.savefig()
    plt.close()

In [None]:
# check MSC expression of CXCL12 across collections
is_msc = merged.obs['ct'] == 'MSC'
msc = merged[is_msc].copy()

# appearance options for the dot plot
dotplot_kws = dict(
    standard_scale="var",
    dot_max=0.6,
    color_map="viridis",
    dendrogram=False,
)
sc.pl.dotplot(msc, var_names='CXCL12', groupby='Collection', **dotplot_kws)
plt.show()

In [None]:
# CXCR4 in B / plasma cells, one dot per (cell type, Collection) combination
dotplot_kws = dict(
    standard_scale="var",
    dot_max=0.6,
    color_map="viridis",
    dendrogram=False,
)
sc.pl.dotplot(bpc, var_names='CXCR4', groupby=['ct', 'Collection'], **dotplot_kws)
plt.show()

In [None]:
def compute_positive_fraction(adata, gene, threshold=0):
    """Fraction of cells expressing `gene` above `threshold`, per group.

    Cells are grouped by whichever of the obs columns "UPN", "Collection" and
    "ct" exist in `adata.obs` (in that order).

    Parameters
    ----------
    adata : AnnData-like
        Must support `adata[:, gene].X` and expose a DataFrame `adata.obs`.
    gene : str
        Variable name to look up.
    threshold : number, default 0
        A cell is positive when its value is strictly greater than this.

    Returns
    -------
    pandas.DataFrame
        One row per observed group, with the grouping columns followed by
        `frac_pos` (mean of the positive flag), `n_cells` (group size) and
        `gene`.
    """
    x = adata[:, gene].X
    # .X may be sparse or already dense; only densify when needed
    # (an unconditional .toarray() fails on a plain ndarray).
    if sparse.issparse(x):
        x = x.toarray()
    pos = np.asarray(x).ravel() > threshold

    # group by: UPN, Collection, and ct -- whichever of them exist
    group_cols = [col for col in ["UPN", "Collection", "ct"] if col in adata.obs.columns]

    df = adata.obs[group_cols].copy()
    df["pos"] = pos

    out = (
        df.groupby(group_cols, observed=True)
          .agg(frac_pos=("pos", "mean"), n_cells=("pos", "size"))
          .reset_index()
    )

    out["gene"] = gene
    return out


In [None]:
# compare % of CXCR4+ B cells: one PDF page per cell type in bpc

gene = "CXCR4"
collection_order = ["NBM", "NDMM", "PT"]
pdf_path = "CXCR4_positive_fraction_B_PC_boxplots.pdf"
threshold = 0

cts = bpc.obs['ct'].unique()

# per-group fraction of cells with expression > threshold
df_all = compute_positive_fraction(bpc, gene, threshold=threshold)

with PdfPages(pdf_path) as pdf:
    for ct in cts:
        sub = df_all[df_all["ct"] == ct].copy()

        plt.figure(figsize=(2, 4))

        # axis arguments shared by both seaborn layers and the annotator
        axis_kws = dict(x="Collection", y="frac_pos", order=collection_order)

        # remove_outliers_iqr is defined elsewhere in the notebook
        ax = sns.boxplot(
            data=remove_outliers_iqr(sub, col='frac_pos'),
            palette=timecols,
            fliersize=0,
            **axis_kws,
        )
        sns.stripplot(
            data=remove_outliers_iqr(sub, col='frac_pos'),
            color="black",
            size=3,
            alpha=0.8,
            jitter=True,
            **axis_kws,
        )

        # every unordered pair of collections
        n_coll = len(collection_order)
        pairs = [
            (collection_order[i], collection_order[j])
            for i in range(n_coll)
            for j in range(i + 1, n_coll)
        ]

        # NOTE: the tests run on the full `sub`, not on the filtered frame that is drawn
        annot = Annotator(ax, pairs, data=sub, **axis_kws)
        annot.configure(
            test="Mann-Whitney",
            text_format="star",
            comparisons_correction='BH',
            hide_non_significant=True,
            loc="inside",
            line_height=0.03,
            line_offset=0.02,
        )
        annot.apply_and_annotate()

        plt.title(f"{ct}: {gene}+ fraction")
        plt.ylabel("Fraction positive")
        plt.xlabel("Collection")
        plt.tight_layout()
        pdf.savefig()
        plt.close()


In [None]:
# compare % of CXCL12+ MSC cells: one PDF page per cell type in msc

gene = "CXCL12"
collection_order = ["NBM", "NDMM", "PT"]
pdf_path = "CXCL12_positive_fraction_MSC_boxplots.pdf"
threshold = 0

cts = msc.obs['ct'].unique()

# per-group fraction of cells with expression > threshold
df_all = compute_positive_fraction(msc, gene, threshold=threshold)

with PdfPages(pdf_path) as pdf:
    for ct in cts:
        sub = df_all[df_all["ct"] == ct].copy()

        plt.figure(figsize=(2, 4))

        # axis arguments shared by both seaborn layers and the annotator
        axis_kws = dict(x="Collection", y="frac_pos", order=collection_order)

        # remove_outliers_iqr is defined elsewhere in the notebook
        ax = sns.boxplot(
            data=remove_outliers_iqr(sub, col='frac_pos'),
            palette=timecols,
            fliersize=0,
            **axis_kws,
        )
        sns.stripplot(
            data=remove_outliers_iqr(sub, col='frac_pos'),
            color="black",
            size=3,
            alpha=0.8,
            jitter=True,
            **axis_kws,
        )

        # every unordered pair of collections
        n_coll = len(collection_order)
        pairs = [
            (collection_order[i], collection_order[j])
            for i in range(n_coll)
            for j in range(i + 1, n_coll)
        ]

        # NOTE: the tests run on the full `sub`, not on the filtered frame that is drawn.
        # NOTE(review): no multiple-testing correction here, unlike the CXCR4 plot.
        annot = Annotator(ax, pairs, data=sub, **axis_kws)
        annot.configure(
            test="Mann-Whitney",
            text_format="star",
            comparisons_correction=None,
            hide_non_significant=True,
            loc="inside",
            line_height=0.03,
            line_offset=0.02,
        )
        annot.apply_and_annotate()

        plt.title(f"{ct}: {gene}+ fraction")
        plt.ylabel("Fraction positive")
        plt.xlabel("Collection")
        plt.tight_layout()
        pdf.savefig()
        plt.close()


In [None]:
# Setup for the paired NDMM vs PT comparison of CXCR4+ fractions.
gene = "CXCR4"
threshold = 0
pdf_path = "CXCR4_positive_fraction_B_PC_boxplots_pairedUPN_wilcoxon.pdf"

# NOTE: this overwrites the three-level collection_order used by earlier cells
collection_order = ["NDMM", "PT"]

# get paired UPNs: those with B/PC cells in both collections
in_comparison = bpc.obs["Collection"].isin(collection_order)
n_collections_per_upn = (
    bpc.obs[in_comparison]
       .groupby("UPN")["Collection"]
       .nunique()
)
paired_upns = n_collections_per_upn[n_collections_per_upn == 2].index.tolist()
bpc_paired = bpc[bpc.obs["UPN"].isin(paired_upns)].copy()

# get pos fraction per UPN x Collection x ct
df_all = compute_positive_fraction(bpc_paired, gene, threshold=threshold)
cts = bpc_paired.obs["ct"].unique()
df_all

In [None]:
# One PDF page per cell type: NDMM vs PT fraction-positive values for paired
# UPNs, with a line joining each patient's two values and a paired Wilcoxon test.
with PdfPages(pdf_path) as pdf:
    # The comparison is always NDMM vs PT; fixing it here keeps the x positions,
    # the paired lines and the annotator in agreement regardless of the current
    # value of the global `collection_order`.
    pair_order = ["NDMM", "PT"]

    for ct in cts:
        # subset for this ct, restricted to the two collections being compared
        in_pair = df_all["Collection"].isin(pair_order)
        sub = df_all[(df_all["ct"] == ct) & in_pair].copy()

        # Pairing was established per UPN over all cell types, so a UPN can lack
        # this particular cell type at one time point. The Wilcoxon test pairs
        # values by position, so keep only UPNs with both values and sort by UPN
        # so the NDMM and PT values line up patient by patient.
        n_coll = sub.groupby("UPN", observed=True)["Collection"].transform("nunique")
        sub = sub[n_coll == len(pair_order)].sort_values(["UPN", "Collection"])
        if sub.empty:
            # no complete pair for this cell type: nothing to plot or test
            continue

        plt.figure(figsize=(2.2, 4))
        ax = sns.boxplot(
            data=sub,
            x="Collection",
            y="frac_pos",
            palette=timecols,
            order=pair_order,
            fliersize=0
        )
        sns.stripplot(
            data=sub,
            x="Collection",
            y="frac_pos",
            order=pair_order,
            color="black",
            size=3,
            alpha=0.8,
            jitter=True
        )

        # Paired lines: look values up by collection label instead of relying on
        # how the Collection column happens to sort.
        for _, g in sub.groupby("UPN", observed=True):
            by_collection = dict(zip(g["Collection"].astype(str), g["frac_pos"]))
            pts = [by_collection[c] for c in pair_order]
            ax.plot([0, 1], pts, color='gray', alpha=1, linewidth=1)

        pairs = [("NDMM", "PT")]
        annot = Annotator(
            ax,
            pairs,
            data=sub,
            x="Collection",
            y="frac_pos",
            order=pair_order,
        )
        annot.configure(
            test='Wilcoxon',  # paired Wilcoxon
            text_format='star',
            loc='inside',
            hide_non_significant=True
        )
        annot.apply_and_annotate()

        plt.title(f"{ct}: {gene}+ fraction")
        plt.tight_layout()
        pdf.savefig()
        plt.close()

In [None]:
# CXCR4 in B / plasma cells per (cell type, Collection); show=False so the
# call's return value is the cell output
matrixplot_kws = dict(
    dendrogram=False,
    standard_scale='var',
    figsize=(2, 6),
    show=False,
)
sc.pl.matrixplot(bpc, var_names='CXCR4', groupby=['ct', "Collection"], **matrixplot_kws)

In [None]:
# CXCL12 in MSC per Collection; show=False so the call's return value is the
# cell output
matrixplot_kws = dict(
    dendrogram=False,
    standard_scale='var',
    figsize=(2, 2),
    show=False,
)
sc.pl.matrixplot(msc, var_names='CXCL12', groupby=["Collection"], **matrixplot_kws)

In [None]:

# Heatmap of the per-patient change (PT minus NDMM) in mean CXCR4 expression,
# one column per B-lineage subset.
subset_key = "ct"
collection_key = "Collection"
# NOTE(review): pairing in bpc_paired was done on "UPN"; rows here are keyed by "DI_UPN"
upn_key = "DI_UPN"
gene = "CXCR4"

# get expr vector (dense 1-D, whether .X is an ndarray or sparse)
x = bpc_paired[:, gene].X
x = np.ravel(x) if isinstance(x, np.ndarray) else np.ravel(x.toarray())

df = bpc_paired.obs[[subset_key, collection_key, upn_key]].copy()
df["expr"] = x

target_subsets = ["Early B", "Mature B", "PC"]
df = df[df[subset_key].isin(target_subsets)]

# get average expression per UPN × Collection × Subset
group_keys = [upn_key, collection_key, subset_key]
mean_expr = (
    df.groupby(group_keys, observed=True)
      .agg(mean_expr=("expr", "mean"))
      .reset_index()
)

# pivot to wide: rows = UPN, columns = (Collection, Subset)
wide = mean_expr.pivot_table(
    index=upn_key,
    columns=[collection_key, subset_key],
    values="mean_expr"
)

# PT minus NDMM, one column per subset
diff = pd.DataFrame(
    {ss: wide[("PT", ss)] - wide[("NDMM", ss)] for ss in target_subsets},
    index=wide.index,
)


plt.figure(figsize=(6, 4))
heatmap_kws = dict(
    cmap="coolwarm",
    center=0,
    linewidths=0.5,
    linecolor="gray",
    annot=False,
    fmt=".2f",
)
sns.heatmap(diff, **heatmap_kws)
plt.title("CXCR4: PT – NDMM difference\n(Early B / Mature B / PC)")
plt.ylabel("UPN")
plt.xlabel("Subset")
plt.tight_layout()


plt.savefig("CXCR4_paired_difference_PT-NDMM.pdf", bbox_inches="tight")

In [None]:
# Heatmap of the per-patient change (PT minus NDMM) in mean CXCL12 expression in MSC.
# paired_upns comes from the earlier paired-UPN cell (computed on B / plasma cells).
is_paired = msc.obs["UPN"].isin(paired_upns)
msc_paired = msc[is_paired].copy()

subset_key = "ct"
collection_key = "Collection"
# NOTE(review): pairing was done on "UPN"; rows here are keyed by "DI_UPN"
upn_key = "DI_UPN"
gene = "CXCL12"

# get expr vector (dense 1-D, whether .X is an ndarray or sparse)
x = msc_paired[:, gene].X
x = np.ravel(x) if isinstance(x, np.ndarray) else np.ravel(x.toarray())

df = msc_paired.obs[[subset_key, collection_key, upn_key]].copy()
df["expr"] = x

target_subsets = ["MSC"]
df = df[df[subset_key].isin(target_subsets)]

# get average expression per UPN × Collection × Subset
group_keys = [upn_key, collection_key, subset_key]
mean_expr = (
    df.groupby(group_keys, observed=True)
      .agg(mean_expr=("expr", "mean"))
      .reset_index()
)

# pivot to wide: rows = UPN, columns = (Collection, Subset)
wide = mean_expr.pivot_table(
    index=upn_key,
    columns=[collection_key, subset_key],
    values="mean_expr"
)

# PT minus NDMM, one column per subset
diff = pd.DataFrame(
    {ss: wide[("PT", ss)] - wide[("NDMM", ss)] for ss in target_subsets},
    index=wide.index,
)


plt.figure(figsize=(3, 4))
heatmap_kws = dict(
    cmap="coolwarm",
    center=0,
    linewidths=0.5,
    linecolor="gray",
    annot=False,
    fmt=".2f",
)
sns.heatmap(diff, **heatmap_kws)
plt.title("CXCL12: PT – NDMM difference\n(MSC)")
plt.ylabel("UPN")
plt.xlabel("Subset")
plt.tight_layout()


plt.savefig("CXCL12_paired_difference_PT-NDMM.pdf", bbox_inches="tight")

In [None]:
# check B/PC expressing CXCR4: fraction of CXCR4+ cells per cell type and Collection
adata = bpc.copy()
gene = "CXCR4"
threshold = 0

# dense 1-D expression vector for the gene
Xg = adata[:, gene].X
Xg = Xg.toarray() if sparse.issparse(Xg) else Xg
expr = np.ravel(Xg)

# 1 when expression > threshold, else 0
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × cell type
df = (
    adata.obs
    .groupby(["UPN", "Collection", "ct"], observed=True)
    .agg(
        n_cells=("gene_pos", "size"),
        n_pos=("gene_pos", "sum")
    )
    .reset_index()
)
df["frac_pos"] = df["n_pos"] / df["n_cells"]


collection_order = ["NBM", "NDMM", "PT"]
cts = sorted(df["ct"].unique())
pdf_path = "CXCR4_bpc_fracPos_by_ct_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(3, 4))

    # axis / grouping arguments shared by both seaborn layers and the annotator
    layout_kws = dict(
        x="ct",
        y="frac_pos",
        hue="Collection",
        order=['Early B', 'Mature B', 'PC'],
        hue_order=collection_order,
    )

    # remove_outliers_iqr is defined elsewhere in the notebook
    ax = sns.boxplot(
        data=remove_outliers_iqr(df, col='frac_pos'),
        palette=timecols,
        fliersize=0,
        **layout_kws,
    )
    sns.stripplot(
        data=remove_outliers_iqr(df, col='frac_pos'),
        dodge=True,
        alpha=0.7,
        size=2,
        palette="dark:black",
        **layout_kws,
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each cell type
    n_coll = len(collection_order)
    pairs = [
        ((ct, collection_order[i]), (ct, collection_order[j]))
        for ct in cts
        for i in range(n_coll)
        for j in range(i + 1, n_coll)
    ]

    # NOTE: the tests run on the full df, not on the filtered frame that is drawn
    annot = Annotator(ax, pairs, data=df, **layout_kws)
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction='BH',
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02,
    )
    annot.apply_and_annotate()

    plt.title(f'{gene}+ %')
    plt.tight_layout()
    pdf.savefig()
    plt.close()

In [None]:
# Fraction of CXCR4+ B cells (Early B + Mature B) per radial neighbourhood
# (obs column "rn") and Collection, as a grouped box plot.
adata = bpc[bpc.obs['ct'].isin(['Early B', 'Mature B'])].copy()
gene = "CXCR4"
threshold = 0

# dense 1-D expression vector for the gene
Xg = adata[:, gene].X
if sparse.issparse(Xg):
    Xg = Xg.toarray()
expr = np.ravel(Xg)

# 1 when expression > threshold, else 0
adata.obs["gene_pos"] = (expr > threshold).astype(int)

# get fraction positive per UPN × Collection × RN
df = (
    adata.obs
    .groupby(["UPN", "Collection", "rn"], observed=True)
    .agg(
        n_cells=("gene_pos", "size"),
        n_pos=("gene_pos", "sum")
    )
    .reset_index()
)

df["frac_pos"] = df["n_pos"] / df["n_cells"]


collection_order = ["NBM", "NDMM", "PT"]
rns = sorted(df["rn"].unique())
pdf_path = "CXCR4_b_fracPos_by_RN_groupedBoxplot.pdf"

with PdfPages(pdf_path) as pdf:
    plt.figure(figsize=(10,5))

    # The same explicit x order goes to both seaborn layers and the Annotator.
    # Without it seaborn picks its own category order, which can differ from
    # `rns` and put the significance brackets over the wrong boxes.
    ax = sns.boxplot(
        data= remove_outliers_iqr(df, col='frac_pos'),
        x="rn",
        y="frac_pos",
        order=rns,
        hue="Collection",
        hue_order=collection_order,
        palette=timecols,
        fliersize=0
    )
    sns.stripplot(
        data=remove_outliers_iqr(df, col='frac_pos'),
        x="rn",
        y="frac_pos",
        order=rns,
        hue="Collection",
        hue_order=collection_order,
        dodge=True,
        alpha=0.7,
        size=2,
        palette="dark:black"
    )

    # both layers register legend entries; keep only the first three
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:3], labels[:3], title="Collection")

    # every unordered pair of collections, within each radial neighbourhood
    pairs = []
    for rn in rns:
        for i in range(len(collection_order)):
            for j in range(i+1, len(collection_order)):
                pairs.append(((rn, collection_order[i]), (rn, collection_order[j])))

    # NOTE: the tests run on the full df, not on the filtered frame that is drawn
    annot = Annotator(
        ax,
        pairs,
        data=df,
        x="rn",
        y="frac_pos",
        hue="Collection",
        order=rns,
        hue_order=collection_order
    )
    annot.configure(
        test="Mann-Whitney",
        text_format="star",
        comparisons_correction=None,
        hide_non_significant=True,
        loc="inside",
        line_height=0.03,
        line_offset=0.02
    )
    annot.apply_and_annotate()

    plt.title(f'{gene}+ of fraction of total B cells in radial neighborhoods')
    plt.tight_layout()
    pdf.savefig()
    plt.close()
