In [None]:
import os
from pathlib import Path
from typing import Annotated

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
import tifffile

from sklearn.cluster import KMeans
from skimage.color import label2rgb
from sklearn.neighbors import radius_neighbors_graph
from sklearn.neighbors import NearestNeighbors

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests
from scipy.stats import entropy, chi2_contingency
from matplotlib.backends.backend_pdf import PdfPages
from statannotations.Annotator import Annotator

from scipy import sparse

# Font embedding settings for vector exports; fonttype 42 keeps PDF text editable.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

# Relative output paths used later in this notebook resolve against this directory.
os.chdir('/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/lineage_specific_analysis/B/')
os.getcwd()


In [None]:
# Load the merged AnnData (.h5ad) from the radial_neighborhoods output directory.
merged = sc.read_h5ad("/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/radial_neighborhoods/Output/merged_RN.h5ad")

In [None]:
# Add a per-cell "<UPN>_<Collection>" label column to obs (missing values become '').
upn_as_str = merged.obs['UPN'].astype('string')
collection_as_str = merged.obs['Collection'].astype('string')
merged.obs['UPN_Collection'] = upn_as_str.str.cat(collection_as_str, sep='_', na_rep='')


def _obs_lookup(key_col, value_col):
    """Return a dict mapping each `key_col` value in merged.obs to its `value_col` value."""
    return merged.obs.set_index(key_col)[value_col].to_dict()


# Lookup tables from sample-level identifiers to their metadata.
sample_to_collection = _obs_lookup('Sample', 'Collection')
uc_to_collection = _obs_lookup('UPN_Collection', 'Collection')
uc_to_upn = _obs_lookup('UPN_Collection', 'UPN')

In [None]:
# Number of cells per value of the 'ct' annotation (e.g. 'Early B', 'Mature B' used below).
merged.obs['ct'].value_counts()

In [None]:
# Display order of the Collection groups on every plot axis.
collection_order = ["NBM", "NDMM", "PT"]

# Fill colour (hex) used for each Collection group.
timecols = {
    "NBM": "#0C7515",
    "NDMM": "#E619B9",
    "PT": "#CF99C3",
}

In [None]:
# Restrict to the two B-cell populations compared in this notebook.
is_b_cell = merged.obs['ct'].isin(['Early B', 'Mature B'])
adata = merged[is_b_cell].copy()
obs = adata.obs.copy()

# Long table: one row per observed (UPN, Collection, ct) with its cell count "n".
cells_per_group = (
    obs.groupby(["UPN", "Collection", "ct"], observed=True)
    .size()
    .rename("n")
    .reset_index()
)

# Wide table: one row per (UPN, Collection), one count column per ct value.
counts = cells_per_group.pivot_table(
    index=["UPN", "Collection"], columns="ct", values="n", fill_value=0
).reset_index()

# Early / Mature ratio; the pseudocount keeps the ratio finite when a count is zero.
pseudocount = 1e-1
early_over_mature = (counts["Early B"] + pseudocount) / (counts["Mature B"] + pseudocount)
counts["ratio_early_to_mature"] = early_over_mature
counts["log2_ratio"] = np.log2(early_over_mature)

# Sort rows by UPN, then by Collection in collection_order.
counts["Collection"] = pd.Categorical(counts["Collection"], categories=collection_order, ordered=True)
counts = counts.sort_values(["UPN", "Collection"]).reset_index(drop=True)

# Natural-log version of the same ratio (used by the paired comparison).
counts["ln_ratio"] = np.log(counts["ratio_early_to_mature"])
counts

In [None]:
# Wide table of ln ratios: one row per UPN, one column per timepoint of interest.
paired_timepoints = ["NDMM", "PT"]
ln_ratio_wide = counts.pivot(index="UPN", columns="Collection", values="ln_ratio")
paired = ln_ratio_wide.reindex(columns=paired_timepoints).dropna()  # keep UPNs with both timepoints

# Back to long format for seaborn: one row per (UPN, Collection).
paired_long = paired.reset_index().melt(
    id_vars="UPN",
    value_vars=paired_timepoints,
    var_name="Collection",
    value_name="ln_ratio",
)
paired_long["Collection"] = pd.Categorical(
    paired_long["Collection"], categories=paired_timepoints, ordered=True
)
paired_long

In [None]:
pdf_path = "pairedNDMMtoPT_EarlyMatureB_ratio_ln_xenium.pdf"

# One-page PDF: NDMM vs PT boxplot of the ln Early/Mature B ratio, with a line
# joining the two timepoints of each UPN and a Wilcoxon annotation.
with PdfPages(pdf_path) as pdf:
    fig, ax = plt.subplots(figsize=(2, 4))
    sns.boxplot(
        data=paired_long, palette=timecols,
        x="Collection", y="ln_ratio",
        color="white", fliersize=0,
        ax=ax,
    )
    sns.stripplot(
        data=paired_long,
        x="Collection", y="ln_ratio", color='black', alpha=1, size=2,
        ax=ax,
    )

    # Paired lines. observed=True: if UPN is categorical, categories whose rows
    # were dropped earlier would otherwise come back as empty groups (and pandas
    # warns about the changing default).
    for _upn, pair_rows in paired_long.groupby("UPN", observed=True):
        ax.plot(pair_rows["Collection"], pair_rows["ln_ratio"], color="lightgray",
                linewidth=1, alpha=0.8, zorder=1)

    pairs = [("NDMM", "PT")]
    annot = Annotator(
        ax, pairs, data=paired_long,
        x="Collection", y="ln_ratio", order=["NDMM", "PT"]
    )
    annot.configure(
        test="Wilcoxon",
        text_format="star",
        loc="inside",
        comparisons_correction=None,
        line_height=0.03,
        line_offset=0.02
    )
    annot.apply_and_annotate()

    ax.set_ylabel("Early/Mature B ratio (ln)")
    ax.set_title("Paired change NDMM to PT")
    sns.despine(fig=fig)
    fig.tight_layout()
    # Save and close this specific figure rather than whichever one is "current".
    pdf.savefig(fig)
    plt.close(fig)

In [None]:
# Compare the fraction of marker-positive B cells across collections, one gene at a time.
genes = ["SOX4", "MS4A1", "DNTT"]
collection_order = ["NBM", "NDMM", "PT"]
pdf_path = "gene_positive_fraction_B_boxplots.pdf"
threshold = 0  # a cell counts as positive when its expression value is strictly above this

def compute_positive_fraction(adata, gene, threshold=0):
    """Fraction of cells expressing `gene` above `threshold`, per (UPN, Collection).

    Parameters
    ----------
    adata
        AnnData-like object: ``adata[:, gene].X`` holds the expression values
        (sparse or dense) and ``adata.obs`` is the per-cell metadata frame.
    gene
        Variable name used to index the object's columns.
    threshold
        A cell is counted as positive when its value is strictly greater than this.

    Returns
    -------
    pandas.DataFrame
        One row per observed group with the grouping columns (those of "UPN"
        and "Collection" that exist in ``adata.obs``), ``frac_pos``,
        ``n_cells`` and ``gene``.

    Raises
    ------
    ValueError
        If ``adata.obs`` has neither a "UPN" nor a "Collection" column.
    """
    x = adata[:, gene].X
    # .X may be a scipy sparse matrix or already a dense array; only the former
    # has (and needs) .toarray().
    if hasattr(x, "toarray"):
        x = x.toarray()
    x = np.asarray(x).ravel()
    pos = x > threshold

    group_cols = [col for col in ("UPN", "Collection") if col in adata.obs.columns]
    if not group_cols:
        raise ValueError("adata.obs must contain at least one of 'UPN' or 'Collection'")

    df = adata.obs[group_cols].copy()
    df["pos"] = pos  # positional assignment: `pos` is in the same row order as obs

    out = (
        df.groupby(group_cols, observed=True)
          .agg(frac_pos=("pos", "mean"), n_cells=("pos", "size"))
          .reset_index()
    )
    out["gene"] = gene
    return out

# One per-group summary frame per gene, stacked into a single long table.
all_results = [
    compute_positive_fraction(adata, gene, threshold=threshold)
    for gene in genes
]

df_all = pd.concat(all_results, ignore_index=True)

# Every unordered pair of collections, in collection_order; it does not depend on
# the gene, so it is built once before the loop.
pairs = [
    (first, second)
    for i, first in enumerate(collection_order)
    for second in collection_order[i + 1:]
]

# Multi-page PDF: one boxplot page per gene with Mann-Whitney annotations.
with PdfPages(pdf_path) as pdf:
    for gene in genes:
        sub = df_all[df_all["gene"] == gene].copy()

        # Arguments shared by the seaborn plots and the annotator.
        shared = dict(data=sub, x="Collection", y="frac_pos", order=collection_order)

        fig, ax = plt.subplots(figsize=(2, 4))
        sns.boxplot(palette=timecols, fliersize=0, ax=ax, **shared)
        sns.stripplot(color="black", size=3, alpha=0.8, jitter=True, ax=ax, **shared)

        annot = Annotator(ax, pairs, **shared)
        annot.configure(
            test="Mann-Whitney",
            text_format="star",
            comparisons_correction=None,
            loc="inside",
            line_height=0.03,
            line_offset=0.02,
        )
        annot.apply_and_annotate()

        ax.set_title(f"{gene}")
        ax.set_ylabel("Fraction positive B cells")
        ax.set_xlabel("Collection")
        fig.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)


In [None]:
# Subtype marker groups, kept for reference only (not included in gene_groups):
#   "Early B":  ["DNTT", "IL7R", "CD19", "SOX4", "TCL1A", "CD79A", "LY6D", "CD93"]
#   "Mature B": ["PTPRC", "MS4A1", "BANK1", "PLCG2"]

# Label -> genes; passed as var_names to the matrix plots, so insertion order matters.
gene_groups = {
    "Proliferation": ["MKI67"],
    "Costimulation": ["CD69", "CD27", "CD86", "SLAMF1", "CD70", "CD83"],
    "Ag Presentation": ["HLA-DQB2"],
    "Chemotaxis": ["CXCR4", "RGS16", "CCR7", "GPR183", "SELL"],
    "TLR modulator": ["LY86"],
    "GC regulation": ["SPI1", "SPIB", "MEF2C", "IRF8"],
    "CSR/SHM": ["TP53BP1", "BCL2L11"],
    "BAFF/APRIL sig.": ["NFKB1", "TNFRSF13B"],
}

In [None]:
# Exclude sample 'P149_T0_S1' (the "exclOutlier" in the output names), then split by subtype.
not_outlier = adata.obs['DI_Sample'] != 'P149_T0_S1'
early_eo = adata[(adata.obs['ct'] == "Early B") & not_outlier].copy()
mature_eo = adata[(adata.obs['ct'] == "Mature B") & not_outlier].copy()

# Same matrix plot for each subset: gene_groups per Collection, each gene scaled
# across groups (standard_scale='var'); saved right after drawing.
for subset, out_file in (
    (mature_eo, "matureB_sharedGenes_mtxPlot_exclOutlier.pdf"),
    (early_eo, "earlyB_sharedGenes_mtxPlot_exclOutlier.pdf"),
):
    sc.pl.matrixplot(
        subset,
        var_names=gene_groups,
        groupby="Collection",
        dendrogram=False,
        standard_scale='var',
        figsize=(10, 1),
        show=False,
    )
    plt.savefig(out_file, bbox_inches="tight")