In [None]:
import os
from pathlib import Path
from typing import Annotated

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
import tifffile

from sklearn.cluster import KMeans
from skimage.color import label2rgb
from sklearn.neighbors import radius_neighbors_graph
from sklearn.neighbors import NearestNeighbors

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests
from scipy.stats import entropy, chi2_contingency
from matplotlib.backends.backend_pdf import PdfPages
from statannotations.Annotator import Annotator

from scipy import sparse

# Figure export settings: keep text as real text in SVG output and embed
# TrueType (Type 42) fonts in PDF output so labels stay editable.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

# Every relative path used below (input .h5ad, output PDFs) resolves against
# this directory. NOTE(review): absolute, machine-specific path.
os.chdir('/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/B')
os.getcwd()


In [None]:
# Load the annotated AnnData object. The path is relative, so it is resolved
# against the current working directory.
adata = sc.read_h5ad('annotated.h5ad')

In [None]:
# Show the distinct labels present in the 'subset' column of the per-cell
# metadata (displayed as this cell's output).
adata.obs['subset'].unique()

In [None]:
# Order in which the Collection groups are sorted and displayed.
collection_order = ["NBM", "NDMM", "PT"]
# Hex colour per Collection group (passed to seaborn as the plot palette).
timecols = dict(zip(collection_order, ["#0C7515", "#E619B9", "#CF99C3"]))

In [None]:
# Relabel fine-grained subsets into two coarse stages: Early B vs Mature B.
obs = adata.obs.copy()  # work on a copy so adata.obs itself is not modified
sub = obs["subset"].astype(str)
early_b = {"Pro/Pre B", "Immature B", "Transitional B"}
mature_b = {"Naive B", "Memory B"}
mapping = {**{k: "Early B" for k in early_b},
           **{k: "Mature B" for k in mature_b}}

# Subsets listed in neither set map to NaN. They are deliberately left as
# missing: casting the column to str would turn NaN into the literal label
# "nan", which then shows up as a bogus third stage in any groupby/pivot on
# B_stage. Missing keys are skipped by groupby by default.
obs["B_stage"] = sub.map(mapping)

In [None]:
# Per-sample (UPN x Collection) cell counts for each B-cell stage, plus the
# Early/Mature ratio on linear, log2 and natural-log scales.
b_stages = ["Early B", "Mature B"]
# Small pseudocount added to both counts so a sample with zero cells in one
# stage still gets a finite ratio / log-ratio.
PSEUDOCOUNT = 1e-1

counts = (
    # Only cells assigned to one of the two stages contribute. This drops
    # unmapped cells (missing, or the literal "nan" label), so a sample with no
    # staged B cells is not reported as 0/0 -> ratio 1.
    obs.loc[obs["B_stage"].isin(b_stages)]
       .groupby(["UPN", "Collection", "B_stage"], observed=True)
       .size()
       # One column per stage; sample/stage combinations with no cells become 0.
       .unstack("B_stage", fill_value=0)
       # Guarantee both stage columns exist even if a stage is absent from the data.
       .reindex(columns=b_stages, fill_value=0)
       .reset_index()
)

# ratio Early / Mature (pseudocount avoids division by zero and log(0))
counts["ratio_early_to_mature"] = (
    (counts["Early B"] + PSEUDOCOUNT) / (counts["Mature B"] + PSEUDOCOUNT)
)
counts["log2_ratio"] = np.log2(counts["ratio_early_to_mature"])
counts["ln_ratio"] = np.log(counts["ratio_early_to_mature"])

# Ordered categorical so sorting follows collection_order rather than
# alphabetical order.
counts["Collection"] = pd.Categorical(counts["Collection"], categories=collection_order, ordered=True)
counts = counts.sort_values(["UPN", "Collection"]).reset_index(drop=True)
counts

In [None]:
# Wide table: one row per UPN, one ln_ratio column per Collection.
ln_ratio_wide = counts.pivot(index="UPN", columns="Collection", values="ln_ratio")

# Restrict to the two timepoints being compared, then keep only UPNs that
# have a value at both of them.
paired = ln_ratio_wide.reindex(columns=["NDMM", "PT"])
paired = paired.dropna()

# Back to long form (one row per UPN x timepoint) for plotting.
paired_long = paired.reset_index().melt(
    id_vars="UPN",
    value_vars=["NDMM", "PT"],
    var_name="Collection",
    value_name="ln_ratio",
)
# Ordered categorical fixes the NDMM -> PT order.
paired_long["Collection"] = pd.Categorical(
    paired_long["Collection"], categories=["NDMM", "PT"], ordered=True
)
paired_long.head()

In [None]:

# Paired NDMM -> PT comparison of ln(Early B / Mature B), written to a
# single-page PDF: box plot + individual points + one connecting line per UPN
# + a significance annotation.
pdf_path = "pairedNDMMtoPT_EarlyMatureB_ratio_ln.pdf"
paired_order = ["NDMM", "PT"]  # x-axis order shared by every layer and the annotation

with PdfPages(pdf_path) as pdf:
    # Explicit figure/axes handles instead of the implicit pyplot "current
    # figure", so every layer is drawn on (and saved from) the same figure.
    fig, ax = plt.subplots(figsize=(2, 4))
    try:
        sns.boxplot(
            data=paired_long, palette=timecols,
            x="Collection", y="ln_ratio", order=paired_order,
            color="white", fliersize=0, ax=ax
        )
        sns.stripplot(
            data=paired_long,
            x="Collection", y="ln_ratio", order=paired_order,
            color='black', alpha=1, size=2, ax=ax
        )

        # paired lines: one segment per UPN, drawn at the numeric category
        # positions (0 = NDMM, 1 = PT) that seaborn uses for the boxes, rather
        # than relying on matplotlib converting category strings to positions.
        for _, g in paired_long.groupby("UPN", observed=True):
            x_pos = pd.Categorical(g["Collection"], categories=paired_order).codes
            ax.plot(x_pos, g["ln_ratio"].to_numpy(), color="lightgray",
                    linewidth=1, alpha=0.8, zorder=1)

        # stats annotation
        # NOTE(review): "Wilcoxon" is a paired test, so the NDMM and PT rows
        # must be in the same UPN order within each group -- confirm
        # paired_long is built that way.
        pairs = [("NDMM", "PT")]
        annot = Annotator(
            ax, pairs, data=paired_long,
            x="Collection", y="ln_ratio", order=paired_order
        )
        annot.configure(
            test="Wilcoxon",
            text_format="star",
            loc="inside",
            comparisons_correction=None,
            line_height=0.03,
            line_offset=0.02
        )
        annot.apply_and_annotate()

        ax.set_ylabel("Early/Mature B ratio (ln)")
        # Arrow written as a Unicode escape so the title does not depend on the
        # notebook file's encoding (it previously rendered as mojibake).
        ax.set_title("Paired change NDMM \u2192 PT")
        sns.despine(ax=ax)
        fig.tight_layout()
        pdf.savefig(fig)   # save this figure as a page of the PDF
    finally:
        plt.close(fig)     # always release the figure, even if plotting/annotation fails
