In [None]:
import os
from pathlib import Path
from typing import Annotated

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import tifffile

from matplotlib.backends.backend_pdf import PdfPages
from scipy import sparse
from scipy.stats import entropy, chi2_contingency
from scipy.stats import spearmanr
from scipy.stats import wilcoxon
from skimage.color import label2rgb
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import radius_neighbors_graph
from sklearn.preprocessing import StandardScaler
from statannotations.Annotator import Annotator
from statsmodels.stats.multitest import multipletests

# Figure export settings: keep text as real text in SVG output and embed
# TrueType (type 42) fonts in PDF output so labels stay editable.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

# Every relative output path used below is resolved against this directory.
os.chdir('/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/compare_celltype_abundance/')
os.getcwd()

In [None]:
# Load the merged, cleaned AnnData object that every cell below reads from.
combined_h5ad = '/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/combined_cleaned.h5ad'
adata = sc.read_h5ad(combined_h5ad)

In [None]:
# --- PT collection: how do subset fractions co-vary across patients? ---
pt = adata[adata.obs['Collection'] == 'PT']

# Long table with one row per (UPN, subset) pair and its cell count.
counts = (
    pt.obs
    .groupby(["UPN", "subset"], observed=False)
    .size()
    .rename("n_cells")
    .reset_index()
)

# Reshape to a UPN x subset matrix of counts.
wide = pd.pivot_table(
    counts,
    values="n_cells",
    index="UPN",
    columns="subset",
    fill_value=0,
)

# Convert counts to within-patient fractions (each row sums to 1).
cells_per_upn = wide.sum(axis=1)
wide = wide.div(cells_per_upn, axis=0)

# Subset-by-subset Spearman correlation of those fractions.
corr = wide.corr(method="spearman")

g = sns.clustermap(corr, cmap="vlag", center=0, linewidths=0.5, figsize=(8, 8))
g.savefig("PT_subset_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)


# --- Significance of the PT subset-fraction correlations ---
# Uses `wide` (UPN x subset fractions) and `corr` (its Spearman correlation
# matrix) as left by the statements above.
df = wide
subset_names = list(corr.columns)
n_subsets = len(subset_names)

# Test each unordered pair of subsets exactly once. Feeding the full square
# matrix to the FDR step would count every pair twice and add the diagonal as
# artificial p = 0 "tests", which makes the adjusted p-values too small.
upper_i, upper_j = np.triu_indices(n_subsets, k=1)
pair_pvals = np.array([
    spearmanr(df[subset_names[a]], df[subset_names[b]]).pvalue
    for a, b in zip(upper_i, upper_j)
])

# Raw p-values as a symmetric matrix. The diagonal stays 0: a subset against
# itself is not a test, and 0 keeps those cells visible in the masked plot.
pval_values = np.zeros((n_subsets, n_subsets))
pval_values[upper_i, upper_j] = pair_pvals
pval_values[upper_j, upper_i] = pair_pvals
pval = pd.DataFrame(pval_values, index=corr.index, columns=corr.columns)

# Benjamini-Hochberg correction over the unique pairs only.
_, pvals_adj, _, _ = multipletests(pair_pvals, method='fdr_bh')

pval_adj_values = np.zeros((n_subsets, n_subsets))
pval_adj_values[upper_i, upper_j] = pvals_adj
pval_adj_values[upper_j, upper_i] = pvals_adj
pval_adj = pd.DataFrame(
    pval_adj_values,
    index=pval.index,
    columns=pval.columns
)
sig_mask = pval_adj < 0.05

# clustermap always opens its own figure, so no plt.figure() beforehand (it
# would only leave an empty figure behind). `square=True` is not passed
# because clustermap ignores it with a warning. The title and the save go
# through the returned grid rather than pyplot's "current axes".
g = sns.clustermap(
    corr, cmap="vlag", center=0,
    mask=~sig_mask,  # show only significant cells
    linewidths=0.5, cbar_kws={"label":"Spearman ρ"}
)
g.fig.suptitle("Significant correlations (FDR < 0.05)", y=1.02)
g.savefig("PT_subset_correlation_significantonly.pdf", dpi=300, bbox_inches="tight")


In [None]:
# --- PT collection: co-variation of lineage ("lin") fractions across patients ---
pt = adata[adata.obs['Collection'] == 'PT']

# Long table with one row per (UPN, lin) pair and its cell count.
counts = (
    pt.obs
    .groupby(["UPN", "lin"], observed=False)
    .size()
    .rename("n_cells")
    .reset_index()
)

# Reshape to a UPN x lin matrix of counts.
wide = pd.pivot_table(
    counts,
    values="n_cells",
    index="UPN",
    columns="lin",
    fill_value=0,
)

# Convert counts to within-patient fractions (each row sums to 1).
cells_per_upn = wide.sum(axis=1)
wide = wide.div(cells_per_upn, axis=0)

# Lineage-by-lineage Spearman correlation of those fractions.
corr = wide.corr(method="spearman")

g = sns.clustermap(corr, cmap="vlag", center=0, linewidths=0.5, figsize=(8, 8))
g.savefig("PT_lin_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)


In [None]:
# --- NDMM collection: co-variation of subset fractions across patients ---
ndmm = adata[adata.obs['Collection'] == 'NDMM']

# Long table with one row per (UPN, subset) pair and its cell count.
counts = (
    ndmm.obs
    .groupby(["UPN", "subset"], observed=False)
    .size()
    .rename("n_cells")
    .reset_index()
)

# Reshape to a UPN x subset matrix of counts.
wide = pd.pivot_table(
    counts,
    values="n_cells",
    index="UPN",
    columns="subset",
    fill_value=0,
)

# Convert counts to within-patient fractions (each row sums to 1).
cells_per_upn = wide.sum(axis=1)
wide = wide.div(cells_per_upn, axis=0)

# Subset-by-subset Spearman correlation of those fractions.
corr = wide.corr(method="spearman")

g = sns.clustermap(corr, cmap="vlag", center=0, linewidths=0.5, figsize=(8, 8))
g.savefig("NDMM_subset_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)


In [None]:
# --- NBM collection: co-variation of subset fractions across patients ---
nbm = adata[adata.obs['Collection'] == 'NBM']

# Long table with one row per (UPN, subset) pair and its cell count.
counts = (
    nbm.obs
    .groupby(["UPN", "subset"], observed=False)
    .size()
    .rename("n_cells")
    .reset_index()
)

# Reshape to a UPN x subset matrix of counts.
wide = pd.pivot_table(
    counts,
    values="n_cells",
    index="UPN",
    columns="subset",
    fill_value=0,
)

# Convert counts to within-patient fractions (each row sums to 1).
cells_per_upn = wide.sum(axis=1)
wide = wide.div(cells_per_upn, axis=0)

# Subset-by-subset Spearman correlation of those fractions.
corr = wide.corr(method="spearman")

g = sns.clustermap(corr, cmap="vlag", center=0, linewidths=0.5, figsize=(8, 8))
g.savefig("NBM_subset_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)


In [None]:
# Correlate changes in subset fractions between paired NDMM and PT samples.

# Select the needed columns and drop 'PC' cells in a single .loc call, then
# .copy() so the category clean-up below writes to an independent frame rather
# than to a slice of adata.obs (avoids pandas' SettingWithCopyWarning).
obs = adata.obs.loc[
    adata.obs['subset'] != 'PC',
    ['UPN_Collection', 'UPN', 'Collection', 'subset'],
].copy()
obs['subset'] = obs['subset'].cat.remove_unused_categories()

# Cell counts per (UPN_Collection, subset); observed=False keeps the
# combinations that have zero cells.
counts = (
    obs.groupby(["UPN_Collection", "subset"], observed=False)
        .size()
        .reset_index(name="n_cells")
)

# Total (non-PC) cells per UPN_Collection, used as the denominator.
totals = (
    counts.groupby(["UPN_Collection"], observed=True)["n_cells"]
          .sum()
          .reset_index(name="total_cells")
)

# Fraction of each subset within its UPN_Collection.
frac_df = counts.merge(totals, on=["UPN_Collection"])
frac_df["frac_subset"] = frac_df["n_cells"] / frac_df["total_cells"]
# Recover UPN and Collection from the combined key by splitting on '|'.
frac_df[['UPN', 'Collection']] = frac_df['UPN_Collection'].str.split('|', expand=True)

# Keep only UPNs that appear in both the NDMM and PT collections.
paired_upns = (
    frac_df[frac_df["Collection"].isin(["NDMM", "PT"])]
    .groupby("UPN", observed=True)["Collection"]
    .nunique()
)
paired_upns = paired_upns[paired_upns == 2].index.tolist()

paired = frac_df[frac_df["UPN"].isin(paired_upns) &
                 frac_df["Collection"].isin(["NDMM", "PT"])]

# One row per (UPN, subset) with the NDMM and PT fractions side by side.
wide = (
    paired.pivot_table(
        index=["UPN", "subset"],
        columns="Collection",
        values="frac_subset",
        fill_value=0
    )
)
wide["delta"] = wide["PT"] - wide["NDMM"]

# UPN x subset matrix of deltas, then subset-by-subset correlation of the change.
delta_wide = wide["delta"].unstack(fill_value=0)
corr_delta = delta_wide.corr(method="spearman")  # or 'pearson'

g = sns.clustermap(
    corr_delta,
    cmap="vlag",
    center=0,
    linewidths=0.5,
    figsize=(8,8)
)
g.fig.suptitle("Correlation of Change (PT − NDMM) in subset fractions", y=1.02)
g.savefig("PT_vs_NDMM_delta_fraction_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)


In [None]:
# Average the per-(UPN, subset) deltas in `wide` over UPNs, one value per
# subset, ordered from the largest mean delta to the smallest.
delta_by_subset = wide['delta'].groupby(level='subset')
delta_mean = delta_by_subset.mean().sort_values(ascending=False)
delta_mean

In [None]:
# Single-row heatmap of the mean delta per subset (columns follow the order
# of `delta_mean`).
fig, ax = plt.subplots(figsize=(8, 2))
sns.heatmap(
    delta_mean.to_frame().T,
    ax=ax,
    cmap='coolwarm',
    center=0,
    linewidths=0.5,
    cbar_kws={'label': 'Mean delta (PT − NDMM)'},
)
ax.set_title("Average change in subset fractions (PT − NDMM)")
ax.set_yticks([])  # only one row, so its y tick carries no information
fig.tight_layout()
fig.savefig("mean_delta_heatmap.pdf", dpi=300, bbox_inches="tight")
plt.close(fig)

In [None]:

# --- Significance of the correlations between subset-fraction changes ---
# Uses `delta_wide` (UPN x subset deltas), `corr_delta` (its Spearman
# correlation matrix) and `delta_mean` from the earlier cells.
df = delta_wide
corr = corr_delta
subset_names = list(corr.columns)
n_subsets = len(subset_names)

# Test each unordered pair of subsets exactly once. Feeding the full square
# matrix to the FDR step would count every pair twice and add the diagonal as
# artificial p = 0 "tests", which makes the adjusted p-values too small.
upper_i, upper_j = np.triu_indices(n_subsets, k=1)
pair_pvals = np.array([
    spearmanr(df[subset_names[a]], df[subset_names[b]]).pvalue
    for a, b in zip(upper_i, upper_j)
])

# Raw p-values as a symmetric matrix. The diagonal stays 0: a subset against
# itself is not a test, and 0 keeps those cells visible in the masked plot.
pval_values = np.zeros((n_subsets, n_subsets))
pval_values[upper_i, upper_j] = pair_pvals
pval_values[upper_j, upper_i] = pair_pvals
pval = pd.DataFrame(pval_values, index=corr.index, columns=corr.columns)

# Benjamini-Hochberg correction over the unique pairs only.
_, pvals_adj, _, _ = multipletests(pair_pvals, method='fdr_bh')

pval_adj_values = np.zeros((n_subsets, n_subsets))
pval_adj_values[upper_i, upper_j] = pvals_adj
pval_adj_values[upper_j, upper_i] = pvals_adj
pval_adj = pd.DataFrame(
    pval_adj_values,
    index=pval.index,
    columns=pval.columns
)
sig_mask = pval_adj < 0.05

# clustermap always opens its own figure, so no plt.figure() beforehand (it
# would only leave an empty figure behind). `square=True` is not passed
# because clustermap ignores it with a warning. The title and the save go
# through the returned grid rather than pyplot's "current axes".
g = sns.clustermap(
    corr, cmap="vlag", center=0,
    mask=~sig_mask,  # show only significant cells
    linewidths=0.5, cbar_kws={"label":"Spearman ρ"}
)
g.fig.suptitle("Significant correlations (FDR < 0.05)", y=1.02)
g.savefig("delta_correlation_significantonly.pdf", dpi=300, bbox_inches="tight")


# Mean delta per subset expressed in percentage points, one decimal place.
delta_pct = (delta_mean * 100).round(1)
print(delta_pct)

In [None]:
'''
cd4t **
cd14 mc *
neutro *
propre **
imm ***
trans ***
cdc *
mem **