In [None]:
import os
from pathlib import Path
from typing import Annotated

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
import tifffile

from sklearn.cluster import KMeans
from skimage.color import label2rgb
from sklearn.neighbors import radius_neighbors_graph
from sklearn.neighbors import NearestNeighbors

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from scipy.stats import wilcoxon
from scipy.stats import spearmanr
from statsmodels.stats.multitest import multipletests
from scipy.stats import entropy, chi2_contingency
from matplotlib.backends.backend_pdf import PdfPages
from statannotations.Annotator import Annotator

from scipy import sparse

# Keep figure text as editable text in vector exports (SVG and PDF) instead of
# having it converted to outlines.
plt.rcParams.update({'svg.fonttype': 'none', 'pdf.fonttype': 42})

# Figures in the cells below are saved with relative file names, so move into
# the output directory for this analysis. The trailing expression echoes the
# resulting working directory as the cell output.
OUTPUT_DIR = '/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/compare_ct_abundance'
os.chdir(OUTPUT_DIR)
os.getcwd()


In [None]:
# Load the merged AnnData object; the cells below read its `.obs` columns
# 'Collection', 'UPN' and 'ct'.
MERGED_H5AD = '/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/Xenium/analysis/merged.h5ad'
adata = sc.read_h5ad(MERGED_H5AD)


In [None]:
# Cell-type co-abundance within one collection:
#   1. per-sample (UPN) cell-type proportions,
#   2. pairwise Spearman correlation between cell types across samples,
#   3. clustered heatmap of all correlations (saved to PDF only),
#   4. clustered heatmap showing only pairs significant after
#      Benjamini-Hochberg FDR correction (saved to PDF and shown inline).
collection_label = "PT"
fdr_alpha = 0.05

pt = adata[adata.obs['Collection'] == collection_label]

# Long table: number of cells for every (sample, cell type) combination.
counts = (
    pt.obs.groupby(["UPN", "ct"], observed=False)
    .size()
    .reset_index(name="n_cells")
)

# Samples x cell types count matrix.
wide = counts.pivot_table(
    index="UPN",
    columns="ct",
    values="n_cells",
    fill_value=0,
    observed=False,
)

# observed=False may emit samples that have no cells in this collection; their
# 0/0 proportions would be NaN and turn every p-value below into NaN.
wide = wide.loc[wide.sum(axis=1) > 0]
wide = wide.div(wide.sum(axis=1), axis=0)
corr = wide.corr(method="spearman")

# --- All correlations ------------------------------------------------------
g = sns.clustermap(
    corr,
    cmap="vlag",
    center=0,
    linewidths=0.5,
    figsize=(8, 8)
)
g.savefig(f"{collection_label}_ct_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)

# --- Significance of each cell-type pair -----------------------------------
# Test every unordered pair exactly once (strict upper triangle). Feeding the
# full symmetric matrix, including a diagonal hard-coded to p = 0, into the
# BH procedure adds fake "discoveries" and makes the correction too lenient.
n_ct = corr.shape[0]
upper_i, upper_j = np.triu_indices(n_ct, k=1)
raw_p = np.array(
    [spearmanr(wide.iloc[:, a], wide.iloc[:, b]).pvalue
     for a, b in zip(upper_i, upper_j)],
    dtype=float,
)

# Only finite p-values take part in the correction; undefined ones (e.g. a
# constant column) stay NaN and are therefore never flagged as significant.
adj_p = np.full(raw_p.shape, np.nan)
testable = np.isfinite(raw_p)
if testable.any():
    adj_p[testable] = multipletests(raw_p[testable], method='fdr_bh')[1]

# Mirror the pairwise results into symmetric matrices. The diagonal is left
# at 0 so that self-correlations remain visible in the masked heatmap.
pval_mat = np.zeros((n_ct, n_ct))
pval_mat[upper_i, upper_j] = raw_p
pval_mat[upper_j, upper_i] = raw_p
pval = pd.DataFrame(pval_mat, index=corr.index, columns=corr.columns)

pval_adj_mat = np.zeros((n_ct, n_ct))
pval_adj_mat[upper_i, upper_j] = adj_p
pval_adj_mat[upper_j, upper_i] = adj_p
pval_adj = pd.DataFrame(pval_adj_mat, index=corr.index, columns=corr.columns)

sig_mask = pval_adj < fdr_alpha

# --- Significant correlations only -----------------------------------------
# clustermap builds its own figure and layout, so the size is passed directly
# and the title/save go through the returned grid instead of pyplot's
# current-figure state (no stray empty figure, no tight_layout fighting the
# grid layout). `square=True` is dropped because clustermap ignores it.
g = sns.clustermap(
    corr, cmap="vlag", center=0,
    mask=~sig_mask,  # show only significant cells
    linewidths=0.5, figsize=(6, 6),
    cbar_kws={"label":"Spearman ρ"}
)
g.fig.suptitle(f"Significant correlations (FDR < {fdr_alpha})", y=1.02)
g.savefig(f"{collection_label}_ct_correlation_significantonly.pdf", dpi=300, bbox_inches="tight")


In [None]:
# Cell-type co-abundance within one collection:
#   1. per-sample (UPN) cell-type proportions,
#   2. pairwise Spearman correlation between cell types across samples,
#   3. clustered heatmap of all correlations (saved to PDF only),
#   4. clustered heatmap showing only pairs significant after
#      Benjamini-Hochberg FDR correction (saved to PDF and shown inline).
collection_label = "NDMM"
fdr_alpha = 0.05

# `pt` holds the subset for this collection; the name is kept unchanged so any
# later cell that reads it keeps working.
pt = adata[adata.obs['Collection'] == collection_label]

# Long table: number of cells for every (sample, cell type) combination.
counts = (
    pt.obs.groupby(["UPN", "ct"], observed=False)
    .size()
    .reset_index(name="n_cells")
)

# Samples x cell types count matrix.
wide = counts.pivot_table(
    index="UPN",
    columns="ct",
    values="n_cells",
    fill_value=0,
    observed=False,
)

# observed=False may emit samples that have no cells in this collection; their
# 0/0 proportions would be NaN and turn every p-value below into NaN.
wide = wide.loc[wide.sum(axis=1) > 0]
wide = wide.div(wide.sum(axis=1), axis=0)
corr = wide.corr(method="spearman")

# --- All correlations ------------------------------------------------------
g = sns.clustermap(
    corr,
    cmap="vlag",
    center=0,
    linewidths=0.5,
    figsize=(8, 8)
)
g.savefig(f"{collection_label}_ct_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)

# --- Significance of each cell-type pair -----------------------------------
# Test every unordered pair exactly once (strict upper triangle). Feeding the
# full symmetric matrix, including a diagonal hard-coded to p = 0, into the
# BH procedure adds fake "discoveries" and makes the correction too lenient.
n_ct = corr.shape[0]
upper_i, upper_j = np.triu_indices(n_ct, k=1)
raw_p = np.array(
    [spearmanr(wide.iloc[:, a], wide.iloc[:, b]).pvalue
     for a, b in zip(upper_i, upper_j)],
    dtype=float,
)

# Only finite p-values take part in the correction; undefined ones (e.g. a
# constant column) stay NaN and are therefore never flagged as significant.
adj_p = np.full(raw_p.shape, np.nan)
testable = np.isfinite(raw_p)
if testable.any():
    adj_p[testable] = multipletests(raw_p[testable], method='fdr_bh')[1]

# Mirror the pairwise results into symmetric matrices. The diagonal is left
# at 0 so that self-correlations remain visible in the masked heatmap.
pval_mat = np.zeros((n_ct, n_ct))
pval_mat[upper_i, upper_j] = raw_p
pval_mat[upper_j, upper_i] = raw_p
pval = pd.DataFrame(pval_mat, index=corr.index, columns=corr.columns)

pval_adj_mat = np.zeros((n_ct, n_ct))
pval_adj_mat[upper_i, upper_j] = adj_p
pval_adj_mat[upper_j, upper_i] = adj_p
pval_adj = pd.DataFrame(pval_adj_mat, index=corr.index, columns=corr.columns)

sig_mask = pval_adj < fdr_alpha

# --- Significant correlations only -----------------------------------------
# clustermap builds its own figure and layout, so the size is passed directly
# and the title/save go through the returned grid instead of pyplot's
# current-figure state (no stray empty figure, no tight_layout fighting the
# grid layout). `square=True` is dropped because clustermap ignores it.
g = sns.clustermap(
    corr, cmap="vlag", center=0,
    mask=~sig_mask,  # show only significant cells
    linewidths=0.5, figsize=(6, 6),
    cbar_kws={"label":"Spearman ρ"}
)
g.fig.suptitle(f"Significant correlations (FDR < {fdr_alpha})", y=1.02)
g.savefig(f"{collection_label}_ct_correlation_significantonly.pdf", dpi=300, bbox_inches="tight")


In [None]:
# Cell-type co-abundance within one collection:
#   1. per-sample (UPN) cell-type proportions,
#   2. pairwise Spearman correlation between cell types across samples,
#   3. clustered heatmap of all correlations (saved to PDF only),
#   4. clustered heatmap showing only pairs significant after
#      Benjamini-Hochberg FDR correction (saved to PDF and shown inline).
collection_label = "NBM"
fdr_alpha = 0.05

# `pt` holds the subset for this collection; the name is kept unchanged so any
# later cell that reads it keeps working.
pt = adata[adata.obs['Collection'] == collection_label]

# Long table: number of cells for every (sample, cell type) combination.
counts = (
    pt.obs.groupby(["UPN", "ct"], observed=False)
    .size()
    .reset_index(name="n_cells")
)

# Samples x cell types count matrix.
wide = counts.pivot_table(
    index="UPN",
    columns="ct",
    values="n_cells",
    fill_value=0,
    observed=False,
)

# observed=False may emit samples that have no cells in this collection; their
# 0/0 proportions would be NaN and turn every p-value below into NaN.
wide = wide.loc[wide.sum(axis=1) > 0]
wide = wide.div(wide.sum(axis=1), axis=0)
corr = wide.corr(method="spearman")

# --- All correlations ------------------------------------------------------
g = sns.clustermap(
    corr,
    cmap="vlag",
    center=0,
    linewidths=0.5,
    figsize=(8, 8)
)
g.savefig(f"{collection_label}_ct_correlation_clustermap.pdf", dpi=300, bbox_inches="tight")
plt.close(g.fig)

# --- Significance of each cell-type pair -----------------------------------
# Test every unordered pair exactly once (strict upper triangle). Feeding the
# full symmetric matrix, including a diagonal hard-coded to p = 0, into the
# BH procedure adds fake "discoveries" and makes the correction too lenient.
n_ct = corr.shape[0]
upper_i, upper_j = np.triu_indices(n_ct, k=1)
raw_p = np.array(
    [spearmanr(wide.iloc[:, a], wide.iloc[:, b]).pvalue
     for a, b in zip(upper_i, upper_j)],
    dtype=float,
)

# Only finite p-values take part in the correction; undefined ones (e.g. a
# constant column) stay NaN and are therefore never flagged as significant.
adj_p = np.full(raw_p.shape, np.nan)
testable = np.isfinite(raw_p)
if testable.any():
    adj_p[testable] = multipletests(raw_p[testable], method='fdr_bh')[1]

# Mirror the pairwise results into symmetric matrices. The diagonal is left
# at 0 so that self-correlations remain visible in the masked heatmap.
pval_mat = np.zeros((n_ct, n_ct))
pval_mat[upper_i, upper_j] = raw_p
pval_mat[upper_j, upper_i] = raw_p
pval = pd.DataFrame(pval_mat, index=corr.index, columns=corr.columns)

pval_adj_mat = np.zeros((n_ct, n_ct))
pval_adj_mat[upper_i, upper_j] = adj_p
pval_adj_mat[upper_j, upper_i] = adj_p
pval_adj = pd.DataFrame(pval_adj_mat, index=corr.index, columns=corr.columns)

sig_mask = pval_adj < fdr_alpha

# --- Significant correlations only -----------------------------------------
# clustermap builds its own figure and layout, so the size is passed directly
# and the title/save go through the returned grid instead of pyplot's
# current-figure state (no stray empty figure, no tight_layout fighting the
# grid layout). `square=True` is dropped because clustermap ignores it.
g = sns.clustermap(
    corr, cmap="vlag", center=0,
    mask=~sig_mask,  # show only significant cells
    linewidths=0.5, figsize=(6, 6),
    cbar_kws={"label":"Spearman ρ"}
)
g.fig.suptitle(f"Significant correlations (FDR < {fdr_alpha})", y=1.02)
g.savefig(f"{collection_label}_ct_correlation_significantonly.pdf", dpi=300, bbox_inches="tight")
