In [None]:
import anndata
import scanpy as sc
import pandas as pd
import glob
import skimage
import re
import scanpy.external as sce
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path
from matplotlib.backends.backend_pdf import PdfPages
from statsmodels.stats.multitest import multipletests
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu

from scipy import sparse

from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

# Figure export settings: keep text as text (not outlined glyph paths) in SVG/PDF output.
plt.rcParams['svg.fonttype'] = 'none'
plt.rcParams['pdf.fonttype'] = 42 #make text editable in pdf

import os

# Set working directory
# `lin` selects the lineage sub-directory. Every relative path used later in this
# notebook ("annotated.h5ad", the output PDFs, "mtx/") resolves against this directory.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
#-----------
lin='Mye'
#-----------
os.chdir(f"/diskmnt/Projects/myeloma_scRNA_analysis/MMY_IRD/revision/merge/no_harmony/{lin}")

# Verify current working directory
print(os.getcwd())


In [None]:
# Load the annotated AnnData object from the current working directory.
adata = sc.read_h5ad("annotated.h5ad")

In [None]:
# Reset adata.X from the stored 'counts' layer so the normalization below always
# starts from the same input, no matter how often this cell is re-run.
adata.X = adata.layers['counts'].copy()
# Scale every cell to a total of 1e4, then apply log1p; both operate on adata.X in place.
sc.pp.normalize_total(adata, target_sum=1e4, exclude_highly_expressed=False)
sc.pp.log1p(adata)
# Displayed as a quick sanity check of the transformed value range.
adata.X.max()

In [None]:
# Maximum of the 'normalized' layer, shown for comparison with adata.X.max().
# NOTE(review): this layer is not created anywhere in this notebook — presumably it
# is already stored in annotated.h5ad; confirm.
adata.layers['normalized'].max()

In [None]:
# Persist the object with the re-normalized X.
# Caution: this writes to the same file name that was loaded at the top of the notebook.
adata.write("annotated.h5ad")

In [None]:
# Gene sets for the two per-cell signature scores computed below
# (described by the author as the "hallmark" inflammation and IFNG sets):
#   inflam -> scored into 'inflam_score'
#   ifng   -> scored into 'ifng_score'
# NOTE(review): the symbols must match adata.var_names to contribute to a score.
# Entries such as MARCHF1, RIGI and WARS1 look like recently renamed symbols —
# confirm the object's gene annotation uses the same nomenclature.
inflam =["ABCA1","ABI1","ACVR1B","ACVR2A","ADGRE1","ADM","ADORA2B","ADRM1","AHR","APLNR","AQP9","ATP2A2","ATP2B1","ATP2C1","AXL","BDKRB1","BEST1","BST2","BTG2","C3AR1","C5AR1","CALCRL","CCL17","CCL2","CCL20","CCL22","CCL24","CCL5","CCL7","CCR7","CCRL2","CD14","CD40","CD48","CD55","CD69","CD70","CD82","CDKN1A","CHST2","CLEC5A","CMKLR1","CSF1","CSF3","CSF3R","CX3CL1","CXCL10","CXCL11","CXCL6","CXCL8","CXCL9","CXCR6","CYBB","DCBLD2","EBI3","EDN1","EIF2AK2","EMP3","EREG","F3","FFAR2","FPR1","FZD5","GABBR1","GCH1","GNA15","GNAI3","GP1BA","GPC3","GPR132","GPR183","HAS2","HBEGF","HIF1A","HPN","HRH1","ICAM1","ICAM4","ICOSLG","IFITM1","IFNAR1","IFNGR2","IL10","IL10RA","IL12B","IL15","IL15RA","IL18","IL18R1","IL18RAP","IL1A","IL1B","IL1R1","IL2RB","IL4R","IL6","IL7R","INHBA","IRAK2","IRF1","IRF7","ITGA5","ITGB3","ITGB8","KCNA3","KCNJ2","KCNMB2","KIF1B","KLF6","LAMP3","LCK","LCP2","LDLR","LIF","LPAR1","LTA","LY6E","LYN","MARCO","MEFV","MEP1A","MET","MMP14","MSR1","MXD1","MYC","NAMPT","NDP","NFKB1","NFKBIA","NLRP3","NMI","NMUR1","NOD2","NPFFR2","OLR1","OPRK1","OSM","OSMR","P2RX4","P2RX7","P2RY2","PCDH7","PDE4B","PDPN","PIK3R5","PLAUR","PROK2","PSEN1","PTAFR","PTGER2","PTGER4","PTGIR","PTPRE","PVR","RAF1","RASGRP1","RELA","RGS1","RGS16","RHOG","RIPK2","RNF144B","ROS1","RTP4","SCARF1","SCN1B","SELE","SELENOS","SELL","SEMA4D","SERPINE1","SGMS2","SLAMF1","SLC11A2","SLC1A2","SLC28A2","SLC31A1","SLC31A2","SLC4A4","SLC7A1","SLC7A2","SPHK1","SRI","STAB1","TACR1","TACR3","TAPBP","TIMP1","TLR1","TLR2","TLR3","TNFAIP6","TNFRSF1B","TNFRSF9","TNFSF10","TNFSF15","TNFSF9","TPBG","VIP"]
ifng =["ADAR","APOL6","ARID5B","ARL4A","AUTS2","B2M","BANK1","BATF2","BPGM","BST2","BTG1","C1R","C1S","CASP1","CASP3","CASP4","CASP7","CASP8","CCL2","CCL5","CCL7","CD274","CD38","CD40","CD69","CD74","CD86","CDKN1A","CFB","CFH","CIITA","CMKLR1","CMPK2","CMTR1","CSF2RB","CXCL10","CXCL11","CXCL9","DDX60","DHX58","EIF2AK2","EIF4E3","EPSTI1","FAS","FCGR1A","FGL2","FPR1","GBP4","GBP6","GCH1","GPR18","GZMA","HELZ2","HERC6","HIF1A","HLA-A","HLA-B","HLA-DMA","HLA-DQA1","HLA-DRB1","HLA-G","ICAM1","IDO1","IFI27","IFI30","IFI35","IFI44","IFI44L","IFIH1","IFIT1","IFIT2","IFIT3","IFITM2","IFITM3","IFNAR2","IL10RA","IL15","IL15RA","IL18BP","IL2RB","IL4R","IL6","IL7","IRF1","IRF2","IRF4","IRF5","IRF7","IRF8","IRF9","ISG15","ISG20","ISOC1","ITGB7","JAK2","KLRK1","LAP3","LATS2","LCP2","LGALS3BP","LY6E","LYSMD2","MARCHF1","METTL7B","MT2A","MTHFD2","MVP","MX1","MX2","MYD88","NAMPT","NCOA3","NFKB1","NFKBIA","NLRC5","NMI","NOD1","NUP93","OAS2","OAS3","OASL","OGFR","P2RY14","PARP12","PARP14","PDE4B","PELI1","PFKP","PIM1","PLA2G4A","PLSCR1","PML","PNP","PNPT1","PSMA2","PSMA3","PSMB10","PSMB2","PSMB8","PSMB9","PSME1","PSME2","PTGS2","PTPN1","PTPN2","PTPN6","RAPGEF6","RBCK1","RIGI","RIPK1","RIPK2","RNF213","RNF31","RSAD2","RTP4","SAMD9L","SAMHD1","SECTM1","SELP","SERPING1","SLAMF7","SLC25A28","SOCS1","SOCS3","SOD2","SP110","SPPL2A","SRI","SSPN","ST3GAL5","ST8SIA4","STAT1","STAT2","STAT3","STAT4","TAP1","TAPBP","TDRD7","TNFAIP2","TNFAIP3","TNFAIP6","TNFSF10","TOR1B","TRAFD1","TRIM14","TRIM21","TRIM25","TRIM26","TXNIP","UBE2L6","UPP1","USP18","VAMP5","VAMP8","VCAM1","WARS1","XAF1","XCL1","ZBP1","ZNFX1"]


In [None]:
# Per-cell score for the `inflam` gene set; later cells read it back from
# adata.obs['inflam_score'].
# NOTE(review): score_genes falls back to adata.raw when that attribute is present;
# pass use_raw=False if the score must be computed from the re-normalized adata.X — confirm.
sc.tl.score_genes(adata, gene_list=inflam, score_name='inflam_score', random_state=0)

In [None]:
# Per-cell score for the `ifng` gene set; later cells read it back from
# adata.obs['ifng_score'].
# NOTE(review): score_genes falls back to adata.raw when that attribute is present;
# pass use_raw=False if the score must be computed from the re-normalized adata.X — confirm.
sc.tl.score_genes(adata, gene_list=ifng, score_name='ifng_score', random_state=0)

In [None]:

# Cell-level distribution of the inflammatory score in each Collection group
# (violins only, no per-cell points; x labels rotated to stay readable).
sc.pl.violin(adata, "inflam_score", groupby="Collection", stripplot=False, rotation=90, show=True)


In [None]:

# Cell-level distribution of the IFNG score in each Collection group
# (violins only, no per-cell points; x labels rotated to stay readable).
sc.pl.violin(adata, "ifng_score", groupby="Collection", stripplot=False, rotation=90, show=True)


In [None]:
# Collapse cells to one mean inflammatory score per (UPN, Collection) so that each
# point in the figure is a patient sample rather than a single cell.
df = adata.obs.loc[:, ["UPN", "Collection", "inflam_score"]].copy()
avg = df.groupby(["UPN", "Collection"], observed=True)["inflam_score"].mean().reset_index()

fig, ax = plt.subplots(figsize=(6, 4))

# Box summary per Collection, with the individual UPN means drawn on top.
sns.boxplot(data=avg, x="Collection", y="inflam_score", showfliers=False, ax=ax)
sns.stripplot(
    data=avg, x="Collection", y="inflam_score",
    hue="Collection", dodge=False,
    jitter=True, size=5, alpha=0.8,
    ax=ax,
)

ax.set_title("Average Hallmark Inflammatory Score per UPN")
ax.set_ylabel("Mean per UPN")
ax.set_xlabel("Collection")
# The hue only colours the points; an empty legend hides the redundant key.
ax.legend([], [], frameon=False)
fig.tight_layout()
plt.show()

In [None]:
# Collapse cells to one mean IFNG score per (UPN, Collection) so that each point in
# the figure is a patient sample rather than a single cell.
df = adata.obs.loc[:, ["UPN", "Collection", "ifng_score"]].copy()
avg = df.groupby(["UPN", "Collection"], observed=True)["ifng_score"].mean().reset_index()

fig, ax = plt.subplots(figsize=(6, 4))

# Box summary per Collection, with the individual UPN means drawn on top.
sns.boxplot(data=avg, x="Collection", y="ifng_score", showfliers=False, ax=ax)
sns.stripplot(
    data=avg, x="Collection", y="ifng_score",
    hue="Collection", dodge=False,
    jitter=True, size=5, alpha=0.8,
    ax=ax,
)

ax.set_title("Average Hallmark IFNG Score per UPN")
ax.set_ylabel("Mean per UPN")
ax.set_xlabel("Collection")
# The hue only colours the points; an empty legend hides the redundant key.
ax.legend([], [], frameon=False)
fig.tight_layout()
plt.show()

In [None]:
# Fixed colour per Collection level; passed as `palette` to the boxplots below.
timecols = {"NBM": "#0C7515", "NDMM": "#E619B9", "PT": "#CF99C3", "Relapse": '#a8009a'} 


In [None]:
from itertools import combinations
from statannotations.Annotator import Annotator


# One mean inflammatory score per (UPN, Collection, subset).
df = adata.obs[["UPN", "Collection", "subset", "inflam_score"]].copy()

avg = (
    df.groupby(["UPN", "Collection", "subset"], observed=True)["inflam_score"]
      .mean()
      .reset_index()
)

collection_order = ["NBM", "NDMM", "PT"]
comparisons = [("NBM", "NDMM"), ("NDMM", "PT"), ("NBM", "PT")]

out_pdf = "inflam_score_perUPN_boxplots_annotated.pdf"
with PdfPages(out_pdf) as pdf:
    # One PDF page per subset.
    for sb in avg["subset"].unique():
        sub_df = avg[avg["subset"] == sb]

        # The plots and the annotator must share one explicit x order, otherwise
        # the significance brackets can be drawn over the wrong boxes. Groups
        # outside `collection_order` (e.g. Relapse) are kept, appended at the end.
        present = set(sub_df["Collection"].unique())
        plot_order = collection_order + sorted(present - set(collection_order))
        # Only test pairs for which this subset has data in both groups.
        valid_pairs = [pair for pair in comparisons if present.issuperset(pair)]

        fig, ax = plt.subplots(figsize=(2,4))

        sns.boxplot(
            data=sub_df,
            x="Collection",
            y="inflam_score",
            order=plot_order,
            palette=timecols,
            fliersize=0,
            linewidth=1,
            ax=ax
        )
        sns.stripplot(
            data=sub_df,
            x="Collection",
            y="inflam_score",
            order=plot_order,
            color="black",
            size=3,
            jitter=True,
            alpha=0.8,
            ax=ax
        )

        if valid_pairs:
            annotator = Annotator(ax, pairs=valid_pairs, data=sub_df,
                                  x="Collection", y="inflam_score", order=plot_order)
            annotator.configure(
                test='Mann-Whitney',
                text_format='star',
                loc='outside',
                comparisons_correction="BH",
                hide_non_significant=True
            )
            annotator.apply_and_annotate()

        # ---- Style ----
        ax.set_title(f"Inflammatory score per Collection – {sb}")
        ax.set_ylabel("Mean inflam_score per UPN")
        ax.set_xlabel("Collection")
        plt.xticks(rotation=90)
        plt.tight_layout()
        pdf.savefig(fig, bbox_inches="tight")
        plt.close(fig)
 

In [None]:
# One mean IFNG score per (UPN, Collection, subset).
df = adata.obs[["UPN", "Collection", "subset", "ifng_score"]].copy()

avg = (
    df.groupby(["UPN", "Collection", "subset"], observed=True)["ifng_score"]
      .mean()
      .reset_index()
)

# Declared here as well so the cell does not depend on whichever cell last
# assigned these names (a later cell rebinds collection_order to two levels).
collection_order = ["NBM", "NDMM", "PT"]
comparisons = [("NBM", "NDMM"), ("NDMM", "PT"), ("NBM", "PT")]

out_pdf = "ifng_score_perUPN_boxplots_annotated.pdf"
with PdfPages(out_pdf) as pdf:
    # One PDF page per subset.
    for sb in avg["subset"].unique():
        sub_df = avg[avg["subset"] == sb]

        # The plots and the annotator must share one explicit x order, otherwise
        # the significance brackets can be drawn over the wrong boxes. Groups
        # outside `collection_order` (e.g. Relapse) are kept, appended at the end.
        present = set(sub_df["Collection"].unique())
        plot_order = collection_order + sorted(present - set(collection_order))
        # Only test pairs for which this subset has data in both groups.
        valid_pairs = [pair for pair in comparisons if present.issuperset(pair)]

        fig, ax = plt.subplots(figsize=(2,4))

        sns.boxplot(
            data=sub_df,
            x="Collection",
            y="ifng_score",
            order=plot_order,
            palette=timecols,
            fliersize=0,
            linewidth=1,
            ax=ax
        )
        sns.stripplot(
            data=sub_df,
            x="Collection",
            y="ifng_score",
            order=plot_order,
            color="black",
            size=3,
            jitter=True,
            alpha=0.8,
            ax=ax
        )

        if valid_pairs:
            annotator = Annotator(ax, pairs=valid_pairs, data=sub_df,
                                  x="Collection", y="ifng_score", order=plot_order)
            annotator.configure(
                test='Mann-Whitney',
                text_format='star',
                loc='inside',
                comparisons_correction="BH",
                hide_non_significant=True
            )
            annotator.apply_and_annotate()

        # ---- Style ----
        ax.set_title(f"IFNG score per Collection – {sb}")
        ax.set_ylabel("Mean score per UPN")
        ax.set_xlabel("Collection")
        plt.xticks(rotation=90)
        plt.tight_layout()
        pdf.savefig(fig, bbox_inches="tight")
        plt.close(fig)
 

In [None]:
# Restrict to PT cells, then average to one IFNG score per (UPN, MRD, subset).
df = adata.obs.loc[:, ["UPN", "Collection", "MRD", "subset", "ifng_score"]].copy()
ptdf = df.loc[df['Collection'] == 'PT']
avg = ptdf.groupby(["UPN", "MRD", "subset"], observed=True)["ifng_score"].mean().reset_index()

# Drop rows whose MRD is 'Unk' and remove that level from the categorical,
# leaving only the MRD levels that still occur.
avgf = avg.loc[avg['MRD'] != 'Unk'].copy()
avgf["MRD"] = avgf["MRD"].cat.remove_unused_categories()
avgf

In [None]:
# Per-subset boxplots of the per-UPN mean IFNG score, split by MRD status.
# Reads `avgf` (PT samples with known MRD) built in the preceding cell.
mrdcols = {"Negative":"blue", "Positive":"red"}
mrd_pair = ('Positive','Negative')
out_pdf = "ifng_score_perUPN_MRDsplit_boxplots_annotated.pdf"
with PdfPages(out_pdf) as pdf:
    # One PDF page per subset.
    for sb in avgf["subset"].unique():
        sub_df = avgf[avgf["subset"] == sb]

        fig, ax = plt.subplots(figsize=(2,4))

        sns.boxplot(
            data=sub_df,
            x="MRD",
            y="ifng_score",
            palette=mrdcols,
            fliersize=0,
            linewidth=1,
            ax=ax
        )
        sns.stripplot(
            data=sub_df,
            x="MRD",
            y="ifng_score",
            color="black",
            size=3,
            jitter=True,
            alpha=0.8,
            ax=ax
        )

        # A subset may contain only one MRD group; the test needs both.
        if set(mrd_pair).issubset(set(sub_df["MRD"])):
            annotator = Annotator(ax, pairs=[mrd_pair], data=sub_df,
                                  x="MRD", y="ifng_score")
            annotator.configure(
                test='Mann-Whitney',
                text_format='star',
                loc='inside',
                comparisons_correction="BH",
                hide_non_significant=True
            )
            annotator.apply_and_annotate()

        # ---- Style ----
        # The x axis is MRD status, so the title says so (it used to say "per Collection").
        ax.set_title(f"IFNG score by MRD status – {sb}")
        ax.set_ylabel("Mean score per UPN")
        ax.set_xlabel("MRD")
        plt.xticks(rotation=90)
        plt.tight_layout()
        pdf.savefig(fig, bbox_inches="tight")
        plt.close(fig)
 

In [None]:
# Restrict to PT cells, then average to one inflammatory score per (UPN, MRD, subset).
df = adata.obs[["UPN", "Collection", "MRD", "subset", "inflam_score"]].copy()
ptdf = df[df['Collection']=='PT']

avg = (
    ptdf.groupby(["UPN", "MRD", "subset"], observed=True)["inflam_score"]
      .mean()
      .reset_index()
)
# Drop rows with MRD 'Unk' and remove that level from the categorical.
avgf = avg[avg['MRD']!='Unk'].copy()
avgf["MRD"] = avgf["MRD"].cat.remove_unused_categories()


mrdcols = {"Negative":"blue", "Positive":"red"}
mrd_pair = ('Positive','Negative')
out_pdf = "inflam_score_perUPN_MRDsplit_boxplots_annotated.pdf"
with PdfPages(out_pdf) as pdf:
    # One PDF page per subset.
    for sb in avgf["subset"].unique():
        sub_df = avgf[avgf["subset"] == sb]

        fig, ax = plt.subplots(figsize=(2,4))

        sns.boxplot(
            data=sub_df,
            x="MRD",
            y="inflam_score",
            palette=mrdcols,
            fliersize=0,
            linewidth=1,
            ax=ax
        )
        sns.stripplot(
            data=sub_df,
            x="MRD",
            y="inflam_score",
            color="black",
            size=3,
            jitter=True,
            alpha=0.8,
            ax=ax
        )

        # A subset may contain only one MRD group; the test needs both.
        if set(mrd_pair).issubset(set(sub_df["MRD"])):
            annotator = Annotator(ax, pairs=[mrd_pair], data=sub_df,
                                  x="MRD", y="inflam_score")
            annotator.configure(
                test='Mann-Whitney',
                text_format='star',
                loc='inside',
                comparisons_correction="BH",
                hide_non_significant=True
            )
            annotator.apply_and_annotate()

        # ---- Style ----
        # The x axis is MRD status, so the title says so (it used to say "per Collection").
        ax.set_title(f"Inflam score by MRD status – {sb}")
        ax.set_ylabel("Mean score per UPN")
        ax.set_xlabel("MRD")
        plt.xticks(rotation=90)
        plt.tight_layout()
        pdf.savefig(fig, bbox_inches="tight")
        plt.close(fig)
 

In [None]:
# Paired NDMM -> PT comparison of the per-UPN mean IFNG score, one page per subset.
#
# The per-UPN table is rebuilt here on purpose: the `avg` left behind by the
# preceding cell is PT-only and holds inflam_score, so it has neither a
# "Collection" nor an "ifng_score" column.
df = adata.obs[["UPN", "Collection", "subset", "ifng_score"]].copy()
avg = (
    df.groupby(["UPN", "Collection", "subset"], observed=True)["ifng_score"]
      .mean()
      .reset_index()
)

# Keep only UPNs that have both NDMM and PT
paired_upns = (
    avg.groupby("UPN", observed=True)["Collection"]
       .apply(lambda x: set(["NDMM", "PT"]).issubset(set(x)))
)
paired_upns = paired_upns[paired_upns].index

paired = avg[avg["UPN"].isin(paired_upns)].copy()

# Define order
collection_order = ["NDMM", "PT"]


out_pdf = "ifng_score_paired_NDMM_PT.pdf"
with PdfPages(out_pdf) as pdf:
    for sb in paired["subset"].unique():
        subset_long = paired[paired["subset"] == sb]
        sub_df = subset_long.pivot(
            index="UPN", columns="Collection", values="ifng_score"
        )

        # Skip if not enough pairs. A UPN can be paired overall yet lack one
        # time point in this subset, so keep complete NDMM/PT rows only.
        if not {"NDMM", "PT"}.issubset(sub_df.columns):
            continue
        sub_df = sub_df.loc[:, collection_order].dropna()
        if len(sub_df) <= 2:
            continue

        # Paired test on the aligned per-UPN values.
        stat, pval = wilcoxon(sub_df["NDMM"], sub_df["PT"], alternative="two-sided")

        # Long format for plotting, restricted to the UPNs/time points tested.
        plot_df = subset_long[
            subset_long["UPN"].isin(sub_df.index)
            & subset_long["Collection"].isin(collection_order)
        ]

        fig, ax = plt.subplots(figsize=(2, 4))

        # Boxplot
        sns.boxplot(
            data=plot_df,
            x="Collection", y="ifng_score",
            order=collection_order,
            palette=timecols, showfliers=False, ax=ax
        )

        # Connect paired samples: x = 0 is NDMM, x = 1 is PT (collection_order).
        for ndmm_score, pt_score in sub_df.itertuples(index=False, name=None):
            ax.plot([0, 1], [ndmm_score, pt_score], color="gray", alpha=0.5, linewidth=1)

        # Overlay points
        sns.stripplot(
            data=plot_df,
            x="Collection", y="ifng_score",
            order=collection_order,
            color="black", size=4, jitter=False, ax=ax
        )

        # Annotate with the paired Wilcoxon p-value computed above instead of
        # letting the annotator run an unpaired test on paired data.
        annotator = Annotator(ax, pairs=[("NDMM", "PT")], data=plot_df,
                              x="Collection", y="ifng_score", order=collection_order)
        annotator.configure(
            test=None,
            text_format='star',
            loc='inside',
            hide_non_significant=True
        )
        annotator.set_pvalues([pval])
        annotator.annotate()


        # Format (n = number of complete NDMM/PT pairs)
        ax.set_title(f"{sb} (n={len(sub_df)})")
        ax.set_xlabel("")
        ax.set_ylabel("Mean score per UPN")
        plt.xticks(rotation=0)
        plt.tight_layout()

        pdf.savefig(fig, bbox_inches="tight")
        plt.close(fig)


In [None]:
# Display the paired per-UPN table for inspection.
paired

In [None]:
# Paired NDMM -> PT comparison of the per-UPN mean IFNG score over all cells.
#
# `df` is rebuilt here on purpose: the frame left behind by the MRD cell holds
# inflam_score only, so it has no "ifng_score" column.
df = adata.obs[["UPN", "Collection", "ifng_score"]].copy()
avg = (
    df.groupby(["UPN", "Collection"], observed=True)["ifng_score"]
      .mean()
      .reset_index()
)

paired_upns = (
    avg.groupby("UPN", observed=True)["Collection"]
       .apply(lambda x: set(["NDMM", "PT"]).issubset(set(x)))
)
paired_upns = paired_upns[paired_upns].index

paired = avg[avg["UPN"].isin(paired_upns)].copy()

# Declared locally so the cell does not depend on an earlier cell's value.
collection_order = ["NDMM", "PT"]

# Wide table (one row per UPN, columns NDMM / PT) for the connectors and the paired test.
pair_table = (
    paired.pivot(index="UPN", columns="Collection", values="ifng_score")
          .loc[:, collection_order]
          .dropna()
)


out_pdf = "ifng_score_paired_NDMM_PT_overall.pdf"
with PdfPages(out_pdf) as pdf:
    fig, ax = plt.subplots(figsize=(2, 4))

    # Boxplot
    sns.boxplot(
        data=paired,
        x="Collection", y="ifng_score",
        order=collection_order,
        palette=timecols, showfliers=False, ax=ax
    )

    # Connect paired samples: x = 0 is NDMM, x = 1 is PT (collection_order).
    for ndmm_score, pt_score in pair_table.itertuples(index=False, name=None):
        ax.plot([0, 1], [ndmm_score, pt_score], color="gray", alpha=0.5, linewidth=1)

    # Overlay points
    sns.stripplot(
        data=paired,
        x="Collection", y="ifng_score",
        order=collection_order,
        color="black", size=4, jitter=False, ax=ax
    )

    # Paired Wilcoxon signed-rank test on the aligned per-UPN values; an unpaired
    # test would ignore the pairing. Skipped when there are too few pairs.
    if len(pair_table) > 2:
        stat, pval = wilcoxon(pair_table["NDMM"], pair_table["PT"], alternative="two-sided")
        annotator = Annotator(ax, pairs=[("NDMM", "PT")], data=paired,
                              x="Collection", y="ifng_score", order=collection_order)
        annotator.configure(
            test=None,
            text_format='star',
            loc='inside',
            hide_non_significant=True
        )
        annotator.set_pvalues([pval])
        annotator.annotate()


    # Format
    ax.set_title("All Myeloids")
    ax.set_xlabel("")
    ax.set_ylabel("Mean ifng_score per UPN")
    plt.xticks(rotation=0)
    plt.tight_layout()

    pdf.savefig(fig, bbox_inches="tight")
    plt.close(fig)


In [None]:
# Marker panel for the immunosuppression dot plot; the order here is the plotting order.
immunosuppressive_genes = [
    "ARG1",    # arginase-1: arginine depletion, T-cell suppression
    "NOS2",    # inducible nitric oxide synthase (iNOS): NO production, T-cell inhibition
    "CD274",   # PD-L1, immune checkpoint ligand
    "IL10",    # anti-inflammatory cytokine
    "TGFB1",   # TGF-beta 1, suppressive cytokine
    "VSIG4",   # complement receptor, inhibitory macrophage marker
    "MARCO",   # scavenger receptor, suppressive macrophage/MDSC phenotype
    "CD200",   # myeloid inhibitory ligand (CD200-CD200R axis)
    "S100A8",  # alarmin, PMN-MDSC recruitment and function
    "S100A9",  # alarmin, PMN-MDSC recruitment and function
    "CXCR2",   # chemokine receptor recruiting suppressive neutrophils
    "CXCL5",   # chemokine associated with PMN-MDSC chemotaxis
    "CTLA4",
]

In [None]:
# Dot plot of the immunosuppressive markers per (subset, Collection) group,
# with Relapse cells left out; standard_scale='var' rescales each gene separately.
not_relapse = adata.obs['Collection'] != 'Relapse'
sc.pl.dotplot(
    adata[not_relapse],
    var_names=immunosuppressive_genes,
    groupby=['subset', 'Collection'],
    standard_scale='var',
)

In [None]:
from scipy.io import mmwrite

# Export the raw counts and annotation tables into ./mtx for downstream use.
out = Path("mtx")
out.mkdir(exist_ok=True)

# transpose to match seurat expected format
mmwrite(out / "matrix.mtx", adata.layers['counts'].T)

# barcodes.tsv and features.tsv are written without a header row; metadata.tsv
# is the obs table again, this time with its header.
# NOTE(review): barcodes.tsv / features.tsv receive the full obs / var tables
# (index plus every column), not just the names — confirm the reader expects that.
for table, file_name, with_header in (
    (adata.obs, "barcodes.tsv", False),
    (adata.var, "features.tsv", False),
    (adata.obs, "metadata.tsv", True),
):
    table.to_csv(out / file_name, sep="\t", header=with_header)