In [None]:
# Import Packages

import csv
import os
import pprint
from itertools import combinations

import anndata as ad
import gseapy as gp
import harmonypy as hm
import igraph
import leidenalg
import liana as li
import matplotlib.pyplot as plt
import mudata as md
import muon as mu
import numpy as np
import pandas as pd
import pyranges as pr
import rdata
import scanpy as sc
import scrublet as scr
import scvi
import seaborn as sns
from adjustText import adjust_text
from scipy import stats
from scipy.io import mmread
from scipy.sparse import csr_matrix
from statannotations.Annotator import Annotator
from statsmodels.stats.multitest import multipletests

# Global muon option, set once before any MuData object is loaded.
# NOTE(review): presumably pull_on_update=False stops MuData.update() from copying
# per-modality obs/var columns up to the MuData level — confirm against the mudata docs.
mu.set_options(pull_on_update=False)

In [None]:
# Load the T-cell MuData object (.h5mu) that every later cell operates on.
T_MDATA_PATH = '/data/imindol02/COVID_multiome/COVID_Objects/T_mdata_COVID.h5mu'
T_mdata = mu.read(T_MDATA_PATH)

In [None]:
# Per-modality kNN graphs built on the 'X_harmony' representation
# (15 components for RNA, 20 for ATAC).
for modality, n_dims in (('rna', 15), ('atac', 20)):
    sc.pp.neighbors(T_mdata[modality], use_rep='X_harmony', n_neighbors=10, n_pcs=n_dims)

# Joint neighbor graph across modalities, stored under the 'wnn' key.
# Leiden clustering and the UMAP below are both computed from that graph.
mu.pp.neighbors(T_mdata, key_added='wnn')
sc.tl.leiden(T_mdata, resolution=1.0, neighbors_key='wnn', key_added='leiden_wnn')
mu.tl.umap(T_mdata, neighbors_key='wnn')

wnn_overview_keys = ['cell_type', 'leiden_wnn']
mu.pl.umap(T_mdata, color=wnn_overview_keys, size=10, neighbors_key='wnn')

In [None]:
# Marker-gene signatures; each one becomes a per-cell score column named after its key.
gene_signatures = {
    'CD4_Score': ['CD4', 'CCR7', 'CD40LG'],
    'CD8_Score': ['CD8A', 'CD8B', 'RUNX3', 'EOMES', 'KLRG1', 'ZEB2'],
    'MAIT_Score': ['TRAV1-2', 'ZBTB16', 'KLRB1', 'SLC4A10'],
    'Treg_Score': ['FOXP3', 'CTLA4', 'IRF4', 'BATF', 'TNFRSF18', 'TOX2', 'PRDM1', 'ICOS', 'CCR4'],
    'Naive_Score': ['CCR7', 'TCF7', 'LEF1'],
    'Memory_Score': ['S100A4', 'CCL5'],
}

# The RNA modality is the source of expression values for every score.
# (This name was previously used without being defined anywhere in the notebook.)
adata_rna = T_mdata['rna']

for score_name, genes in gene_signatures.items():
    # Only score on markers that exist in the RNA modality; report the ones dropped
    # so a typo in a gene symbol does not go unnoticed.
    valid_genes = [g for g in genes if g in adata_rna.var_names]
    missing_genes = [g for g in genes if g not in valid_genes]
    if missing_genes:
        print(f"{score_name}: genes not found in the RNA modality, skipped: {missing_genes}")
    if not valid_genes:
        raise ValueError(f"{score_name}: none of the signature genes are present in the RNA modality")

    # Scale the signature genes on a copy so the stored expression matrix is untouched,
    # then use the per-cell mean of the scaled values as the score.
    temp_adata = adata_rna[:, valid_genes].copy()
    sc.pp.scale(temp_adata, max_value=10)
    score_values = np.asarray(temp_adata.X.mean(axis=1)).ravel()

    # Store the score on the modality and mirror it on the MuData-level obs for plotting.
    adata_rna.obs[score_name] = score_values
    T_mdata.obs[score_name] = adata_rna.obs[score_name]

score_names = list(gene_signatures.keys())

# Plot on T_mdata: the scores were copied to T_mdata.obs above and the 'wnn' UMAP
# was computed on the MuData object. vmax holds one percentile cap per score, in
# the same order as score_names.
mu.pl.umap(
    T_mdata,
    color=score_names,
    neighbors_key='wnn',
    cmap='viridis',
    vmax=['p97', 'p99', 'p99', 'p97', 'p99', 'p99'],
    vmin='p0',
    s=30,
    ncols=3,
)

In [None]:
# Subset the RNA modality to the three CD4 subtypes.
# The mask is taken from the RNA modality's own obs (the same column that
# groupby='subtype' reads below). A mask built from T_mdata.obs is only valid
# if both tables list the cells in exactly the same order.
rna_subtype = T_mdata['rna'].obs['subtype']
CD4_adata = T_mdata['rna'][rna_subtype.isin(['Naive CD4', 'CM CD4', 'EM CD4'])].copy()

# Matrix plot of naive/memory marker genes per CD4 subtype; show=False returns
# the axes so the tick labels can be adjusted before display.
ax_dict = sc.pl.matrixplot(
    CD4_adata,
    var_names=['CCR7', 'SELL', 'TCF7', 'S100A4', 'CCL5'],
    groupby='subtype',
    standard_scale='var',
    cmap='Reds',
    show=False
)

ax = ax_dict['mainplot_ax']
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

In [None]:
# Matrix plot of the signature scores (score_names) grouped by subtype,
# drawn with standard_scale='var'.
score_matrix_kwargs = dict(
    var_names=score_names,
    groupby='subtype',
    standard_scale='var',
    colorbar_title='Normalized Score',
    cmap='Reds',
    show=False,
)
ax_dict = sc.pl.matrixplot(T_mdata, **score_matrix_kwargs)

# Tilt the x tick labels on the main panel before showing the figure.
ax = ax_dict['mainplot_ax']
tick_labels = ax.get_xticklabels()
ax.set_xticklabels(tick_labels, rotation=60, ha='right')
plt.show()

In [None]:
import math

# Grid of violin plots: one panel per signature score, grouped by subtype.
ncols = 3
nrows = math.ceil(len(score_names) / ncols)
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(5 * ncols, 5 * nrows))
axes = axes.flatten()

for panel, score in zip(axes, score_names):
    sc.pl.violin(
        T_mdata,
        keys=score,
        groupby='subtype',
        rotation=45,
        jitter=False,
        ax=panel,
        show=False,
    )
    panel.set_title(score, fontsize=14)
    panel.set_ylabel("Score")

    # Anchor the rotated tick labels at their right edge.
    for tick_label in panel.get_xticklabels():
        tick_label.set_horizontalalignment('right')

# Switch off any grid cells left without a score.
for spare_panel in axes[len(score_names):]:
    spare_panel.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Per-patient subtype composition (row-normalised crosstab, in %), compared
# between vaccine groups with one box/strip panel per subtype.
df = T_mdata.obs.copy()

props = pd.crosstab(df['patientID'], df['subtype'], normalize='index') * 100
# Capture the subtype columns before the grouping column is appended, so the
# list never has to be derived by dropping the metadata column again.
cell_types = list(props.columns)
pat_type_map = df[['patientID', 'vaccine type']].drop_duplicates().set_index('patientID')['vaccine type']
props['vaccine type'] = props.index.map(pat_type_map)

n_cols = 3
# At least one row so plt.subplots stays valid even when there is nothing to plot.
n_rows = max(1, (len(cell_types) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
axes = axes.flatten()
order = ['HC', 'Group A', 'Group B', 'Group C']
pairs = list(combinations(order, 2))

for ax, c_type in zip(axes, cell_types):
    # Both layers get the same order / hue_order so boxes and points are
    # restricted to the same groups and share one colour per group.
    sns.boxplot(
        data=props,
        x='vaccine type',
        y=c_type,
        order=order,
        hue='vaccine type',
        hue_order=order,
        ax=ax,
        palette="Set2",
        showfliers=False
    )
    sns.stripplot(
        data=props,
        x='vaccine type',
        y=c_type,
        order=order,
        ax=ax,
        hue='vaccine type',
        hue_order=order,
        palette="Set2",
        edgecolor='black',
        linewidth=1,
        size=3,
        alpha=0.7,
        jitter=True
    )
    # Independent t-test for every pair of groups; only significant pairs are drawn.
    annotator = Annotator(ax, pairs, data=props, x='vaccine type', y=c_type, order=order)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside', verbose=0, hide_non_significant=True)
    annotator.apply_and_annotate()
    ax.set_title(c_type, fontsize=14, fontweight='bold')
    ax.set_ylabel("Proportion (%)")
    ax.set_xlabel("")

# Hide unused panels; slicing by len(cell_types) avoids relying on a loop index
# that is stale or undefined when the loop body never runs.
for spare_ax in axes[len(cell_types):]:
    spare_ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Gene panel for the matrix plot.
# Aliases noted by the author: CR2 (CD21) / MME (CD10) / ITGAX (CD11c)
genes_of_interest = [
    'CR2', 'CD27', 'MME', 'TBX21', 'ZEB2', 'ITGAX',
    'FCRL5', 'FCRL2', 'FCRL3',
    'LILRB1', 'LILRB2', 'LAIR1',
    'SIGLEC6', 'SIGLEC10',
]

gene_matrix_kwargs = dict(
    var_names=genes_of_interest,
    groupby='subtype',
    use_raw=False,
    standard_scale='var',
    cmap='Reds',
    show=False,
)
ax_dict = sc.pl.matrixplot(T_mdata, **gene_matrix_kwargs)

# Tilt the x tick labels on the main panel before showing the figure.
ax = ax_dict['mainplot_ax']
tick_labels = ax.get_xticklabels()
ax.set_xticklabels(tick_labels, rotation=45, ha='right')
plt.show()

In [None]:
# Dot plot of cytokine genes per subtype.
genes_of_interest = ['IL2', 'IL4', 'IL6', 'IL10', 'IL12A', 'IL12B', 'IL17A', 'IL23A', 'EBI3', 'TNF', 'LTA', 'TGFB1', 'IFNA1', 'IFNG', 'CSF2']
DOT = sc.pl.DotPlot(T_mdata,
                    var_names=genes_of_interest,
                    groupby='subtype'
                    )
DOT.style(largest_dot = 100.0, cmap = 'Reds')
# Build the figure once, then edit its axes in place.
DOT.make_figure()
ax = DOT.ax_dict['mainplot_ax']
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# Display with plt.show(): DOT.show() rebuilds the figure via make_figure(),
# which would discard the tick-label rotation applied above.
plt.show()

In [None]:
# Dot plot of IFNAR1 per subtype.
DOT = sc.pl.DotPlot(T_mdata,
                    var_names=['IFNAR1'],
                    groupby='subtype'
                    )
DOT.style(largest_dot = 100.0, cmap = 'Reds')
# Build the figure once, then edit its axes in place.
DOT.make_figure()
ax = DOT.ax_dict['mainplot_ax']
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# Display with plt.show(): DOT.show() rebuilds the figure via make_figure(),
# which would discard the tick-label rotation applied above.
plt.show()

In [None]:
# Rank genes per subtype (Wilcoxon), then run pre-ranked GSEA for the selected
# groups and plot the 20 terms with the highest NES.
# `gp` is gseapy, imported in the notebook's top import cell.
sc.tl.rank_genes_groups(T_mdata['rna'], 'subtype', method='wilcoxon')

# Only MAIT is analysed. To run every subtype instead, use
# list(T_mdata['rna'].obs['subtype'].cat.categories).
disease_groups = ['MAIT']

rank_results = T_mdata['rna'].uns['rank_genes_groups']

for group in disease_groups:
    deg_df = pd.DataFrame({
        'gene': rank_results['names'][group],
        'logFC': rank_results['logfoldchanges'][group],
        'pval': rank_results['pvals_adj'][group]  # adjusted p-values
    })

    # Ranking metric: log fold change weighted by -log10(adjusted p);
    # the 1e-300 offset keeps the logarithm finite when p == 0.
    deg_df['score'] = deg_df['logFC'] * -np.log10(deg_df['pval'] + 1e-300)
    # Genes with a NaN/inf score cannot be placed in a ranking, so drop them.
    deg_df = deg_df[np.isfinite(deg_df['score'])]
    deg_df = deg_df.sort_values('score', ascending=False)
    deg_df = deg_df.drop_duplicates('gene', keep='first')
    rnk = deg_df.set_index('gene')['score']

    pre_res = gp.prerank(
        rnk=rnk,
        gene_sets='GO_Biological_Process_2025',
        threads=4,
        min_size=5,
        max_size=1000,
        permutation_num=100
    )

    terms = pre_res.res2d.sort_values('NES', ascending=False).head(20)
    gp.barplot(
        terms,
        column='NES',
        title=f'{group} GSEA Results (NES)',
        top_term=20,
        color='red'
    )

In [None]:
import networkx as nx
import requests
import io
from matplotlib.lines import Line2D

def draw_ppi_network_with_pathway(gene_list, pathway_dict, species=9606, score_threshold=400):
    """Draw a STRING protein-protein interaction network with pathway rings.

    Queries the STRING ``network`` endpoint for ``gene_list``, keeps the first
    100 returned interactions, and draws the resulting graph. Each node is a
    light-blue disc sized by its degree. Every pathway a gene belongs to adds
    one coloured ring around its node.

    Parameters
    ----------
    gene_list : list of str
        Gene identifiers sent to STRING.
    pathway_dict : dict
        Maps a pathway name to an iterable of gene names. Genes that are not
        nodes of the returned network are ignored. Colours are assigned from a
        six-colour palette and repeat when there are more than six pathways.
    species : int, optional
        Species identifier passed to STRING (default 9606).
    score_threshold : int, optional
        Minimum interaction score, sent to STRING as ``required_score``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If the gene list is empty or
        the request does not return HTTP 200, a warning is issued and nothing
        is drawn.
    """
    import warnings

    if not gene_list:
        warnings.warn("draw_ppi_network_with_pathway: empty gene list, nothing to draw.")
        return

    string_api_url = "https://string-db.org/api/tsv/network"
    params = {
        "identifiers": "%0d".join(gene_list),
        "species": species,
        # Previously the threshold argument was accepted but never sent.
        "required_score": score_threshold,
        "caller_identity": "www.awesome_app.org"
    }
    # Timeout so an unresponsive server cannot hang the notebook indefinitely.
    response = requests.post(string_api_url, data=params, timeout=60)
    if response.status_code != 200:
        warnings.warn(
            f"STRING request failed with HTTP status {response.status_code}; no network drawn."
        )
        return

    df_ppi = pd.read_csv(io.StringIO(response.text), sep='\t')
    # Cap the plot at the first 100 interactions returned.
    df_ppi = df_ppi.head(100)

    G = nx.from_pandas_edgelist(
        df_ppi,
        source='preferredName_A',
        target='preferredName_B',
        edge_attr='score'
    )

    # Assign one palette colour per pathway and collect, per node, the colours
    # of every pathway it belongs to (in pathway_dict order).
    palette = ['#E41A1C', '#4DAF4A', '#FFFF33', '#377EB8', '#FF7F00', '#984EA3']
    gene_pathway_colors = {node: [] for node in G.nodes()}
    pathway_legend_colors = {}

    for i, (p_name, p_genes) in enumerate(pathway_dict.items()):
        color = palette[i % len(palette)]
        pathway_legend_colors[p_name] = color
        for gene in p_genes:
            if gene in gene_pathway_colors:
                gene_pathway_colors[gene].append(color)

    pos = nx.spring_layout(G, k=0.7, seed=42)
    # Base disc size grows with node degree.
    base_node_sizes = [G.degree[node] * 50 + 300 for node in G.nodes()]

    fig, ax = plt.subplots(figsize=(15, 15))
    nx.draw_networkx_edges(G, pos, width=1, alpha=0.2, edge_color='gray', ax=ax)

    max_layers = max((len(colors) for colors in gene_pathway_colors.values()), default=0)
    ring_width_step = 300

    # Rings are drawn as progressively smaller filled discs, largest first:
    # layer 0 is the outermost ring, and each later layer covers the centre of
    # the previous one. The light-blue base disc is painted last, on top.
    for layer_idx in range(max_layers):
        nodes_in_layer = []
        sizes_in_layer = []
        colors_in_layer = []
        for i, node in enumerate(G.nodes()):
            colors = gene_pathway_colors[node]
            if len(colors) > layer_idx:
                nodes_in_layer.append(node)
                colors_in_layer.append(colors[layer_idx])
                added_size = (len(colors) - layer_idx) * ring_width_step
                sizes_in_layer.append(base_node_sizes[i] + added_size)

        if nodes_in_layer:
            nx.draw_networkx_nodes(
                G, pos,
                nodelist=nodes_in_layer,
                node_size=sizes_in_layer,
                node_color=colors_in_layer,
                node_shape='o',
                alpha=1.0,
                ax=ax
            )
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=list(G.nodes()),
        node_size=base_node_sizes,
        node_color='lightblue', alpha=1.0,
        edgecolors='white', linewidths=1.5,
        ax=ax
    )
    nx.draw_networkx_labels(G, pos, font_size=9, font_weight='bold', ax=ax)

    # Legend: one hollow marker per pathway, outlined in the pathway colour.
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', label=p_name,
               markerfacecolor='white', markeredgecolor=p_color,
               markersize=10, markeredgewidth=3)
        for p_name, p_color in pathway_legend_colors.items()
    ]

    ax.legend(handles=legend_elements, loc='upper left', title='Pathways (Outer Rings)', bbox_to_anchor=(1.02, 1))
    ax.set_title(f"PPI Network (Nodes: {len(G.nodes)}, Edges: {len(G.edges)})", fontsize=15)
    ax.axis('off')
    plt.show()

In [None]:
group_name = 'MAIT'

# Drop genes whose symbol starts with MT-, RPS or RPL before ranking.
excluded_prefixes = ('MT-', 'RPS', 'RPL')
genes_to_keep = [gene for gene in T_mdata['rna'].var_names if not gene.startswith(excluded_prefixes)]
T_adata = T_mdata['rna'][:, genes_to_keep].copy()
sc.tl.rank_genes_groups(T_adata, 'subtype', method='t-test')  # alternative: wilcoxon

# Significant genes for the chosen group (adjusted p < 0.05), capped at the first 100.
deg_df = sc.get.rank_genes_groups_df(T_adata, group=group_name)
is_significant = deg_df['pvals_adj'] < 0.05
gene_list = deg_df.loc[is_significant, 'names'].tolist()[:100]

# Enrichr terms to highlight as rings in the PPI network.
target_pathways = [
    'Interleukin-18-Mediated Signaling Pathway (GO:0035655)',
    'Interleukin-12-Mediated Signaling Pathway (GO:0035722)',
    'Response to Transforming Growth Factor Beta (GO:0071559)',
    'Positive Regulation of Type II Interferon Production (GO:0032729)'
]

enrichr_results = gp.enrichr(
    gene_list=gene_list,
    gene_sets=['GO_Biological_Process_2025'],
    organism='Human',
    outdir=None,
)

df = enrichr_results.res2d.copy()
filtered_df = df[df['Term'].isin(target_pathways)].copy()

# Map each retained term to its member genes (the 'Genes' column is ';'-separated).
pathway_dict = {
    term: genes.split(';')
    for term, genes in zip(filtered_df['Term'], filtered_df['Genes'])
}

draw_ppi_network_with_pathway(gene_list, pathway_dict, score_threshold=400)

In [None]:
# UMAP (neighbors_key='wnn') coloured by the three TGFBR genes.
tgfb_receptor_genes = ['TGFBR1', 'TGFBR2', 'TGFBR3']
mu.pl.umap(
    T_mdata,
    color=tgfb_receptor_genes,
    color_map='Reds',
    neighbors_key='wnn',
    frameon=False,
    s=10,
)

In [None]:
# MAIT cells from the RNA modality, then every vaccine type except 'HC'.
is_mait = T_mdata['rna'].obs['subtype'].isin(['MAIT'])
MAIT_subset = T_mdata['rna'][is_mait].copy()
not_hc = ~MAIT_subset.obs['vaccine type'].isin(['HC'])
MAIT_subset = MAIT_subset[not_hc]

genes_of_interest = ['TGFBR1', 'TGFBR2', 'TGFBR3', 'SMAD2', 'SMAD3']

# Matrix plot of the selected genes per vaccine type within the MAIT subset.
mait_matrix_kwargs = dict(
    var_names=genes_of_interest,
    groupby='vaccine type',
    standard_scale='var',
    cmap='RdBu_r',
    title='MAIT Matrixplot',
    show=False,
)
ax_dict = sc.pl.matrixplot(MAIT_subset, **mait_matrix_kwargs)

# Tilt the x tick labels on the main panel before showing the figure.
ax = ax_dict['mainplot_ax']
tick_labels = ax.get_xticklabels()
ax.set_xticklabels(tick_labels, rotation=45, ha='right')
plt.show()