# <b>Neutrophil Lymphangioleiomyomatosis (LAM) Analysis - for Sasha-Gaye Richards, PhD from the Johnson Lab</b>
This notebook analyses different neutrophil phenotypes using publicly available single-cell RNA-seq datasets:
- Single-Cell Transcriptomic Analysis Identifies a Unique Pulmonary Lymphangioleiomyomatosis Cell
    - DOI: https://doi.org/10.1164/rccm.201912-2445OC
    - GEO: GSE135851
        - Samples:
            - GSE135851_LAM1
            - GSE135851_LAM2
            - GSE135851_LAM3
            - GSE135851_LAM4
- Midkine expression by stem-like tumor cells drives persistence to mTOR inhibition and an immune-suppressive microenvironment
    - DOI: https://doi.org/10.1038/s41467-022-32673-7
    - GEO: GSE190260
        - Samples:
            - GSE190260_LAM1
            - GSE190260_LAM2
            - GSE190260_LAM3
For a reference dataset the following publication was used:
- Neutrophil single-cell analysis identifies a type II interferon-related subset for predicting relapse of autoimmune small vessel vasculitis.
    - DOI: https://doi.org/10.1038/s41467-025-58550-7
    - Online: https://ddbj.nig.ac.jp/public/ddbj_database/gea/experiment/E-GEAD-000/E-GEAD-867/
        - Samples:
            - HD-1
            - HD-2
            - HD-3
            - HD-4
            - HD-5
            - HD-6
            - HD-7
            - MPA-1
            - MPA-2
            - MPA-3
            - MPA-4
            - MPA-5
            - MPA-6  

## <b>Analysis</b>
The analysis was done by first isolating neutrophils using *MPO* as a marker.

Datasets were then merged using the *harmonypy* package.

After merging, neutrophil subsets were labelled **N1** or **N2**, depending on the following sets of gene expression markers.

| **N1 Neutrophils** | **N2 Neutrophils** |
| --- | --- |
| CD11b (ITGAM) | CD11b (ITGAM) |
| CD16 (FCGR3B) | CD16 (FCGR3B) |
| CD66 (CEACAM1) | CD66 (CEACAM1) |
| CD177 (GPI) | CD177 (GPI) |
| ICAM1 | CD62L (SELL/L-selectin)|
| CXCL1 | CD206 (MRC1) |
| CXCL2 | |
| CXCL3 | |
| CXCL10 | |
| TNF | |
| MYD88 | |

In [None]:
#| remove-cell
import random
import scanpy as sc
import scanpy.external as sce
import anndata as adata
import numpy as np
import pandas as pd
import scipy
import os
import scipy.io as sio
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from matplotlib.colors import Normalize
import seaborn as sns
import gseapy
import scvi
import tempfile
import numba
import pooch
import pynndescent
import torch
import scipy.sparse as sp
import gdown
import gzip
import shutil
import urllib.request
from gprofiler import GProfiler
import celltypist
from celltypist import models
import warnings
import logging
from IPython.display import display, HTML
from sc_plot_and_embed import sc_plot_and_embed
from onedrive_share import onedrive_direct_link 

# Initialize the g:Profiler client; return_dataframe=True asks for results as DataFrames.
gp = GProfiler(return_dataframe=True)
# NOTE(review): this prints the bound `profile` method object, not a query
# result - presumably a check that the client was created; confirm it is needed.
print(gp.profile)

# Absolute, machine-specific path to the analysis folder.
data_path = '/gpfs01/home/mzxcs8/Extra/Neutrophil LAM Phenotype - Johnson Lab/Analysis_LAM/'

In [None]:
#| remove-cell
# Seed Python's built-in RNG only (NumPy's global RNG is not seeded here).
random.seed(1325)
sc.settings.verbosity = 0 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Silence all Python warnings and raise the root logger threshold to ERROR.
warnings.filterwarnings("ignore")
logging.getLogger().setLevel(logging.ERROR)
# Figure defaults: 100 dpi for display and saving, 6x5 figure size, TIFF output.
sc.settings.set_figure_params(dpi=100, fontsize=10, dpi_save=100, figsize=(6,5), format='tiff')
# Folder scanpy writes saved figures to.
sc.settings.figdir = '/gpfs01/home/mzxcs8/Extra/Neutrophil LAM Phenotype - Johnson Lab/Analysis_LAM/_static/Figures'
# NOTE(review): this share link points at a "MacrophagePaper ... LCC_Healthy"
# folder - confirm it is the intended target for this analysis.
OneDrive = 'https://uniofnottm.sharepoint.com/:f:/r/sites/MacrophagePaper/Shared%20Documents/General/Bioinformatics/Analysis%20for%20Publication/scRNASeq/LCC_Healthy/Figures?csf=1&web=1&e=BeWfn7'

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD1_AD, target_sum=1e4)
sc.pp.log1p(HD1_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD1_AD)
sc.tl.umap(HD1_AD)
sc.tl.leiden(HD1_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD1_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD1_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD1_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD1_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-1 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD1_AD.obs["Cell_Type"] = "Other"
HD1_AD.obs["Cell_Type"] = HD1_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD1_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD1_AD. No cells are filtered out at this point.
HD1_AD_Neutrophil = HD1_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD1_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD1_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD1_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD1_AD_Neutrophil.obs["Cell_Type"] = HD1_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD1_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD1_AD_Neutrophil.obs["Cell_Type"] = HD1_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD1_AD_Neutrophil.obs['Cell_Type'].value_counts())

In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD1_AD_Neutrophil.obs['NP_Only'] = HD1_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD1_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD1_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-1 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD2_AD, target_sum=1e4)
sc.pp.log1p(HD2_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD2_AD)
sc.tl.umap(HD2_AD)
sc.tl.leiden(HD2_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD2_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD2_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD2_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD2_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-2 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD2_AD.obs["Cell_Type"] = "Other"
HD2_AD.obs["Cell_Type"] = HD2_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD2_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD2_AD. No cells are filtered out at this point.
HD2_AD_Neutrophil = HD2_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD2_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD2_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD2_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD2_AD_Neutrophil.obs["Cell_Type"] = HD2_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD2_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD2_AD_Neutrophil.obs["Cell_Type"] = HD2_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD2_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD2_AD_Neutrophil.obs['NP_Only'] = HD2_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD2_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD2_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-2 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD3_AD, target_sum=1e4)
sc.pp.log1p(HD3_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD3_AD)
sc.tl.umap(HD3_AD)
sc.tl.leiden(HD3_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD3_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD3_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD3_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD3_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-3 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD3_AD.obs["Cell_Type"] = "Other"
HD3_AD.obs["Cell_Type"] = HD3_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD3_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD3_AD. No cells are filtered out at this point.
HD3_AD_Neutrophil = HD3_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD3_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD3_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD3_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD3_AD_Neutrophil.obs["Cell_Type"] = HD3_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD3_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD3_AD_Neutrophil.obs["Cell_Type"] = HD3_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD3_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD3_AD_Neutrophil.obs['NP_Only'] = HD3_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD3_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD3_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-3 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD4_AD, target_sum=1e4)
sc.pp.log1p(HD4_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD4_AD)
sc.tl.umap(HD4_AD)
sc.tl.leiden(HD4_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD4_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD4_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD4_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# The N1 panel for this sample does not include CXCL3.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD4_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-4 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD4_AD.obs["Cell_Type"] = "Other"
HD4_AD.obs["Cell_Type"] = HD4_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD4_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD4_AD. No cells are filtered out at this point.
HD4_AD_Neutrophil = HD4_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD4_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD4_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD4_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD4_AD_Neutrophil.obs["Cell_Type"] = HD4_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD4_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD4_AD_Neutrophil.obs["Cell_Type"] = HD4_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD4_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD4_AD_Neutrophil.obs['NP_Only'] = HD4_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD4_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# The N1 panel for this sample does not include CXCL3.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD4_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-4 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD5_AD, target_sum=1e4)
sc.pp.log1p(HD5_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD5_AD)
sc.tl.umap(HD5_AD)
sc.tl.leiden(HD5_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD5_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD5_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD5_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD5_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-5 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD5_AD.obs["Cell_Type"] = "Other"
HD5_AD.obs["Cell_Type"] = HD5_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD5_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD5_AD. No cells are filtered out at this point.
HD5_AD_Neutrophil = HD5_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD5_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD5_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD5_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD5_AD_Neutrophil.obs["Cell_Type"] = HD5_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD5_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD5_AD_Neutrophil.obs["Cell_Type"] = HD5_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD5_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD5_AD_Neutrophil.obs['NP_Only'] = HD5_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD5_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD5_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-5 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD6_AD, target_sum=1e4)
sc.pp.log1p(HD6_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD6_AD)
sc.tl.umap(HD6_AD)
sc.tl.leiden(HD6_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD6_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD6_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD6_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD6_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-6 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD6_AD.obs["Cell_Type"] = "Other"
HD6_AD.obs["Cell_Type"] = HD6_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD6_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD6_AD. No cells are filtered out at this point.
HD6_AD_Neutrophil = HD6_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD6_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD6_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD6_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD6_AD_Neutrophil.obs["Cell_Type"] = HD6_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD6_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD6_AD_Neutrophil.obs["Cell_Type"] = HD6_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD6_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD6_AD_Neutrophil.obs['NP_Only'] = HD6_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD6_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD6_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-6 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(HD7_AD, target_sum=1e4)
sc.pp.log1p(HD7_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(HD7_AD)
sc.tl.umap(HD7_AD)
sc.tl.leiden(HD7_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(HD7_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(HD7_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(HD7_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    HD7_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-7 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
HD7_AD.obs["Cell_Type"] = "Other"
HD7_AD.obs["Cell_Type"] = HD7_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
HD7_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify HD7_AD. No cells are filtered out at this point.
HD7_AD_Neutrophil = HD7_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(HD7_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(HD7_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in HD7_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    HD7_AD_Neutrophil.obs["Cell_Type"] = HD7_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
HD7_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

HD7_AD_Neutrophil.obs["Cell_Type"] = HD7_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(HD7_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
HD7_AD_Neutrophil.obs['NP_Only'] = HD7_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(HD7_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    HD7_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - HD-7 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(MPA1_AD, target_sum=1e4)
sc.pp.log1p(MPA1_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(MPA1_AD)
sc.tl.umap(MPA1_AD)
sc.tl.leiden(MPA1_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(MPA1_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(MPA1_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
sc.pl.umap(MPA1_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    MPA1_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - MPA-1 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
MPA1_AD.obs["Cell_Type"] = "Other"
MPA1_AD.obs["Cell_Type"] = MPA1_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
MPA1_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify MPA1_AD. No cells are filtered out at this point.
MPA1_AD_Neutrophil = MPA1_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of ``gene_name`` as a flat array.

    The gene is looked up in ``adata.var_names`` and its column is sliced
    out of ``adata.X``; the column is densified first when ``adata.X`` is a
    SciPy sparse matrix. If the gene is absent, a warning is printed and an
    all-zero array with one entry per row of ``adata`` is returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    column = adata.X[:, adata.var_names.get_loc(gene_name)]
    if sp.issparse(adata.X):
        column = column.toarray()
    return column.flatten()

# Label MPO-expressing cells as neutrophils.
# Per-cell expression vectors for MPO and ELANE.
# NOTE(review): ELANE_values is not used in any of the visible cells.
MPO_values = get_gene_expression(MPA1_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(MPA1_AD_Neutrophil, 'ELANE')

# The new label must exist as a category before it can be assigned.
if "Neutrophil MPO+" not in MPA1_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    MPA1_AD_Neutrophil.obs["Cell_Type"] = MPA1_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Cells with an MPO value above 0.001 are relabelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
MPA1_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

MPA1_AD_Neutrophil.obs["Cell_Type"] = MPA1_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Print the number of cells per label.
print(MPA1_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean column that is True for cells labelled "Neutrophil MPO+", then a UMAP coloured by it.
MPA1_AD_Neutrophil.obs['NP_Only'] = MPA1_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(MPA1_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Cell_Type label; the title names this
# sample and its source dataset.
sc.pl.dotplot(
    MPA1_AD_Neutrophil,
    marker_Neutrophil,
    groupby=["Cell_Type"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - MPA-1 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Normalise each cell to a total of 1e4 counts, then log1p-transform.
# Order matters: log1p is applied to the normalised values.
sc.pp.normalize_total(MPA2_AD, target_sum=1e4)
sc.pp.log1p(MPA2_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (igraph backend,
# resolution 0.5, 2 iterations) -> dendrogram over the Leiden clusters.
sc.pp.neighbors(MPA2_AD)
sc.tl.umap(MPA2_AD)
sc.tl.leiden(MPA2_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
sc.tl.dendrogram(MPA2_AD, groupby = "leiden")

In [None]:
#| remove-cell
# One UMAP per annotation column, drawn in the order leiden, dataset, sample.
for _obs_key in ('leiden', 'dataset', 'sample'):
    sc.pl.umap(MPA2_AD, color=[_obs_key], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAPs coloured by expression of the listed genes, using the 'Reds' colormap.
# CXCL10 is not in this sample's gene list.
sc.pl.umap(MPA2_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker genes shown on the dot plot, grouped by neutrophil phenotype.
# The N1 panel for this sample does not include CXCL3 or CXCL10.
# NOTE(review): "GPI" is the symbol of glucose-6-phosphate isomerase; the
# CD177 antigen is encoded by the gene CD177 - confirm the intended gene.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dot plot of the marker panels per Leiden cluster. `title` sets the figure
# heading (a `label` keyword is only forwarded to matplotlib and is not shown
# as a title); the heading names this sample and its source dataset.
sc.pl.dotplot(
    MPA2_AD,
    marker_Neutrophil,
    groupby=["leiden"],
    dendrogram=True,
    use_raw=False,
    title='Dotplot of Neutrophil specific markers - MPA-2 [E-GEAD-867]',
)

In [None]:
#| remove-cell
# Initialise every cell's Cell_Type label to "Other" and store the column as a categorical.
MPA2_AD.obs["Cell_Type"] = "Other"
MPA2_AD.obs["Cell_Type"] = MPA2_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
# Show the AnnData object's summary as the cell output.
MPA2_AD

In [None]:
#| hide-input
# Work on a full copy of the sample so the labels assigned below do not
# modify MPA2_AD. No cells are filtered out at this point.
MPA2_AD_Neutrophil = MPA2_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of one gene as a flat 1-D NumPy array.

    Parameters
    ----------
    adata
        Object exposing ``var_names`` (a pandas Index), ``X`` (dense or SciPy
        sparse matrix indexed as ``X[:, gene]``) and ``shape``.
    gene_name : str
        Gene to look up in ``adata.var_names``.

    Returns
    -------
    numpy.ndarray
        The gene's column of ``adata.X``. When the gene is absent a warning is
        printed and zeros of length ``adata.shape[0]`` are returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    gene_idx = adata.var_names.get_loc(gene_name)
    column = adata.X[:, gene_idx]
    if sp.issparse(adata.X):
        column = column.toarray()
    # np.asarray first: on an np.matrix, .flatten() alone keeps a 2-D (1, n)
    # shape, which would break boolean masking of adata.obs downstream.
    return np.asarray(column).flatten()

# Neutrophil calling: pull per-cell MPO and ELANE values from the copy.
# ELANE_values is not used anywhere else in this cell.
MPO_values = get_gene_expression(MPA2_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(MPA2_AD_Neutrophil, 'ELANE')

# Register the new label as a category before assigning it to the categorical column.
if "Neutrophil MPO+" not in MPA2_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    MPA2_AD_Neutrophil.obs["Cell_Type"] = MPA2_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Every cell whose MPO value in X exceeds 0.001 is labelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
MPA2_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

# Re-cast to categorical after the assignment.
MPA2_AD_Neutrophil.obs["Cell_Type"] = MPA2_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Report how many cells carry each label.
print(MPA2_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean flag (True for cells labelled "Neutrophil MPO+") used to highlight them on the UMAP.
MPA2_AD_Neutrophil.obs['NP_Only'] = MPA2_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(MPA2_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Compare "Neutrophil MPO+" against "Other" cells.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "Lam 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA2_AD_Neutrophil,
    marker_Neutrophil,
    groupby = ["Cell_Type"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-2 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Scale every cell to 10,000 total counts, then apply log1p to the expression matrix.
sc.pp.normalize_total(MPA3_AD, target_sum=1e4)
sc.pp.log1p(MPA3_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (resolution 0.5, igraph backend).
sc.pp.neighbors(MPA3_AD)
sc.tl.umap(MPA3_AD)
sc.tl.leiden(MPA3_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
# Pre-compute the dendrogram over Leiden clusters for the dot plot grouped by "leiden".
sc.tl.dendrogram(MPA3_AD, groupby = "leiden")

In [None]:
#| remove-cell
# UMAP overviews coloured by Leiden cluster, by dataset and by sample.
sc.pl.umap(MPA3_AD,color=['leiden'], legend_fontsize=8)
sc.pl.umap(MPA3_AD,color=['dataset'], legend_fontsize=8)
sc.pl.umap(MPA3_AD,color=['sample'], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAP feature plots for a selection of the neutrophil / N1 / N2 marker genes.
sc.pl.umap(MPA3_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# `title` is the dotplot argument that draws the figure title; `label` is not a
# dotplot parameter and was only forwarded to matplotlib's scatter call.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "LAM 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA3_AD,
    marker_Neutrophil,
    groupby = ["leiden"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-3 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Label every cell "Other" to start with; the column is made categorical so
# further labels can be registered as categories later.
MPA3_AD.obs["Cell_Type"] = "Other"
MPA3_AD.obs["Cell_Type"] = MPA3_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
MPA3_AD

In [None]:
#| hide-input
# Work on a full copy (no subsetting happens here) so the Cell_Type labelling
# below does not modify MPA3_AD itself.
MPA3_AD_Neutrophil = MPA3_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of one gene as a flat 1-D NumPy array.

    Parameters
    ----------
    adata
        Object exposing ``var_names`` (a pandas Index), ``X`` (dense or SciPy
        sparse matrix indexed as ``X[:, gene]``) and ``shape``.
    gene_name : str
        Gene to look up in ``adata.var_names``.

    Returns
    -------
    numpy.ndarray
        The gene's column of ``adata.X``. When the gene is absent a warning is
        printed and zeros of length ``adata.shape[0]`` are returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    gene_idx = adata.var_names.get_loc(gene_name)
    column = adata.X[:, gene_idx]
    if sp.issparse(adata.X):
        column = column.toarray()
    # np.asarray first: on an np.matrix, .flatten() alone keeps a 2-D (1, n)
    # shape, which would break boolean masking of adata.obs downstream.
    return np.asarray(column).flatten()

# Neutrophil calling: pull per-cell MPO and ELANE values from the copy.
# ELANE_values is not used anywhere else in this cell.
MPO_values = get_gene_expression(MPA3_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(MPA3_AD_Neutrophil, 'ELANE')

# Register the new label as a category before assigning it to the categorical column.
if "Neutrophil MPO+" not in MPA3_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    MPA3_AD_Neutrophil.obs["Cell_Type"] = MPA3_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Every cell whose MPO value in X exceeds 0.001 is labelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
MPA3_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

# Re-cast to categorical after the assignment.
MPA3_AD_Neutrophil.obs["Cell_Type"] = MPA3_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Report how many cells carry each label.
print(MPA3_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean flag (True for cells labelled "Neutrophil MPO+") used to highlight them on the UMAP.
MPA3_AD_Neutrophil.obs['NP_Only'] = MPA3_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(MPA3_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Compare "Neutrophil MPO+" against "Other" cells.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "Lam 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA3_AD_Neutrophil,
    marker_Neutrophil,
    groupby = ["Cell_Type"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-3 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Scale every cell to 10,000 total counts, then apply log1p to the expression matrix.
sc.pp.normalize_total(MPA4_AD, target_sum=1e4)
sc.pp.log1p(MPA4_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (resolution 0.5, igraph backend).
sc.pp.neighbors(MPA4_AD)
sc.tl.umap(MPA4_AD)
sc.tl.leiden(MPA4_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
# Pre-compute the dendrogram over Leiden clusters for the dot plot grouped by "leiden".
sc.tl.dendrogram(MPA4_AD, groupby = "leiden")

In [None]:
#| remove-cell
# UMAP overviews coloured by Leiden cluster, by dataset and by sample.
sc.pl.umap(MPA4_AD,color=['leiden'], legend_fontsize=8)
sc.pl.umap(MPA4_AD,color=['dataset'], legend_fontsize=8)
sc.pl.umap(MPA4_AD,color=['sample'], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAP feature plots for a selection of the neutrophil / N1 / N2 marker genes.
sc.pl.umap(MPA4_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# `title` is the dotplot argument that draws the figure title; `label` is not a
# dotplot parameter and was only forwarded to matplotlib's scatter call.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "LAM 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA4_AD,
    marker_Neutrophil,
    groupby = ["leiden"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-4 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Label every cell "Other" to start with; the column is made categorical so
# further labels can be registered as categories later.
MPA4_AD.obs["Cell_Type"] = "Other"
MPA4_AD.obs["Cell_Type"] = MPA4_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
MPA4_AD

In [None]:
#| hide-input
# Work on a full copy (no subsetting happens here) so the Cell_Type labelling
# below does not modify MPA4_AD itself.
MPA4_AD_Neutrophil = MPA4_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of one gene as a flat 1-D NumPy array.

    Parameters
    ----------
    adata
        Object exposing ``var_names`` (a pandas Index), ``X`` (dense or SciPy
        sparse matrix indexed as ``X[:, gene]``) and ``shape``.
    gene_name : str
        Gene to look up in ``adata.var_names``.

    Returns
    -------
    numpy.ndarray
        The gene's column of ``adata.X``. When the gene is absent a warning is
        printed and zeros of length ``adata.shape[0]`` are returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    gene_idx = adata.var_names.get_loc(gene_name)
    column = adata.X[:, gene_idx]
    if sp.issparse(adata.X):
        column = column.toarray()
    # np.asarray first: on an np.matrix, .flatten() alone keeps a 2-D (1, n)
    # shape, which would break boolean masking of adata.obs downstream.
    return np.asarray(column).flatten()

# Neutrophil calling: pull per-cell MPO and ELANE values from the copy.
# ELANE_values is not used anywhere else in this cell.
MPO_values = get_gene_expression(MPA4_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(MPA4_AD_Neutrophil, 'ELANE')

# Register the new label as a category before assigning it to the categorical column.
if "Neutrophil MPO+" not in MPA4_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    MPA4_AD_Neutrophil.obs["Cell_Type"] = MPA4_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Every cell whose MPO value in X exceeds 0.001 is labelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
MPA4_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

# Re-cast to categorical after the assignment.
MPA4_AD_Neutrophil.obs["Cell_Type"] = MPA4_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Report how many cells carry each label.
print(MPA4_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean flag (True for cells labelled "Neutrophil MPO+") used to highlight them on the UMAP.
MPA4_AD_Neutrophil.obs['NP_Only'] = MPA4_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(MPA4_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Compare "Neutrophil MPO+" against "Other" cells.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "Lam 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA4_AD_Neutrophil,
    marker_Neutrophil,
    groupby = ["Cell_Type"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-4 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Scale every cell to 10,000 total counts, then apply log1p to the expression matrix.
sc.pp.normalize_total(MPA5_AD, target_sum=1e4)
sc.pp.log1p(MPA5_AD)

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (resolution 0.5, igraph backend).
sc.pp.neighbors(MPA5_AD)
sc.tl.umap(MPA5_AD)
sc.tl.leiden(MPA5_AD,flavor = "igraph", resolution=0.5, n_iterations = 2)
# Pre-compute the dendrogram over Leiden clusters for the dot plot grouped by "leiden".
sc.tl.dendrogram(MPA5_AD, groupby = "leiden")

In [None]:
#| remove-cell
# UMAP overviews coloured by Leiden cluster, by dataset and by sample.
sc.pl.umap(MPA5_AD,color=['leiden'], legend_fontsize=8)
sc.pl.umap(MPA5_AD,color=['dataset'], legend_fontsize=8)
sc.pl.umap(MPA5_AD,color=['sample'], legend_fontsize=8)

In [None]:
#| remove-cell
# UMAP feature plots for a selection of the neutrophil / N1 / N2 marker genes.
sc.pl.umap(MPA5_AD,color=['MPO','ELANE','SLPI','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=8, cmap ='Reds' )

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# `title` is the dotplot argument that draws the figure title; `label` is not a
# dotplot parameter and was only forwarded to matplotlib's scatter call.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "LAM 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA5_AD,
    marker_Neutrophil,
    groupby = ["leiden"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-5 [E-GEAD-867]'
)

In [None]:
#| remove-cell
# Label every cell "Other" to start with; the column is made categorical so
# further labels can be registered as categories later.
MPA5_AD.obs["Cell_Type"] = "Other"
MPA5_AD.obs["Cell_Type"] = MPA5_AD.obs["Cell_Type"].astype("category")

In [None]:
#| remove-cell
MPA5_AD

In [None]:
#| hide-input
# Work on a full copy (no subsetting happens here) so the Cell_Type labelling
# below does not modify MPA5_AD itself.
MPA5_AD_Neutrophil = MPA5_AD.copy()

def get_gene_expression(adata, gene_name):
    """Return the per-cell expression of one gene as a flat 1-D NumPy array.

    Parameters
    ----------
    adata
        Object exposing ``var_names`` (a pandas Index), ``X`` (dense or SciPy
        sparse matrix indexed as ``X[:, gene]``) and ``shape``.
    gene_name : str
        Gene to look up in ``adata.var_names``.

    Returns
    -------
    numpy.ndarray
        The gene's column of ``adata.X``. When the gene is absent a warning is
        printed and zeros of length ``adata.shape[0]`` are returned.
    """
    if gene_name not in adata.var_names:
        print(f"Warning: Gene {gene_name} not found in dataset.")
        return np.zeros(adata.shape[0])

    gene_idx = adata.var_names.get_loc(gene_name)
    column = adata.X[:, gene_idx]
    if sp.issparse(adata.X):
        column = column.toarray()
    # np.asarray first: on an np.matrix, .flatten() alone keeps a 2-D (1, n)
    # shape, which would break boolean masking of adata.obs downstream.
    return np.asarray(column).flatten()

# Neutrophil calling: pull per-cell MPO and ELANE values from the copy.
# ELANE_values is not used anywhere else in this cell.
MPO_values = get_gene_expression(MPA5_AD_Neutrophil, 'MPO')
ELANE_values = get_gene_expression(MPA5_AD_Neutrophil, 'ELANE')

# Register the new label as a category before assigning it to the categorical column.
if "Neutrophil MPO+" not in MPA5_AD_Neutrophil.obs["Cell_Type"].cat.categories:
    MPA5_AD_Neutrophil.obs["Cell_Type"] = MPA5_AD_Neutrophil.obs["Cell_Type"].cat.add_categories(["Neutrophil MPO+"]) 

# Every cell whose MPO value in X exceeds 0.001 is labelled "Neutrophil MPO+".
neutrophil_mask = (MPO_values > 0.001)
MPA5_AD_Neutrophil.obs.loc[neutrophil_mask, 'Cell_Type'] = "Neutrophil MPO+"

# Re-cast to categorical after the assignment.
MPA5_AD_Neutrophil.obs["Cell_Type"] = MPA5_AD_Neutrophil.obs["Cell_Type"].astype("category")

# Report how many cells carry each label.
print(MPA5_AD_Neutrophil.obs['Cell_Type'].value_counts())


In [None]:
#| hide-input
# Boolean flag (True for cells labelled "Neutrophil MPO+") used to highlight them on the UMAP.
MPA5_AD_Neutrophil.obs['NP_Only'] = MPA5_AD_Neutrophil.obs['Cell_Type'] == 'Neutrophil MPO+'
sc.pl.umap(MPA5_AD_Neutrophil, color='NP_Only', palette = 'Reds', size = 30, title = 'Neutrophil MPO+')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Compare "Neutrophil MPO+" against "Other" cells.
# NOTE(review): title text inferred from the variable name and the notebook
# header (it previously read "Lam 2 Samples [GSE135851]"); confirm.
sc.pl.dotplot(
    MPA5_AD_Neutrophil,
    marker_Neutrophil,
    groupby = ["Cell_Type"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPA-5 [E-GEAD-867]'
)

### Joining All_LAM with reference Neutrophil Dataset (E-GEAD-867)

In [None]:
#| remove-cell
# Load the two previously saved AnnData objects that are merged below.
# NOTE(review): absolute, user-specific paths; the notebook only runs where these files exist.
All_LAM = sc.read_h5ad('/gpfs01/home/mzxcs8/Extra/Neutrophil LAM Phenotype - Johnson Lab/All_LAM.h5ad')
EGEAD867_LAM = sc.read_h5ad('/gpfs01/home/mzxcs8/Extra/Neutrophil LAM Phenotype - Johnson Lab/EGEAD867_LAM.h5ad')

In [None]:
# fixed_pipeline.py
import mygene
import pandas as pd
import scanpy as sc
import numpy as np

# Shared MyGene.info client used by build_gene_mapping below.
mg = mygene.MyGeneInfo()

# =====================================
# Step 0: Collect datasets
# =====================================
# Name -> AnnData; the names become the values of the "dataset" column after concatenation.
datasets = {
    'All_LAM': All_LAM,
    'EGEAD867_LAM': EGEAD867_LAM   
}

# Build reference gene set as union (keep any gene present in any dataset)
reference_genes = set().union(*(ad.var_names for ad in datasets.values()))
print(f"Reference gene set (union): {len(reference_genes)} genes")

# =====================================
# Step 1: Gene mapping
# =====================================
def build_gene_mapping(gene_list, reference_set, dataset_name):
    """
    Map Ensembl IDs, symbols, or aliases to HGNC symbols,
    prioritizing reference_set membership.

    Every entry of ``gene_list`` is queried through the module-level MyGene
    client ``mg``. For each hit the candidate identifiers are tried in a fixed
    order (symbol, ``ensembl.gene``, ``ensembl[].gene``, aliases) and the first
    one contained in ``reference_set`` becomes the mapping. If none matches,
    the hit's symbol is used as a fallback.

    When a query has several hits, a reference match always replaces an
    earlier value, while a fallback symbol never overwrites an existing entry.

    Parameters:
        gene_list: iterable of gene identifiers to query.
        reference_set: container of accepted gene names (membership tests only).
        dataset_name (str): label written to the ``dataset`` column of
            ``unmapped_df``.

    Returns:
        mapping (dict): query -> mapped ID
        unmapped_df (pd.DataFrame): queries that could not be mapped, with
            columns ``dataset``, ``query`` and ``reason``
    """

    def _has_value(value):
        """Return False only for a missing scalar (None / NaN / NA)."""
        # pd.notna returns an array for list-like input, which cannot be used
        # in a boolean context, so any non-scalar counts as present.
        if not pd.api.types.is_scalar(value):
            return True
        return bool(pd.notna(value))

    def _as_list(value):
        """Normalise a missing value, a single value or a sequence to a list."""
        if not _has_value(value):
            return []
        if isinstance(value, (list, tuple, set)):
            return list(value)
        return [value]

    # Query mygene (ask for both 'ensembl.gene' and 'ensembl' to capture both formats)
    res = mg.querymany(
        list(gene_list),
        scopes=["ensembl.gene", "symbol", "alias", "ensembl"],
        fields="symbol,ensembl.gene,ensembl,alias",
        species="human",
        as_dataframe=True,
    )

    mapping = {}
    unmapped_records = []  # one dict per unmapped query

    # iterate rows (one row per hit; the index holds the query)
    for query, row in res.iterrows():
        # Rows of found queries hold NaN in 'notfound', and NaN is truthy, so
        # check for a real value before testing the flag.
        notfound = row.get("notfound")
        if _has_value(notfound) and bool(notfound):
            unmapped_records.append(
                {"dataset": dataset_name, "query": query, "reason": "notfound"}
            )
            continue

        symbol = row.get("symbol")
        if not _has_value(symbol) or not symbol:
            symbol = None

        # Ordered candidates keep the choice deterministic: symbol first, then
        # Ensembl gene IDs, then aliases.
        candidates = []
        if symbol is not None:
            candidates.append(symbol)
        candidates.extend(_as_list(row.get("ensembl.gene")))
        # 'ensembl' is a single dict or a list of dicts carrying a 'gene' key
        for entry in _as_list(row.get("ensembl")):
            if isinstance(entry, dict) and "gene" in entry:
                candidates.append(entry["gene"])
        candidates.extend(_as_list(row.get("alias")))

        # Try to match against reference set (priority)
        match = next(
            (c for c in candidates if isinstance(c, str) and c in reference_set),
            None,
        )

        if match:
            mapping[query] = match
        elif symbol is not None:  # fallback: use symbol if present
            mapping.setdefault(query, symbol)
        else:
            unmapped_records.append(
                {"dataset": dataset_name, "query": query, "reason": "no_match_in_reference"}
            )

    # Explicit columns keep the frame shape consistent even when it is empty
    unmapped_df = pd.DataFrame(unmapped_records, columns=["dataset", "query", "reason"])
    return mapping, unmapped_df


# =====================================
# Step 2: Harmonize + merge duplicates
# =====================================
def harmonize_and_merge(adata, reference_set, dataset_name, merge_method="sum", convert_sparse=True):
    """
    Rename genes to reference symbols, merge duplicates.
    Returns (adata_harmonized, unmapped_df).

    ``adata`` is modified in place (``var["old_names"]`` and ``var_names``).
    When renaming creates duplicate gene names, a NEW AnnData is built from
    the merged matrix and a copy of ``obs`` only, so nothing else from the
    input object (including ``var["old_names"]``) is carried over.

    Parameters:
      - adata: AnnData whose ``var_names`` are to be harmonized.
      - reference_set: accepted gene names, forwarded to build_gene_mapping.
      - dataset_name: label used in log messages and in unmapped_df.
      - merge_method: "sum" (default) or "mean"
      - convert_sparse: if True, transiently convert a sparse matrix to dense
                        and merge with a pandas groupby (merged X is dense);
                        if False and X is a SciPy sparse matrix, merge with a
                        sparse indicator-matrix product (merged X stays sparse).

    Raises:
      ValueError: if merge_method is neither "sum" nor "mean".
    """
    # Validate up front so a bad argument fails before the remote gene query.
    if merge_method not in ("sum", "mean"):
        raise ValueError("merge_method must be 'sum' or 'mean'")

    from scipy import sparse  # local import: only needed for the sparse merge

    print(f"\n--- Harmonizing {dataset_name} ---")
    mapping, unmapped_df = build_gene_mapping(adata.var_names, reference_set, dataset_name)

    # Keep a copy of original var names
    adata.var["old_names"] = adata.var_names

    # Apply mapping (map queries to mapped name or keep original)
    new_names = adata.var_names.to_series().map(lambda g: mapping.get(g, g))
    adata.var_names = new_names

    # If duplicates exist, merge them
    if adata.var_names.has_duplicates:
        ndup = adata.var_names.duplicated().sum()
        print(f"{dataset_name}: {ndup} duplicate gene names detected → merging ({merge_method})")

        if not convert_sparse and sparse.issparse(adata.X):
            # Memory-friendly path: X (cells x old genes) times a 0/1 indicator
            # (old genes x merged genes) sums the duplicated columns.
            codes, merged_names = pd.factorize(adata.var_names, sort=True)
            n_old, n_new = len(codes), len(merged_names)
            indicator = sparse.csr_matrix(
                (np.ones(n_old), (np.arange(n_old), codes)),
                shape=(n_old, n_new),
            )
            merged_X = sparse.csr_matrix(adata.X) @ indicator
            if merge_method == "mean":
                group_sizes = np.bincount(codes, minlength=n_new)
                merged_X = merged_X @ sparse.diags(1.0 / group_sizes)
            adata = sc.AnnData(sparse.csr_matrix(merged_X), obs=adata.obs.copy())
            adata.var_names = merged_names
        else:
            # Dense path: genes x cells DataFrame grouped by gene name
            if convert_sparse and hasattr(adata.X, "toarray"):
                X = adata.X.T.toarray()
            else:
                X = np.asarray(adata.X.T)
            df = pd.DataFrame(X, index=adata.var_names, columns=adata.obs_names)
            grouped = df.groupby(df.index)
            df2 = grouped.sum() if merge_method == "sum" else grouped.mean()

            # Re-create AnnData (cells x genes)
            adata = sc.AnnData(df2.T, obs=adata.obs.copy())
            adata.var_names = df2.index
        adata.var_names.name = None

    # Final sanity-check: ensure var_names are strings
    adata.var_names = adata.var_names.astype(str)

    print(f"{dataset_name}: {adata.n_vars} genes after harmonization")
    return adata, unmapped_df


# =====================================
# Step 3: Apply harmonization & track unmapped
# =====================================
all_unmapped_list = []
harmonized_datasets = {}

for name, ad in datasets.items():
    # Harmonize a copy so the loaded AnnData objects keep their original gene names.
    adata_harmonized, unmapped_df = harmonize_and_merge(ad.copy(), reference_genes, name)
    harmonized_datasets[name] = adata_harmonized
    all_unmapped_list.append(unmapped_df)

# Concatenate unmapped into one DataFrame
all_unmapped = pd.concat(all_unmapped_list, ignore_index=True)

# =====================================
# Step 4: Concatenate (intersection or union as desired)
# =====================================
# Use join="outer" for union, "inner" for intersection across datasets.
# fill_value=0 is explicit because an outer join pads dense matrices with
# missing values by default; a gene absent from a dataset must count as zero.
All_REF_LAM = sc.concat(
    list(harmonized_datasets.values()),
    join="outer",
    label="dataset",
    keys=list(harmonized_datasets.keys()),
    fill_value=0,
)

print("\nFinal concatenated AnnData:")
print(f"Shape: {All_REF_LAM.shape}")   # (cells, genes)

# Show unmapped summary
print("\n⚠️ Unmapped genes summary:")
display(all_unmapped.head())


In [None]:
All_REF_LAM

In [None]:
All_REF_LAM.obs['sample'].value_counts()

In [None]:
All_REF_LAM.obs['dataset'].value_counts()

In [None]:
#| remove-cell
# Run PCA
# Scaling (sc.pp.scale) is left disabled here.
# NOTE(review): PCA and Harmony run before the normalize_total/log1p cell
# further down; confirm this is the intended input matrix.
sc.tl.pca(All_REF_LAM, svd_solver="arpack")

# Harmony batch correction across the "dataset" labels.
sce.pp.harmony_integrate(All_REF_LAM, key = 'dataset')
# Later cells read obsm['X_pca_harmony'], so fail here if it is missing
# (this replaces a bare membership expression followed by a pasted "True").
if 'X_pca_harmony' not in All_REF_LAM.obsm:
    raise RuntimeError("harmony_integrate did not create obsm['X_pca_harmony']")

In [None]:
#| remove-cell
# Sanity check: sum over the whole expression matrix (message now names the right object).
print(f"Total counts in All_REF_LAM: {All_REF_LAM.X.sum()}")

In [None]:
#| remove-cell
# Scale every cell to 10,000 total counts, then apply log1p to the expression matrix.
sc.pp.normalize_total(All_REF_LAM, target_sum=1e4)
sc.pp.log1p(All_REF_LAM)

# (Disabled) densifying X; the commented code below refers to another object, AT2_HLCA.
#if sp.issparse(AT2_HLCA.X):
#    AT2_HLCA.X = AT2_HLCA.X.toarray()  # Convert sparse to dense matrix
#AT2_HLCA.X.shape

# Replace the PCA embedding with the Harmony-corrected one so that later steps
# reading obsm['X_pca'] use the integrated coordinates.
All_REF_LAM.obsm['X_pca'] = All_REF_LAM.obsm['X_pca_harmony']

In [None]:
#| remove-cell
# Neighbour graph -> UMAP embedding -> Leiden clusters (resolution 0.5, igraph backend).
sc.pp.neighbors(All_REF_LAM)
sc.tl.umap(All_REF_LAM)
sc.tl.leiden(All_REF_LAM,flavor = "igraph", resolution=0.5, n_iterations = 2)
# Pre-compute the dendrogram over Leiden clusters.
sc.tl.dendrogram(All_REF_LAM, groupby = "leiden")

In [None]:
# UMAP overviews of the merged object coloured by Leiden cluster, dataset and sample.
sc.pl.umap(All_REF_LAM,color=['leiden'], legend_fontsize=8)
sc.pl.umap(All_REF_LAM,color=['dataset'], legend_fontsize=8)
sc.pl.umap(All_REF_LAM,color=['sample'], legend_fontsize=8)

In [None]:
# UMAP feature plots for a selection of the neutrophil / N1 / N2 marker genes.
sc.pl.umap(All_REF_LAM,color=['MPO','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=20, cmap = 'Reds')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but All_REF_LAM holds All_LAM plus the E-GEAD-867 reference.
sc.pl.dotplot(
    All_REF_LAM,
    marker_Neutrophil,
    groupby = ["Cell_Type"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}

# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but All_REF_LAM holds All_LAM plus the E-GEAD-867 reference.
sc.pl.dotplot(
    All_REF_LAM,
    marker_Neutrophil,
    groupby = ["sample"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}

# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but All_REF_LAM holds All_LAM plus the E-GEAD-867 reference.
sc.pl.dotplot(
    All_REF_LAM,
    marker_Neutrophil,
    groupby = ["sample",'Cell_Type'], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}

# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but All_REF_LAM holds All_LAM plus the E-GEAD-867 reference.
sc.pl.dotplot(
    All_REF_LAM,
    marker_Neutrophil,
    groupby = ["dataset",'Cell_Type'], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - All LAM + reference [E-GEAD-867]'
)

## MPO+ Cells Only

In [None]:
#| remove-cell
# Keep only the cells labelled "Neutrophil MPO+"; .copy() turns the view into an independent object.
All_NeutroMPO = All_REF_LAM[All_REF_LAM.obs["Cell_Type"] == "Neutrophil MPO+"].copy()

In [None]:
#| remove-cell
# Recompute neighbour graph, UMAP and Leiden clusters (resolution 0.5) on the MPO+ subset.
sc.pp.neighbors(All_NeutroMPO)
sc.tl.umap(All_NeutroMPO)
sc.tl.leiden(All_NeutroMPO,flavor = "igraph", resolution=0.5, n_iterations = 2)
# Pre-compute the dendrogram over the new Leiden clusters.
sc.tl.dendrogram(All_NeutroMPO, groupby = "leiden")

In [None]:
#| remove-cell
# UMAP overviews of the MPO+ subset coloured by Leiden cluster, dataset and sample.
sc.pl.umap(All_NeutroMPO,color=['leiden'], legend_fontsize=8)
sc.pl.umap(All_NeutroMPO,color=['dataset'], legend_fontsize=8)
sc.pl.umap(All_NeutroMPO,color=['sample'], legend_fontsize=8)

In [None]:
# UMAP feature plots for a selection of the neutrophil / N1 / N2 marker genes in the MPO+ subset.
sc.pl.umap(All_NeutroMPO,color=['MPO','ITGAM','SELL','ICAM1','CXCL10','CXCL1','CXCL2','CEACAM1'], legend_fontsize=20, cmap = 'Reds')

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dendrogram for the same grouping the dot plot uses.
sc.tl.dendrogram(All_NeutroMPO, groupby = ['sample'])
# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but this object holds the MPO+ cells of the merged data.
sc.pl.dotplot(
    All_NeutroMPO,
    marker_Neutrophil,
    groupby = ["sample"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPO+ cells, All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}

# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but this object holds the MPO+ cells of the merged data.
sc.pl.dotplot(
    All_NeutroMPO,
    marker_Neutrophil,
    groupby = ["dataset"], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPO+ cells, All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}

# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but this object holds the MPO+ cells of the merged data.
sc.pl.dotplot(
    All_NeutroMPO,
    marker_Neutrophil,
    groupby = ["dataset",'Cell_Type'], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPO+ cells, All LAM + reference [E-GEAD-867]'
)

In [None]:
#| hide-input
# Marker panels shown in the dot plot, grouped by neutrophil phenotype.
marker_Neutrophil = {
    "Neutrophil": ['MPO','ELANE','FCGR3B','NAMPT','IFITM2','TNFAIP2','ITGAM','FUT4','CXCR3','FCGR3A'],
    "Neutrophil N1": ["ITGAM","FCGR3B","CEACAM1","GPI","ICAM1","CXCL1",'CXCL2','CXCL3','CXCL10','TNF','MYD88'],
    "Neutrophil N2": ['ITGAM','FCGR3B','CEACAM1','GPI','SELL','MRC1','PI3','SLPI']
}
# Dendrogram for the same grouping the dot plot uses.
sc.tl.dendrogram(All_NeutroMPO, groupby = ['sample','Cell_Type'])
# `title` (not `label`) sets the dot plot title. The old text named only
# "Lam 2 Samples [GSE135851]", but this object holds the MPO+ cells of the merged data.
sc.pl.dotplot(
    All_NeutroMPO,
    marker_Neutrophil,
    groupby = ["sample",'Cell_Type'], 
    dendrogram=True,
    use_raw=False,
    title = 'Dotplot of Neutrophil specific markers - MPO+ cells, All LAM + reference [E-GEAD-867]'
)