In [1]:
import os
import gzip
import scipy
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad
import seaborn as sns
import scipy.sparse as sp
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# scanpy: lowest verbosity level, 300 dpi for displayed figures and
# 500 dpi for saved ones.
sc.settings.verbosity = 0
sc.settings.set_figure_params(dpi=300, dpi_save=500)
from matplotlib import rcParams
# Set the same two resolutions directly on matplotlib's rcParams.
rcParams.update({"figure.dpi": 300, "savefig.dpi": 500})

In [2]:
# Folder holding the dataset, relative to the notebook's working directory.
data_dir = '../../../Data/Spatial/Transcriptomics/Xenium_PF_Vannan2025/'
# Create the `processed/` sub-folder if it is missing. `exist_ok=True` keeps
# the cell idempotent on re-run and avoids the check-then-create race of a
# separate `os.path.exists` test.
os.makedirs(data_dir + 'processed/', exist_ok=True)

In [3]:
# Load the combined dataset; the per-cell `sample` column in `.obs`
# identifies which sample each cell belongs to.
adata_concat = ad.read_h5ad(data_dir + 'GSE250346.h5ad')

# Drop any multi-dimensional annotations and the `.raw` copy stored in the
# file (presumably to keep the in-memory object small -- TODO confirm).
for annotations in (adata_concat.obsm, adata_concat.varm):
    if annotations:
        annotations.clear()
if adata_concat.raw:
    del adata_concat.raw

# Last expression of the cell: display the AnnData summary.
adata_concat

AnnData object with n_obs × n_vars = 1630319 × 343
    obs: 'sample', 'patient', 'cell_id', 'full_cell_id', 'sample_type', 'sample_affect', 'disease_status', 'percent_pathology', 'tma', 'run', 'final_CT', 'final_lineage', 'CNiche', 'TNiche', 'lumen_id', 'lumen_rank', 'x_centroid', 'y_centroid', 'adj_x_centroid', 'adj_y_centroid', 'super_adj_x_centroid', 'super_adj_y_centroid', 'nCount_RNA', 'nFeature_RNA', 'perc_negcontrolprobe', 'perc_negcontrolcodeword', 'perc_unassigned', 'perc_negcontrolorunassigned'
    var: 'mean', 'std'

In [None]:
# Sample identifiers to extract from `adata_concat`. `ctr_name_list` and
# `cond_name_list` presumably hold the control and disease samples
# respectively (naming only -- verify against `obs['disease_status']`).
ctr_name_list = ['THD0008', 'THD0011', 'VUHD038', 'VUHD049', 'VUHD069', 'VUHD090', 'VUHD095', 'VUHD113', 'VUHD116A', 'VUHD116B',]
cond_name_list = ['TILD028LA', 'TILD049MA', 'TILD080LA', 'TILD111LA', 'TILD113LA', 'TILD117LA', 'TILD117MA1', 'TILD117MA2',
                  'TILD130LA', 'TILD167LA', 'TILD175MA', 'TILD299MA', 'TILD315MA', 'VUILD102LA', 'VUILD102MA', 'VUILD104MA1',
                  'VUILD104MA2', 'VUILD105MA1', 'VUILD105MA2', 'VUILD106MA', 'VUILD107MA', 'VUILD110LA', 'VUILD115MA', 'VUILD141MA',
                  'VUILD142MA', 'VUILD48LA1', 'VUILD48LA2', 'VUILD49LA', 'VUILD58MA', 'VUILD78LA', 'VUILD78MA', 'VUILD91LA',
                  'VUILD91MA', 'VUILD96LA', 'VUILD96MA']

# Filled inside the loop from each sample's `sample_affect` label. Samples
# with any other label (e.g. 'Unaffected') end up in neither list.
less_affe_list = []
more_affe_list = []

# One AnnData per sample, in the order ctr_name_list + cond_name_list.
adata_list = []

for slice_name in ctr_name_list+cond_name_list:

    # Independent copy of the cells belonging to this sample.
    adata = adata_concat[adata_concat.obs['sample'] == slice_name, :].copy()
    if adata.n_obs == 0:
        # Fail with a clear message instead of an opaque indexing error below.
        raise ValueError(f"No cells found for sample '{slice_name}' in adata_concat.obs['sample']")
    adata.X = sp.csr_matrix(adata.X)
    # Expose the cell centroids under the conventional `spatial` key.
    adata.obsm['spatial'] = adata.obs[['x_centroid', 'y_centroid']].to_numpy()

    # adata.write_h5ad(data_dir + f'processed/{slice_name}.h5ad')
    print(f'{slice_name}: {adata.shape}')
    adata_list.append(adata)

    # Read the label of the first cell by POSITION. `obs` is indexed by cell
    # names, so `[0]` relied on pandas' deprecated integer-key fallback.
    # Assumes `sample_affect` is constant within a sample -- TODO confirm.
    sample_affect = adata.obs['sample_affect'].iloc[0]
    if sample_affect == 'Less Affected':
        less_affe_list.append(slice_name)
    elif sample_affect == 'More Affected':
        more_affe_list.append(slice_name)

# with open(data_dir + "processed/slice_name_list.txt", "w") as f:
#     for slice_name in ctr_name_list+cond_name_list:
#         f.write(slice_name + '\n')

THD0008: (69227, 343)
THD0011: (25364, 343)
VUHD038: (5941, 343)
VUHD049: (22688, 343)
VUHD069: (19976, 343)
VUHD090: (16110, 343)
VUHD095: (11442, 343)
VUHD113: (12870, 343)
VUHD116A: (12372, 343)
VUHD116B: (29147, 343)
TILD028LA: (24813, 343)
TILD049MA: (11140, 343)
TILD080LA: (51212, 343)
TILD111LA: (29566, 343)
TILD113LA: (24202, 343)
TILD117LA: (36707, 343)
TILD117MA1: (49231, 343)
TILD117MA2: (35271, 343)
TILD130LA: (24547, 343)
TILD167LA: (15870, 343)
TILD175MA: (35887, 343)
TILD299MA: (52992, 343)
TILD315MA: (37361, 343)
VUILD102LA: (26364, 343)
VUILD102MA: (31517, 343)
VUILD104MA1: (35502, 343)
VUILD104MA2: (37764, 343)
VUILD105MA1: (21108, 343)
VUILD105MA2: (21178, 343)
VUILD106MA: (124717, 343)
VUILD107MA: (68413, 343)
VUILD110LA: (122815, 343)
VUILD115MA: (94540, 343)
VUILD141MA: (34702, 343)
VUILD142MA: (9222, 343)
VUILD48LA1: (20819, 343)
VUILD48LA2: (30302, 343)
VUILD49LA: (38280, 343)
VUILD58MA: (56733, 343)
VUILD78LA: (26235, 343)
VUILD78MA: (35306, 343)
VUILD91LA: (15

In [5]:
# less_affe_list = ['TILD028LA', 'TILD080LA', 'TILD111LA', 'TILD113LA', 'TILD117LA', 'TILD130LA', 'TILD167LA', 'VUILD102LA', 'VUILD110LA',
#                   'VUILD48LA1', 'VUILD48LA2', 'VUILD49LA', 'VUILD78LA', 'VUILD91LA', 'VUILD96LA']
# more_affe_list = ['TILD049MA', 'TILD117MA1', 'TILD117MA2', 'TILD175MA', 'TILD299MA', 'TILD315MA', 'VUILD102MA', 'VUILD104MA1', 'VUILD104MA2',
#                   'VUILD105MA1', 'VUILD105MA2', 'VUILD106MA', 'VUILD107MA', 'VUILD115MA', 'VUILD141MA', 'VUILD142MA', 'VUILD58MA', 'VUILD78MA',
#                   'VUILD91MA', 'VUILD96MA']

In [6]:
# Show every variable (gene/probe) name in the dataset as a plain list.
adata_concat.var_names.to_list()

['ABCC2',
 'ACKR1',
 'ACTA2',
 'AGER',
 'AGR3',
 'AIF1',
 'AKR1B10',
 'AKR1C1',
 'AKR1C2',
 'ANKRD28',
 'APLN',
 'APLNR',
 'ASCL1',
 'ATF3',
 'ATF4',
 'ATF6',
 'ATG7',
 'ATP2A3',
 'AXIN2',
 'AXL',
 'BANK1',
 'BAX',
 'BCL2',
 'BCL2L1',
 'BCL2L11',
 'BMP4',
 'BMPR2',
 'BPIFA1',
 'C1QC',
 'C20orf85',
 'CA4',
 'CALCA',
 'CCL18',
 'CCL2',
 'CCL21',
 'CCL22',
 'CCL5',
 'CCN2',
 'CCNA1',
 'CCNB2',
 'CCR7',
 'CD14',
 'CD19',
 'CD1A',
 'CD1C',
 'CD2',
 'CD247',
 'CD27',
 'CD274',
 'CD28',
 'CD34',
 'CD3D',
 'CD3E',
 'CD3G',
 'CD4',
 'CD44',
 'CD52',
 'CD68',
 'CD69',
 'CD79A',
 'CD79B',
 'CD86',
 'CD8A',
 'CD8B',
 'CDH26',
 'CDK1',
 'CDKN2A',
 'CEACAM5',
 'CEACAM6',
 'CENPF',
 'CFTR',
 'CGA',
 'CHAC1',
 'CHGB',
 'CLDN5',
 'COL15A1',
 'COL1A1',
 'COL1A2',
 'COL3A1',
 'COL4A3',
 'CPA3',
 'CREB3L4',
 'CRELD2',
 'CSPG4',
 'CST3',
 'CTHRC1',
 'CTLA4',
 'CTNNB1',
 'CXCL13',
 'CXCL14',
 'CXCL9',
 'CXCR4',
 'CXCR5',
 'DCN',
 'DCTPP1',
 'DDIT3',
 'DEFB1',
 'DERL3',
 'DIRAS3',
 'DMBT1',
 'DNAJB9',
 'DUOX

In [7]:
# Distinct `disease_status` labels across all cells, sorted.
sorted({*adata_concat.obs['disease_status']})

['Control', 'Disease']

In [11]:
# Distinct `percent_pathology` values across all cells, sorted ascending.
sorted({*adata_concat.obs['percent_pathology']})

[0.0,
 5.0,
 15.0,
 20.0,
 30.0,
 35.0,
 40.0,
 45.0,
 50.0,
 55.0,
 65.0,
 70.0,
 75.0,
 85.0,
 90.0,
 95.0,
 100.0]

In [13]:
# For each per-sample AnnData, print the set of sample names it contains
# followed by the set of `percent_pathology` values it contains.
for sample_adata in adata_list:
    sample_obs = sample_adata.obs
    print(set(sample_obs['sample']))
    print(set(sample_obs['percent_pathology']))

{'THD0008'}
{0.0}
{'THD0011'}
{20.0}
{'VUHD038'}
{5.0}
{'VUHD049'}
{0.0}
{'VUHD069'}
{0.0}
{'VUHD090'}
{5.0}
{'VUHD095'}
{0.0}
{'VUHD113'}
{0.0}
{'VUHD116A'}
{0.0}
{'VUHD116B'}
{0.0}
{'TILD028LA'}
{45.0}
{'TILD049MA'}
{100.0}
{'TILD080LA'}
{65.0}
{'TILD111LA'}
{20.0}
{'TILD113LA'}
{55.0}
{'TILD117LA'}
{50.0}
{'TILD117MA1'}
{95.0}
{'TILD117MA2'}
{90.0}
{'TILD130LA'}
{70.0}
{'TILD167LA'}
{50.0}
{'TILD175MA'}
{95.0}
{'TILD299MA'}
{75.0}
{'TILD315MA'}
{100.0}
{'VUILD102LA'}
{35.0}
{'VUILD102MA'}
{95.0}
{'VUILD104MA1'}
{85.0}
{'VUILD104MA2'}
{90.0}
{'VUILD105MA1'}
{90.0}
{'VUILD105MA2'}
{95.0}
{'VUILD106MA'}
{100.0}
{'VUILD107MA'}
{100.0}
{'VUILD110LA'}
{55.0}
{'VUILD115MA'}
{100.0}
{'VUILD141MA'}
{100.0}
{'VUILD142MA'}
{95.0}
{'VUILD48LA1'}
{20.0}
{'VUILD48LA2'}
{40.0}
{'VUILD49LA'}
{45.0}
{'VUILD58MA'}
{100.0}
{'VUILD78LA'}
{65.0}
{'VUILD78MA'}
{85.0}
{'VUILD91LA'}
{15.0}
{'VUILD91MA'}
{75.0}
{'VUILD96LA'}
{30.0}
{'VUILD96MA'}
{100.0}


In [8]:
# Distinct `final_CT` (cell-type annotation) labels across all cells, sorted.
sorted({*adata_concat.obs['final_CT']})

['AT1',
 'AT2',
 'Activated Fibrotic FBs',
 'Adventitial FBs',
 'Alveolar FBs',
 'Alveolar Macrophages',
 'Arteriole',
 'B cells',
 'Basal',
 'Basophils',
 'CD4+ T-cells',
 'CD8+ T-cells',
 'Capillary',
 'Goblet',
 'Inflammatory FBs',
 'Interstitial Macrophages',
 'KRT5-/KRT17+',
 'Langerhans cells',
 'Lymphatic',
 'Macrophages - IFN-activated',
 'Mast',
 'Mesothelial',
 'Migratory DCs',
 'Monocytes/MDMs',
 'Multiciliated',
 'Myofibroblasts',
 'NK/NKT',
 'Neutrophils',
 'PNEC',
 'Plasma',
 'Proliferating AT2',
 'Proliferating Airway',
 'Proliferating B cells',
 'Proliferating FBs',
 'Proliferating Myeloid',
 'Proliferating NK/NKT',
 'Proliferating T-cells',
 'RASC',
 'SMCs/Pericytes',
 'SPP1+ Macrophages',
 'Secretory',
 'Subpleural FBs',
 'Transitional AT2',
 'Tregs',
 'Venous',
 'cDCs',
 'pDCs']

In [9]:
# Distinct `sample_affect` labels across all cells, sorted.
sorted({*adata_concat.obs['sample_affect']})

['Less Affected', 'More Affected', 'Unaffected']