# Prep adata for cellchat - distinguish CD4 by distance from TLS / bronchi category

In [1]:
import sys
import os

import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import warnings

warnings.filterwarnings("ignore")

In [7]:
import pandas as pd

In [2]:
# Root folder of the distance analysis; the annotated AnnData is read from
# and the prepared AnnData is written to this folder further down.
dist_out_dir = '/home/workspace/spatial_mouse_lung_outputs/downstream_analysis/distance'

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dist_out_dir, exist_ok=True)

# Sub-folder for figures produced by this analysis.
plot_out_dir = os.path.join(dist_out_dir, 'plots')
os.makedirs(plot_out_dir, exist_ok=True)

In [13]:
# Load the AnnData object stored in the distance output folder.
# NOTE(review): the commented-out path below points at a different location
# (a "Prostate_Spatial_R2" folder) and looks like a leftover — confirm it can be removed.
# adata = sc.read('/projects/Kennidy/Prostate_Spatial_R2/celltype_annotated_updated.h5ad')
adata = sc.read_h5ad(os.path.join(dist_out_dir,'adata_distance_zones_structure.h5ad'))

In [14]:
# List the distinct fine-grained cell-type labels present in the data.
adata.obs['label_fine'].unique().tolist()

['Col13a1+ fibroblast',
 'Alv Mf',
 'Cap',
 'Vein',
 'AT2',
 'Mono',
 'Th0',
 'Pericyte 2',
 'Cap-a',
 'Neut',
 'Pericyte 1',
 'Club',
 'Ciliated',
 'Art',
 'AT1',
 'CD4 naive',
 'B cell',
 'Th17',
 'Int Mf',
 'CD8 naive',
 'SMC',
 'gd T cell',
 'Plasmablast',
 'Th2',
 'Lymph',
 'Ccr7- cDC2',
 'NK cell',
 'cDC1',
 'CD4 trans',
 'Ccr7+ cDC2',
 'Th1',
 'CD8 act',
 'Mesothelial',
 'Treg',
 'Myofibroblast',
 'Col14a1+ fibroblast',
 'ILC2']

In [15]:
# Display the AnnData summary (dimensions and available annotation columns).
adata

AnnData object with n_obs × n_vars = 593237 × 480
    obs: 'cell_id', 'x_centroid', 'y_centroid', 'transcript_counts', 'control_probe_counts', 'genomic_control_counts', 'control_codeword_counts', 'unassigned_codeword_counts', 'deprecated_codeword_counts', 'total_counts', 'cell_area', 'nucleus_area', 'nucleus_count', 'segmentation_method', 'sample', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'log1p_total_counts', 'pct_counts_in_top_10_genes', 'pct_counts_in_top_20_genes', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_150_genes', 'n_counts', 'sample_label', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.mito', 'Barcode', 'Age', 'Oxygen', 'percent.mt', 'S.Score', 'G2M.Score', 'Phase', 'Sample', 'RNA_snn_res.0.2', 'seurat_clusters', 'RNA_snn_res.0.15', 'nCount_SCT', 'nFeature_SCT', 'integrated_snn_res.0.2', 'integrated_snn_res.0.1', 'cluster_high_res', 'CellType', 'leiden_res1', 'CellType_consolidated', '_scvi_batch', '_scvi_labels', 'celltype_scanvi', 'C_scANVI', 'label_scan

In [16]:
# make CD4 act category by distances 
def quantify_quadrant_expression(adata,
                                 x_axis = 'avg_distance_to_bronchi_zone',
                                 y_axis = 'avg_distance_to_TLS_zone',
                                 x_clip = 450, y_clip = 450,
                                 x_threshold = 150, y_threshold = 100):
    """Assign every cell to a spatial region based on two distance columns.

    Despite its name, this function does not quantify expression; it only
    subsets the cells and adds an ordered categorical ``spatial_quadrant``
    column to ``.obs``. The input object is not modified.

    Regions (``x`` = ``obs[x_axis]``, ``y`` = ``obs[y_axis]``):

    * ``'near-TLS (T)'``     -- ``y <= y_threshold`` (regardless of ``x``)
    * ``'near-bronchi (B)'`` -- ``x <= x_threshold`` and ``y > y_threshold``
    * ``'parenchyma (P)'``   -- ``x > x_threshold`` and ``y > y_threshold``

    Cells lying exactly on a threshold are counted as "near" so that every
    cell with valid distances receives a label. Cells with a missing (NaN)
    distance in either column stay unlabelled (NaN); they can only be present
    when the corresponding clip is disabled, because clipping drops them.

    Parameters
    ----------
    adata
        AnnData-like object exposing ``.obs``, row indexing via
        ``adata[mask, :]`` and ``.copy()``.
    x_axis, y_axis
        Names of the ``.obs`` columns holding the two distances.
    x_clip, y_clip
        Keep only cells whose distance is ``<=`` the clip value. A falsy value
        (``None`` or ``0``) disables clipping on that axis.
    x_threshold, y_threshold
        Distance cut-offs separating "near" from "far" on each axis.

    Returns
    -------
    A copy of the (possibly subset) object with ``obs['spatial_quadrant']`` set.
    """
    # Combine both clip filters into one mask so that the object is subset
    # and copied a single time instead of copy -> view -> implicit copy.
    keep = np.ones(adata.obs.shape[0], dtype=bool)
    if x_clip:
        print('Clipping x to', x_clip)
        keep &= (adata.obs[x_axis] <= x_clip).to_numpy(dtype=bool)
    if y_clip:
        print('Clipping y to', y_clip)
        keep &= (adata.obs[y_axis] <= y_clip).to_numpy(dtype=bool)
    adata = adata[keep, :].copy()

    x = adata.obs[x_axis].to_numpy()
    y = adata.obs[y_axis].to_numpy()

    # The three masks are mutually exclusive; NaN distances match none of
    # them and therefore remain None (-> NaN in the categorical).
    quadrant_order = ['near-bronchi (B)', 'parenchyma (P)', 'near-TLS (T)']
    labels = np.full(x.shape[0], None, dtype=object)
    labels[(x > x_threshold) & (y > y_threshold)] = 'parenchyma (P)'
    labels[y <= y_threshold] = 'near-TLS (T)'
    labels[(x <= x_threshold) & (y > y_threshold)] = 'near-bronchi (B)'

    adata.obs['spatial_quadrant'] = pd.Categorical(
        labels, categories=quadrant_order, ordered=True
    )
    return adata

# Add the 'spatial_quadrant' column; with the default arguments this also
# drops cells beyond the clip distances.
adata = quantify_quadrant_expression(adata)

# Store both columns as plain Python values instead of their current dtype.
# 'spatial_quadrant' is an ordered categorical at this point; presumably
# 'label_fine' is categorical too, which would reject the new label strings
# assigned further down — TODO confirm.
for column in ('spatial_quadrant', 'label_fine'):
    adata.obs[column] = adata.obs[column].values.tolist()

Clipping x to 450
Clipping y to 450


In [17]:
adata.obs['spatial_quadrant'].unique()

array(['parenchyma (P)', 'near-bronchi (B)', 'near-TLS (T)'], dtype=object)

In [18]:
# Collapse the CD4 T-cell subtypes listed below into one "CD4 act" label
# per spatial region, e.g. 'CD4 act (near-TLS (T))'.
cd4_subtypes = ['Th0', 'Th1', 'Th17', 'Th2', 'Treg', 'CD4 trans']
cd4_mask = adata.obs['label_fine'].isin(cd4_subtypes)

# Build the new label from each selected cell's quadrant. A cell with a
# missing quadrant would end up with a missing label as well.
cd4_quadrants = adata.obs.loc[cd4_mask, 'spatial_quadrant']
adata.obs.loc[cd4_mask, 'label_fine'] = 'CD4 act (' + cd4_quadrants + ')'

# Report how many relabelled cells fall into each region.
print("Updated T cell categories:")
print(adata.obs.loc[cd4_mask, 'label_fine'].value_counts())

# adata.obs['label_medium']


Updated T cell categories:
label_fine
CD4 act (parenchyma (P))      3028
CD4 act (near-TLS (T))        2008
CD4 act (near-bronchi (B))    1636
Name: count, dtype: int64


In [19]:
# Basic quality filtering, applied in place:
#   min_genes=5 -> keep only cells with at least 5 detected genes
#   min_cells=5 -> keep only genes detected in at least 5 cells
sc.pp.filter_cells(adata, min_genes=5)
sc.pp.filter_genes(adata, min_cells=5)

In [20]:

# Scale each cell to a total of 10,000 counts, then apply log(1 + x).
# NOTE(review): assumes adata.X still holds raw counts when this runs —
# confirm the loaded file was not already normalized.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Assign all cells

In [21]:
# Save the relabelled, filtered and normalized object to the distance output folder.
adata.write_h5ad(os.path.join(dist_out_dir,'adata_cellchat_distance_prepped.h5ad'))

In [26]:
# Check whether the gene Il1rl1 is among the variables kept in adata.
'Il1rl1' in adata.var.index

True

In [27]:
# Check whether the gene Il1rap is among the variables kept in adata.
'Il1rap' in adata.var.index

False