In [5]:
import sys
import os

import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import warnings

warnings.filterwarnings("ignore")

In [6]:
# Output locations: a root folder for the distance analysis and a 'plots'
# subfolder inside it.
dist_out_dir = '/home/workspace/spatial_mouse_lung_outputs/downstream_analysis/distance'
plot_out_dir = os.path.join(dist_out_dir, 'plots')

# exist_ok=True makes the cell safe to re-run and removes the gap between
# "check that the folder exists" and "create it" that the explicit
# os.path.exists() test left open.
os.makedirs(dist_out_dir, exist_ok=True)
os.makedirs(plot_out_dir, exist_ok=True)

In [41]:
# Load the AnnData file 'adata_distance_zones_structure.h5ad' from the
# distance output folder.
structure_h5ad_path = os.path.join(dist_out_dir, 'adata_distance_zones_structure.h5ad')
adata = sc.read_h5ad(structure_h5ad_path)

In [42]:
# List the distinct fine-grained cell-type labels present in the data.
fine_labels = adata.obs['label_fine'].unique().tolist()
fine_labels

['Col13a1+ fibroblast',
 'Alv Mf',
 'Cap',
 'Vein',
 'AT2',
 'Mono',
 'Th0',
 'Pericyte 2',
 'Cap-a',
 'Neut',
 'Pericyte 1',
 'Club',
 'Ciliated',
 'Art',
 'AT1',
 'CD4 naive',
 'B cell',
 'Th17',
 'Int Mf',
 'CD8 naive',
 'SMC',
 'gd T cell',
 'Plasmablast',
 'Th2',
 'Lymph',
 'Ccr7- cDC2',
 'NK cell',
 'cDC1',
 'CD4 trans',
 'Ccr7+ cDC2',
 'Th1',
 'CD8 act',
 'Mesothelial',
 'Treg',
 'Myofibroblast',
 'Col14a1+ fibroblast',
 'ILC2']

In [43]:
# Keep only the cells whose consolidated zone is TLS, adventitia or parenchyma.
# Slicing an AnnData object returns a view of the parent; .copy() makes the
# subset an independent object so the .obs assignments below (and the later
# in-place preprocessing) do not depend on implicit view-to-copy conversion.
zones_of_interest = ['TLS', 'adventitia', 'parenchyma']
adata = adata[adata.obs['zone_consol'].isin(zones_of_interest), :].copy()

# Rebuild both label columns from plain Python lists.
# NOTE(review): presumably this drops a categorical dtype so that new label
# strings can be written into 'label_fine' later — confirm.
adata.obs['zone_consol'] = adata.obs['zone_consol'].values.tolist()
adata.obs['label_fine'] = adata.obs['label_fine'].values.tolist()

In [45]:
# Collapse the listed CD4 T-cell subtypes into a single label per zone,
# of the form 'CD4 act (<zone_consol>)'.
cd4_act_subtypes = ['Th0', 'Th1', 'Th17', 'Th2', 'Treg', 'CD4 trans']
cd4_mask = adata.obs['label_fine'].isin(cd4_act_subtypes)

# Build the replacement labels from the zone of each selected cell, then
# write them back to the selected rows only.
cd4_zones = adata.obs.loc[cd4_mask, 'zone_consol']
adata.obs.loc[cd4_mask, 'label_fine'] = 'CD4 act (' + cd4_zones + ')'

# Sanity check: how many relabelled cells fall into each new category.
print("Updated T cell categories:")
print(adata.obs.loc[cd4_mask, 'label_fine'].value_counts())


Updated T cell categories:
label_fine
CD4 act (parenchyma)    6313
CD4 act (adventitia)    2409
CD4 act (TLS)            842
Name: count, dtype: int64


In [46]:
# Basic QC filtering, applied to `adata` in place: first drop cells with
# fewer than `min_genes_per_cell` detected genes, then drop genes detected in
# fewer than `min_cells_per_gene` of the remaining cells.
min_genes_per_cell = 5
min_cells_per_gene = 5
sc.pp.filter_cells(adata, min_genes=min_genes_per_cell)
sc.pp.filter_genes(adata, min_cells=min_cells_per_gene)

In [47]:

# Scale every cell to the same total count, then apply log(1 + x).
# Both calls modify `adata` in place.
target_counts_per_cell = 1e4
sc.pp.normalize_total(adata, target_sum=target_counts_per_cell)
sc.pp.log1p(adata)

# Assign all cells

In [48]:
# Save the filtered, normalised and relabelled object to the distance output
# folder.
prepped_h5ad_path = os.path.join(dist_out_dir, 'adata_cellchat_prepped.h5ad')
adata.write_h5ad(prepped_h5ad_path)