In [None]:
#In an anaconda environment that contains scimap, use csv files that are MCQuant outputs, as input for scimap
#Use glob function to load all csv files, and then convert them to adata
import scimap as sm
import anndata as ad
import pandas as pd
import glob
# Folder holding the per-image MCQuant quantification tables.
path = '/data/vasileiosionat2/IBEX_FINAL/Quantification/Outputs/'
# Collect every CSV in that folder (glob makes no promise about file order).
files = glob.glob(f'{path}/*.csv')
filepath = files  # list of CSV paths handed to scimap
# Combine all tables into a single AnnData object.
adata = sm.pp.mcmicro_to_scimap(filepath)

In [None]:
import sys
import os
import scanpy as sc
import seaborn as sns; sns.set(color_codes=True)

In [None]:
# Turn on IPython's Qt event-loop integration.
# NOTE(review): presumably needed by the napari-based viewers called later in this notebook - confirm.
%gui qt

In [None]:
# Add a 'PatientID' column to adata.obs: the first five characters of each 'imageid'.
adata.obs['PatientID'] = adata.obs['imageid'].str.slice(stop=5)

# Show the first rows so the new column can be checked.
print(adata.obs.head())

In [None]:
# Add a 'SampleID' column to adata.obs: the part of 'imageid' that precedes the first underscore.
adata.obs['SampleID'] = adata.obs['imageid'].str.split('_').str.get(0)

# Show the first rows so the new column can be checked.
print(adata.obs.head())


In [None]:
# Patients labelled "Perio"; every other patient is labelled "Healthy".
perio_patients = ["HV137", "HV140", "HV154", "HV160", "HV196", "HV204", "HV214", "HV219", "TM327"]


def _condition_for(patient_id):
    """Return "Perio" when the patient is in `perio_patients`, otherwise "Healthy"."""
    if patient_id in perio_patients:
        return "Perio"
    return "Healthy"


# Add a 'ConditionID' column derived from 'PatientID'.
adata.obs['ConditionID'] = adata.obs['PatientID'].apply(_condition_for)

# Show the first rows so the new column can be checked.
print(adata.obs.head())

In [None]:
# Display the first 20 rows of adata.var (the marker table).
adata.var.head(20)

In [None]:
# Step 1 of the IBEX clustering: supervised phenotyping.
# The CSV below is the phenotyping workflow prepared following the scimap instructions.
phenotype_csv = '/data/vasileiosionat2/IBEX_FINAL/Scimap/phenotypes_FINAL.csv'
phenotype = pd.read_csv(phenotype_csv)
phenotype

In [None]:
# Rescale the data; no manual gates are supplied (gate=None), i.e. unsupervised (GMM) gating.
adata = sm.pp.rescale(
    adata,
    gate=None,
)

Scaling Vimentin
Scaling Image HV154B1_HV154B1_0
Scaling CK19
Scaling CK5
Scaling MCT
Scaling CD31
Scaling CD4
Scaling PanCK
Scaling MPO
Scaling Ki67
Scaling CD8a
Scaling Thy-1
Scaling CD3
Scaling CD45
Scaling S100a8-9
Scaling CD138
Scaling aSMA
Scaling Hoechst
Scaling CD20
Scaling HLA-DR
Scaling Vimentin
Scaling Image HV220B1_HV220B1_0
Scaling CK19
Scaling CK5
Scaling MCT
Scaling CD31
Scaling CD4
Scaling PanCK
Scaling MPO
Scaling Ki67
Scaling CD8a
Scaling Thy-1
Scaling CD3
Scaling CD45
Scaling S100a8-9
Scaling CD138
Scaling aSMA
Scaling Hoechst
Scaling CD20
Scaling HLA-DR
Scaling Vimentin
Scaling Image HV140A1_HV140A1_0
Scaling CK19
Scaling CK5
Scaling MCT
Scaling CD31
Scaling CD4
Scaling PanCK
Scaling MPO
Scaling Ki67
Scaling CD8a
Scaling Thy-1
Scaling CD3
Scaling CD45
Scaling S100a8-9
Scaling CD138
Scaling aSMA
Scaling Hoechst
Scaling CD20
Scaling HLA-DR
Scaling Vimentin
Scaling Image HV160D_HV160D_0
Scaling CK19
Scaling CK5
Scaling MCT
Scaling CD31
Scaling CD4
Scaling PanCK
Scaling

In [None]:
# Assign a phenotype to every cell from the rescaled data and the supervised
# phenotyping workflow; the result is stored in adata.obs['phenotype'].
adata = sm.tl.phenotype_cells(
    adata,
    phenotype=phenotype,
    label='phenotype',
)

In [None]:
# Marker expression per phenotype (all markers, scaled per marker).
# These phenotypes are sub-clustered with k-means in the following cells.
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='phenotype',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Split the 'epithelial' phenotype into 4 k-means sub-clusters using the epithelial markers only.
epithelial_markers = ['CD138', 'CK19', 'CK5', 'PanCK', 'S100a8-9']
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=4,
    subset_genes=epithelial_markers,
    sub_cluster=True,
    sub_cluster_column='phenotype',
    sub_cluster_group='epithelial',
    use_raw=False,
)

In [None]:
# Sub-cluster each remaining phenotype with k-means.
# Every call reads the current labels from adata.obs['kmeans'] and splits one group,
# so the groups are processed one after another in the order listed here.
# Each entry is (group in adata.obs['kmeans'], number of k-means sub-clusters).
subcluster_plan = [
    ('fibroblast', 4),
    ('APCs-immune', 4),
    ('T cell', 6),
    ('plasma cells', 4),
    ('neutrophils', 4),
    ('immune-other', 3),
    ('SMC', 3),
    ('endothelial', 3),
    ('mast cells', 2),
]

for group_name, n_subclusters in subcluster_plan:
    adata = sm.tl.cluster(
        adata,
        k=n_subclusters,
        method='kmeans',
        sub_cluster=True,
        sub_cluster_column='kmeans',
        sub_cluster_group=group_name,
        use_raw=False,
    )

In [None]:
# Split the listed clusters in two on Ki67 alone to expose proliferating epithelial cells.
ki67_groups = ['epithelial-0', 'epithelial-1', 'epithelial-2', 'epithelial-3', 'immune-other-1']
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['Ki67'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group=ki67_groups,
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Split 'immune-other-0' in two on CD138 alone.
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['CD138'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group='immune-other-0',
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Number of cells in each current k-means cluster.
adata.obs.loc[:, 'kmeans'].value_counts()

In [None]:
# Split 'plasma cells-1' in two on CD3 alone.
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['CD3'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group='plasma cells-1',
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Split 'T cell-3' in two on CD138 alone.
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['CD138'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group='T cell-3',
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Split 'neutrophils-0' and 'neutrophils-1' in two each, using CD31, Thy-1 and aSMA.
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['CD31', 'Thy-1', 'aSMA'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group=['neutrophils-0', 'neutrophils-1'],
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Split 'neutrophils-0-1' in two on Thy-1 alone.
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    subset_genes=['Thy-1'],
    sub_cluster=True,
    sub_cluster_column='kmeans',
    sub_cluster_group=['neutrophils-0-1'],
    use_raw=False,
)

In [None]:
# Marker expression per current k-means cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='kmeans',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Visually inspect the k-means clusters against marker expression with the scimap napari viewer.
# Image and segmentation mask of the sample to inspect.
image_path1 = '/data/vasileiosionat2/IBEX_FINAL/E)Processed_files/HV137B1.tif'
seg_mask_path1 = '/data/vasileiosionat2/IBEX_FINAL/Cellprofiler_Outputs/Masks/HV137B1_0.tiff'

# Overlay the 'kmeans' labels of this one image on top of the image channels.
sm.pl.image_viewer(
    image_path1, adata,
    seg_mask=seg_mask_path1, subset='HV137B1_HV137B1_0',
    overlay='kmeans', point_size=20,
)


In [None]:
# Rename and merge the k-means sub-clusters into final annotations, based on their
# marker expression and on visual inspection of each cluster.
# Keys are the final annotation written to adata.obs['cluster']; values are the
# adata.obs['kmeans'] labels merged into that annotation.
# Each k-means label is listed under exactly one annotation.
cluster_to_kmeans = {
    'APC': ['APCs-immune-1', 'APCs-immune-3'],
    'APC (proximity to blood vessel)': ['APCs-immune-0'],
    'APC (proximity to T cell)': ['APCs-immune-2'],
    'SMC': ['SMC-0', 'SMC-2'],
    'HLA-DR(+) SMC': ['SMC-1'],
    'CD8 T cell / b cell mix': ['T cell-5'],
    'CD4 T cell': ['T cell-2'],
    'plasma / T cell mix': ['T cell-3-0', 'plasma cells-1-0'],
    'b cell': ['b cell'],
    'CD4 T cell / b cell mix': ['T cell-1'],
    'CD8 T cell': ['T cell-4'],
    'intraepithelial T cell': ['T cell-0'],
    'aSMA(-) endothelial cell': ['endothelial-2'],
    'HLA-DR(+) endothelial cell': ['endothelial-1'],
    'endothelial cell': ['endothelial-0'],
    'basal/parabasal epithelial cell': ['epithelial-0-0', 'epithelial-1-1', 'immune-other-1-0', 'likely-SMC'],
    'spinous epithelial cell': ['epithelial-3-0'],
    'crevicular / keratin epithelial cell': ['epithelial-2-0'],
    'proliferating epithelial cell': ['epithelial-0-1', 'epithelial-1-0', 'immune-other-1-1', 'epithelial-3-1', 'epithelial-2-1'],
    'fibroblast': ['fibroblast-2'],
    'Extracellular matrix / Vim(+)': ['fibroblast-0'],
    'Thy-1(-) fibroblast': ['fibroblast-1'],
    'HLA-DR(+) fibroblast': ['fibroblast-3'],
    'plasma cell': ['plasma cells-2', 'plasma cells-3', 'immune-other-0-0'],
    'plasma cell / proximity to T cell': ['plasma cells-1-1'],
    'endothelial / immune mix': ['immune-other-0-1', 'plasma cells-0', 'T cell-3-1'],
    'Thy-1(+) / CD45(+) cell': ['immune-other-2'],
    'intraepithelial APC': ['likely-APCs-immune'],
    'Unknown': ['likely-endothelial'],
    'mast cell': ['mast cells-0', 'mast cells-1'],
    'S100a8/9(-) neutrophil': ['neutrophils-1-1', 'neutrophils-2'],
    'S100a8/9(+) neutrophil, CT': ['neutrophils-0-0', 'neutrophils-0-1-0'],
    'intraepithelial neutrophil': ['neutrophils-3'],
    'intravascular neutrophil': ['Unknown', 'neutrophils-0-1-1', 'neutrophils-1-0'],
}

# Start from the placeholder 'test'; any cell whose k-means label is not listed above keeps it.
adata.obs['cluster'] = 'test'
for cluster_name, kmeans_labels in cluster_to_kmeans.items():
    adata.obs.loc[adata.obs['kmeans'].isin(kmeans_labels), 'cluster'] = cluster_name

# Make leftovers visible instead of letting the placeholder slip through unnoticed.
unmapped_kmeans = sorted(adata.obs.loc[adata.obs['cluster'] == 'test', 'kmeans'].astype(str).unique())
if unmapped_kmeans:
    print(f"kmeans labels without an annotation (cluster left as 'test'): {unmapped_kmeans}")

In [None]:
# Number of cells in each merged/renamed cluster.
adata.obs.loc[:, 'cluster'].value_counts()

In [None]:
# Marker expression per merged/renamed cluster (all markers, scaled per marker).
sc.pl.matrixplot(
    adata,
    var_names=adata.var.index,
    groupby='cluster',
    dendrogram=False,
    use_raw=False,
    cmap="vlag",
    standard_scale='var',
)

In [None]:
# Visualize the renamed/merged clusters in napari.
# Image and segmentation mask of the sample to inspect.
image_path1 = '/data/vasileiosionat2/IBEX_FINAL/E)Processed_files/HV196B.tif'
seg_mask_path1 = '/data/vasileiosionat2/IBEX_FINAL/Cellprofiler_Outputs/Masks/HV196B_0.tiff'

# Overlay the 'cluster' annotation of this one image on the image channels and the segmentation mask.
sm.pl.image_viewer(
    image_path1, adata,
    seg_mask=seg_mask_path1, subset='HV196B_HV196B_0',
    overlay='cluster', point_size=20,
)


In [None]:
# Start 'spatial_cluster' as a copy of 'cluster'.
# It will hold the most fine-grained cell annotations; "spatial" refers to the
# pathologist annotations (added in the following cells) that are folded into it.
adata.obs['spatial_cluster'] = adata.obs['cluster'].copy()

In [None]:
# Pathologist annotations to facilitate epithelial characterization.
# Each sample is opened in napari in turn so that the superficial layers of the
# oral/surface epithelium can be selected; the ROI is stored in adata.obs['Surface_Epi'].
# Samples handled in this cell, in the order they are opened.
sample_names = ['HV196B', 'HV203B', 'HV189B', 'HV180B2', 'HV180B1', 'HV140A2', 'HV140A1', 'HV214A']

# Image, mask and imageid are all derived from the sample name:
#   image   -> E)Processed_files/<sample>.tif
#   mask    -> Cellprofiler_Outputs/Masks/<sample>_0.tiff
#   imageid -> <sample>_<sample>_0
# overwrite=False is passed on every call so that ROIs already stored under 'Surface_Epi' are not replaced.
for sample_name in sample_names:
    adata = sm.pl.addROI_image(f'/data/vasileiosionat2/IBEX_FINAL/E)Processed_files/{sample_name}.tif', adata,
                                subset=f'{sample_name}_{sample_name}_0',
                                imageid='imageid',
                                overlay=None, overlay_category=None,
                                markers=None,
                                channel_names='default',
                                x_coordinate='X_centroid', y_coordinate='Y_centroid',
                                seg_mask=f'/data/vasileiosionat2/IBEX_FINAL/Cellprofiler_Outputs/Masks/{sample_name}_0.tiff',
                                overwrite=False,
                                label='Surface_Epi')


In [None]:
# Second batch of samples for the 'Surface_Epi' ROI annotation, in the order they are opened in napari.
additional_sample_names = ['HV207B', 'HV195A2', 'HV219B1', 'HV137B2', 'HV137B1', 'HV219B2', 'HV154B2', 'HV219B3', 'HV154B1', 'HV195A1']

# Image, mask and imageid are all derived from the sample name:
#   image   -> E)Processed_files/<sample>.tif
#   mask    -> Cellprofiler_Outputs/Masks/<sample>_0.tiff
#   imageid -> <sample>_<sample>_0
# overwrite=False is passed on every call so that ROIs already stored under 'Surface_Epi' are not replaced.
for sample_name in additional_sample_names:
    adata = sm.pl.addROI_image(f'/data/vasileiosionat2/IBEX_FINAL/E)Processed_files/{sample_name}.tif', adata,
                                subset=f'{sample_name}_{sample_name}_0',
                                imageid='imageid',
                                overlay=None, overlay_category=None,
                                markers=None,
                                channel_names='default',
                                x_coordinate='X_centroid', y_coordinate='Y_centroid',
                                seg_mask=f'/data/vasileiosionat2/IBEX_FINAL/Cellprofiler_Outputs/Masks/{sample_name}_0.tiff',
                                overwrite=False,
                                label='Surface_Epi')


In [None]:
# Third batch of samples for the 'Surface_Epi' ROI annotation, in the order they are opened in napari.
additional_sample_names = ['HV214A2surg', 'HV204B', 'HV220B2', 'HV214A1surg', 'HV140B', 'TM327I', 'HV190B', 'HV171B', 'HV191B', 'HV220B1', 'HV160D']

# Image, mask and imageid are all derived from the sample name:
#   image   -> E)Processed_files/<sample>.tif
#   mask    -> Cellprofiler_Outputs/Masks/<sample>_0.tiff
#   imageid -> <sample>_<sample>_0
# overwrite=False is passed on every call so that ROIs already stored under 'Surface_Epi' are not replaced.
for sample_name in additional_sample_names:
    adata = sm.pl.addROI_image(f'/data/vasileiosionat2/IBEX_FINAL/E)Processed_files/{sample_name}.tif', adata,
                                subset=f'{sample_name}_{sample_name}_0',
                                imageid='imageid',
                                overlay=None, overlay_category=None,
                                markers=None,
                                channel_names='default',
                                x_coordinate='X_centroid', y_coordinate='Y_centroid',
                                seg_mask=f'/data/vasileiosionat2/IBEX_FINAL/Cellprofiler_Outputs/Masks/{sample_name}_0.tiff',
                                overwrite=False,
                                label='Surface_Epi')

In [None]:
# Relabel crevicular / keratin epithelial cells that fall inside the
# 'Surface_Epi' ROI as 'keratin epithelial cell' in 'spatial_cluster'.
keratin_label = 'keratin epithelial cell'

# A categorical column only accepts values that are registered categories.
# add_categories raises ValueError if the category already exists, so only add
# it when missing; this keeps the cell safe to re-run.
if keratin_label not in adata.obs['spatial_cluster'].cat.categories:
    adata.obs['spatial_cluster'] = adata.obs['spatial_cluster'].cat.add_categories([keratin_label])

# Cells whose 'cluster' is the crevicular / keratin epithelial cluster AND whose
# 'Surface_Epi' ROI annotation equals 'Surface_Epi'
keratin_condition = (adata.obs['cluster'] == 'crevicular / keratin epithelial cell') & (adata.obs['Surface_Epi'] == 'Surface_Epi')

# Update the 'spatial_cluster' column where the condition is met
adata.obs.loc[keratin_condition, 'spatial_cluster'] = keratin_label

In [None]:
# The 'APC (proximity to T cell)' cluster is a mixture of APCs and T cells, so
# k-means (k=2) is run on CD3 within that cluster only, to separate the cells
# that are more T-cell-like from those that are more APC-like.
# Results are written to adata.obs['kmeans_new'].
adata = sm.tl.cluster(
    adata,
    method='kmeans',
    k=2,
    use_raw=False,
    subset_genes=['CD3'],
    sub_cluster=True,
    sub_cluster_column='spatial_cluster',
    sub_cluster_group=['APC (proximity to T cell)'],
    label='kmeans_new',
)

In [None]:
# In case some B cells sit within the 'Thelper-B mix' cluster, re-cluster that
# group with k-means (k=2) on CD20.
#
# This call writes to the same 'kmeans_new' column as the CD3 sub-clustering of
# 'APC (proximity to T cell)'. sm.tl.cluster assigns the whole label column, so
# the earlier 'APC (proximity to T cell)-0/-1' labels would be lost and the
# later reassignment based on them would match no cells. Keep a copy of the
# existing labels and restore them for the APC group after clustering.
previous_kmeans = (
    adata.obs['kmeans_new'].astype(str) if 'kmeans_new' in adata.obs.columns else None
)

adata = sm.tl.cluster (adata, k= 2, method = 'kmeans', sub_cluster = True, subset_genes=['CD20'], label='kmeans_new',
    sub_cluster_column='spatial_cluster', sub_cluster_group=['Thelper-B mix'], use_raw = False)

if previous_kmeans is not None:
    # Cells still assigned to the APC group keep their CD3 sub-cluster label;
    # every other cell takes the label produced by the CD20 run above.
    is_apc_group = adata.obs['spatial_cluster'] == 'APC (proximity to T cell)'
    adata.obs['kmeans_new'] = adata.obs['kmeans_new'].astype(str).mask(is_apc_group, previous_kmeans)

In [None]:
# Move the CD3 k-means sub-cluster 0 of 'APC (proximity to T cell)' into its own
# 'T cell (proximity to APC)' group in 'spatial_cluster'.
t_cell_label = 'T cell (proximity to APC)'

# add_categories raises ValueError if the category already exists, so only add
# it when missing; this keeps the cell safe to re-run.
if t_cell_label not in adata.obs['spatial_cluster'].cat.categories:
    adata.obs['spatial_cluster'] = adata.obs['spatial_cluster'].cat.add_categories([t_cell_label])

# Relies on 'kmeans_new' holding the CD3 sub-cluster labels of the APC group.
# NOTE(review): sub-cluster '-0' is presumably the CD3-high group; confirm
# against the CD3 expression of both sub-clusters before trusting the label.
unknown2_condition = (adata.obs['spatial_cluster'] == 'APC (proximity to T cell)') & (adata.obs['kmeans_new'] == 'APC (proximity to T cell)-0')

# Update the 'spatial_cluster' column where the condition is met
adata.obs.loc[unknown2_condition, 'spatial_cluster'] = t_cell_label

In [None]:
# Split the 'Plasma' group into two k-means sub-clusters based on Ki67.
plasma_subcluster_args = dict(
    method='kmeans',                       # k-means clustering
    k=2,                                   # number of sub-clusters
    use_raw=False,                         # cluster on processed data, not raw data
    subset_genes=['Ki67'],                 # marker(s) used for the clustering
    sub_cluster=True,                      # cluster within a group rather than on all cells
    sub_cluster_column='spatial_cluster',  # column that holds the group labels
    sub_cluster_group='Plasma',            # only this group is sub-clustered
    # The result is written back into 'spatial_cluster' itself; the next cell
    # renames the resulting 'Plasma-0' / 'Plasma-1' labels.
    label='spatial_cluster',
)
adata = sm.tl.cluster(adata, **plasma_subcluster_args)

In [None]:
# Give the Ki67 k-means sub-clusters of the Plasma group their final names.
# NOTE(review): 'PB' presumably stands for plasmablast - confirm.
replacement_dict = {
    'Plasma-0': 'Plasma',
    'Plasma-1': 'PB',
}

# Replace the labels while 'spatial_cluster' is plain strings, then convert the
# column to categorical. Calling Series.replace on a categorical column is
# deprecated in recent pandas versions.
spatial_cluster_labels = adata.obs['spatial_cluster'].astype(str).replace(replacement_dict)
adata.obs['spatial_cluster'] = pd.Categorical(spatial_cluster_labels)

# Show the final set of 'spatial_cluster' labels
print(adata.obs['spatial_cluster'].unique())

In [None]:
# Save the AnnData object, including the updated 'spatial_cluster' labels, to disk
adata_output_path = "/data/vasileiosionat2/IBEX_FINAL/Scimap/Adata/adata.h5ad"
adata.write_h5ad(adata_output_path)