In [None]:
#### ------------------------------------------------------------------------------------
#### Use scanpy.yml environment
#### If looking to propagate labels for a larger dataset, 
#### see <link to Chris' RAPIDS notebook> for GPU implementation of KNN label propagation
#### -------------------------------------------------------------------------------------

import os
import re 
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt 
from sklearn.neighbors import KNeighborsClassifier

### Propagating tracked labels to untracked cells on the immune panel

In [None]:
## inputs
# per-cell spatial coordinates for the immune panel
immune_spatial = pd.read_csv('data/E06_immune_spatial.csv')

# leiden cluster -> refined cell type lookup for the immune panel clustering
immune_annots_df = pd.read_csv('annotations/E06_immune_annotations.csv')
immune_annots_map = immune_annots_df.set_index('cluster')['cell_type_refined'].to_dict()

# leiden cluster -> refined cell type lookup for the tracked-cell clustering
tracked_annots_df = pd.read_csv('annotations/E06_tracked_annotations.csv')
tracked_annots_map = tracked_annots_df.set_index('cluster')['cell_type_refined'].to_dict()

# per-cell tables; both carry a 'leiden' column that the lookups above are applied to
immune = pd.read_csv('data/E06_immune_indiv_leiden.csv')
tracked = pd.read_csv('data/E06_tracked_indiv_leiden.csv')

In [None]:
# translate each cell's leiden cluster into its refined cell type
immune = immune.assign(cell_type_immune = immune['leiden'].map(immune_annots_map))
tracked = tracked.assign(cell_type_tracked = tracked['leiden'].map(tracked_annots_map))

# attach the immune-panel cell type to every tracked cell via its immune_CellID
# (default inner merge: tracked rows without a matching immune CellID are dropped)
tracked = tracked.merge(
    immune[['CellID', 'cell_type_immune']],
    left_on = 'immune_CellID',
    right_on = 'CellID')

# attach spatial coords to the immune dataframe
# (default inner merge: only cells present in both tables are kept)
immune = immune.merge(immune_spatial, on = 'CellID')

# marker columns used as features downstream
immune_markers = [
    'CD3', 'GRZB', 'Ki67', 'PanCK', 'CD45', 'CD68', 'CD3d', 'CD8a',
    'CD163', 'aSMA', 'CD14', 'CD4', 'FOXP3', 'CD11b', 'CD20', 'MHC_II_DPB1',
]

# marker-only matrix, indexed by CellID
immune_sub = immune.set_index('CellID').loc[:, immune_markers]

In [None]:
# split the marker matrix into cells that were tracked across panels and cells that were not
# .copy() gives independent frames, so adding the 'cross_cluster' column below does not
# write into a slice of immune_sub (which raises SettingWithCopyWarning)
is_tracked = immune_sub.index.isin(tracked['immune_CellID'])
immune_tracked = immune_sub[is_tracked].copy()
immune_untracked = immune_sub[~is_tracked].copy()

# grab the labels to train on from the labeled annotations
# (inner merge; pandas documents that it preserves the order of the left keys,
#  which is what keeps labs row-aligned with immune_tracked)
label_df = pd.merge(
    immune_tracked,
    tracked[['immune_CellID','cell_type_tracked']],
    left_index = True,
    right_on = 'immune_CellID')

labs = label_df['cell_type_tracked']

# labels are paired with the training rows by position, so there must be exactly one
# label per tracked cell; fail here with a clear message instead of inside sklearn
if len(labs) != len(immune_tracked):
    raise ValueError(
        f"expected one tracked label per tracked immune cell, got {len(labs)} labels "
        f"for {len(immune_tracked)} cells; check tracked['immune_CellID'] for duplicates")

# train on the tracked cells' marker values, then predict a label for every untracked cell
model = KNeighborsClassifier(n_neighbors=10)
model.fit(immune_tracked, labs)
y_hat = model.predict(immune_untracked)

# tracked cells keep their tracked label; untracked cells receive the KNN prediction
immune_tracked['cross_cluster'] = labs.to_numpy()
immune_untracked['cross_cluster'] = y_hat

# recombine tracked and untracked cells and turn the CellID index into a regular column
# (the original called reset_index() without keeping its result)
recombined_df = pd.concat([immune_tracked, immune_untracked]).reset_index()

# merge with spatial
recombined_df = pd.merge(recombined_df, immune_spatial, on = 'CellID')

# merge immune cell type into recombined_df
recombined_df = pd.merge(recombined_df, immune[['CellID','cell_type_immune']], on = 'CellID')

In [None]:
# write the immune-panel result to disk; index=False leaves the row index out of the file
immune_output_csv = 'data/E06_immune_label_propagated_spatial.csv'
recombined_df.to_csv(immune_output_csv, index=False)

### Propagating tracked labels to untracked cells on the tumor panel

In [None]:
## inputs
# per-cell spatial coordinates for the tumor panel
tumor_spatial = pd.read_csv('data/E06_tumor_spatial.csv')

# leiden cluster -> refined cell type lookup for the tumor panel clustering
tumor_annots_df = pd.read_csv('annotations/E06_tumor_annotations.csv')
tumor_annots_map = tumor_annots_df.set_index('cluster')['cell_type_refined'].to_dict()

# leiden cluster -> refined cell type lookup for the tracked-cell clustering
tracked_annots_df = pd.read_csv('annotations/E06_tracked_annotations.csv')
tracked_annots_map = tracked_annots_df.set_index('cluster')['cell_type_refined'].to_dict()

# per-cell tables; `tracked` is read from disk again here, replacing any earlier
# in-memory version of it
tumor = pd.read_csv('data/E06_tumor_indiv_leiden.csv')
tracked = pd.read_csv('data/E06_tracked_indiv_leiden.csv')

In [None]:
# translate each cell's leiden cluster into its refined cell type
tumor = tumor.assign(cell_type_tumor = tumor['leiden'].map(tumor_annots_map))
tracked = tracked.assign(cell_type_tracked = tracked['leiden'].map(tracked_annots_map))

# attach the tumor-panel cell type to every tracked cell via its tumor_CellID
# (default inner merge: tracked rows without a matching tumor CellID are dropped)
tracked = tracked.merge(
    tumor[['CellID', 'cell_type_tumor']],
    left_on = 'tumor_CellID',
    right_on = 'CellID')

# attach spatial coords to the tumor dataframe
# (default inner merge: only cells present in both tables are kept)
tumor = tumor.merge(tumor_spatial, on = 'CellID')

# marker columns used as features downstream
tumor_markers = [
    'CD3', 'Vimentin', 'aSMA', 'Ecad', 'Ki67', 'CD45',
    'CK14', 'CK19', 'CK17', 'PCNA', 'PanCK', 'CD31',
]

# marker-only matrix, indexed by CellID
tumor_sub = tumor.set_index('CellID').loc[:, tumor_markers]

In [None]:
# split the marker matrix into cells that were tracked across panels and cells that were not
# .copy() gives independent frames, so adding the 'cross_cluster' column below does not
# write into a slice of tumor_sub (which raises SettingWithCopyWarning)
is_tracked = tumor_sub.index.isin(tracked['tumor_CellID'])
tumor_tracked = tumor_sub[is_tracked].copy()
tumor_untracked = tumor_sub[~is_tracked].copy()

# grab the labels to train on from the labeled annotations
# (inner merge; pandas documents that it preserves the order of the left keys,
#  which is what keeps labs row-aligned with tumor_tracked)
label_df = pd.merge(
    tumor_tracked,
    tracked[['tumor_CellID','cell_type_tracked']],
    left_index = True,
    right_on = 'tumor_CellID')

labs = label_df['cell_type_tracked']

# labels are paired with the training rows by position, so there must be exactly one
# label per tracked cell; fail here with a clear message instead of inside sklearn
if len(labs) != len(tumor_tracked):
    raise ValueError(
        f"expected one tracked label per tracked tumor cell, got {len(labs)} labels "
        f"for {len(tumor_tracked)} cells; check tracked['tumor_CellID'] for duplicates")

# train on the tracked cells' marker values, then predict a label for every untracked cell
model = KNeighborsClassifier(n_neighbors=10)
model.fit(tumor_tracked, labs)
y_hat = model.predict(tumor_untracked)

# tracked cells keep their tracked label; untracked cells receive the KNN prediction
tumor_tracked['cross_cluster'] = labs.to_numpy()
tumor_untracked['cross_cluster'] = y_hat

# recombine tracked and untracked cells and turn the CellID index into a regular column
# (the original called reset_index() without keeping its result)
recombined_df = pd.concat([tumor_tracked, tumor_untracked]).reset_index()

# merge with spatial
recombined_df = pd.merge(recombined_df, tumor_spatial, on = 'CellID')

# merge tumor cell type into recombined_df
recombined_df = pd.merge(recombined_df, tumor[['CellID','cell_type_tumor']], on = 'CellID')

In [None]:
# write the tumor-panel result to disk; index=False leaves the row index out of the file
tumor_output_csv = 'data/E06_tumor_label_propagated_spatial.csv'
recombined_df.to_csv(tumor_output_csv, index=False)

### Create spatial scatter plots of pre- and post-label propagation (Figure 3D)

In [None]:
# Fixed cell type -> hex color lookup, shared so every figure colors a given type the same way.
# Key order is also used as the hue order when plotting.
cell_type_colors = {
    'Epithelial': '#d60000',
    'Basal Epithelial': '#8c3bff',
    'Proliferative Epithelial': '#018700',
    'Proliferative Basal Epithelial': '#00acc6',
    'Myoepithelial': '#97ff00',
    'Stroma': '#ff7ed1',
    'Endothelial': '#6b004f',
    'aSMA+ Stroma': '#ffa52f',
    'B-cells': '#00009c',
    'T-cells': '#857067',
    'CD8+ T-cells': '#004942',
    'CD4+ T-cells': '#4f2a00',
    'Regulatory T-cells': '#00fdcf',
    'Macrophages': '#bcb6ff',
    'CD163+ Macrophages': '#95b379',
}

In [None]:
# read the label-propagated tables back from disk, so the plotting cells below
# work from the saved CSVs
immune_propagated_csv = 'data/E06_immune_label_propagated_spatial.csv'
tumor_propagated_csv = 'data/E06_tumor_label_propagated_spatial.csv'

immune = pd.read_csv(immune_propagated_csv)
tumor = pd.read_csv(tumor_propagated_csv)

In [None]:
# Positional rename of all 21 columns so both panels share 'x', 'y' and
# 'cell_type_individual'. This overwrites names by position, not by name:
# NOTE(review): assumes the CSV columns are CellID, the 16 markers, cross_cluster,
# two spatial columns, then the panel's own cell type - confirm against the file.
immune_plot_markers = [
    'CD3', 'GRZB', 'Ki67', 'PanCK', 'CD45', 'CD68', 'CD3d', 'CD8a',
    'CD163', 'aSMA', 'CD14', 'CD4', 'FOXP3', 'CD11b', 'CD20', 'MHC_II_DPB1',
]
immune.columns = ['CellID', *immune_plot_markers, 'cross_cluster', 'x', 'y', 'cell_type_individual']

In [None]:
# Positional rename of all 17 columns so both panels share 'x', 'y' and
# 'cell_type_individual'. This overwrites names by position, not by name:
# NOTE(review): assumes the CSV columns are CellID, the 12 markers, cross_cluster,
# two spatial columns, then the panel's own cell type - confirm against the file.
tumor_plot_markers = [
    'CD3', 'Vimentin', 'aSMA', 'Ecad', 'Ki67', 'CD45',
    'CK14', 'CK19', 'CK17', 'PCNA', 'PanCK', 'CD31',
]
tumor.columns = ['CellID', *tumor_plot_markers, 'cross_cluster', 'x', 'y', 'cell_type_individual']

In [None]:
# Figure 3D: 2x2 grid of spatial scatter plots.
# Columns are the two panels (immune, tumor); rows are the two label sets
# (each panel's own cell types on top, the propagated 'cross_cluster' labels below).
plt.style.use("dark_background")
fig, ax = plt.subplots(
    ncols=2,
    nrows=2,
    gridspec_kw={'wspace' : 0.1, 'hspace' : 0.1},
    figsize = (8,8))

panels = [immune, tumor]
labels = ['cell_type_individual','cross_cluster']

for col, panel in enumerate(panels):
    for row, label_col in enumerate(labels):
        panel_ax = ax[row, col]

        # the 'y' column goes on the horizontal axis and 'x' on the vertical one,
        # and the horizontal axis is flipped below
        sns.scatterplot(
            x = panel['y'],
            y = panel['x'],
            hue = panel[label_col],
            palette = cell_type_colors,
            hue_order = list(cell_type_colors.keys()),
            linewidth = 0,
            s = 2,
            # no legend is drawn at all, instead of building one per axis and hiding it;
            # get_legend() returns None when no legend exists, so the old
            # get_legend().set_visible(False) call could raise AttributeError
            legend = False,
            ax = panel_ax)

        panel_ax.invert_xaxis()
        # strip ticks, tick labels and the axis labels seaborn derives from the column names
        panel_ax.tick_params(left = False, right = False , labelleft = False, labelbottom = False, bottom = False)
        panel_ax.set_ylabel('')
        panel_ax.set_xlabel('')

# column titles (one per panel) and row labels (one per label set)
ax[0,0].set_title('Slide 1')
ax[0,1].set_title('Slide 2')

ax[0,0].set_ylabel('Individual Panel')
ax[1,0].set_ylabel('Combined Panel')

# save through the explicit figure handle, not pyplot's implicit current figure
fig.savefig('Figure_3D.png', dpi = 500)