In [None]:
#### ---------------
#### Use scanpy.yml environment
#### ---------------

import os
import sys 
import warnings

import numpy as np 
import pandas as pd 
import anndata as ad
import scanpy as sc

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

from scipy import stats 
from sklearn.neighbors import KDTree
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [None]:
def count_neighbors_by_type(df, radius, x, y, cell_type_labels):
    """Count, for every cell, its neighbors of each cell type within a radius.

    Every row of ``df`` is used both as a query cell and as a candidate
    neighbor. A cell is never counted as its own neighbor.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell. Must contain the columns named by ``x``, ``y`` and
        ``cell_type_labels``. Its index may be anything; rows are addressed by
        position.
    radius : float
        Search radius, in the same units as the ``x`` / ``y`` columns.
    x, y : str
        Names of the two coordinate columns.
    cell_type_labels : str
        Name of the column holding each cell's type label.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df``, in the same order, with a fresh 0..n-1
        index. Columns: one count column per cell type seen among the
        neighbors (named ``str(label)``, 0 where a type is absent), then
        ``total_neighbors`` (row sum of the counts) and ``query_cell_type``
        (the label of the query cell itself).
    """

    print(f"Finding {cell_type_labels} neighbors in {radius}um")

    # Work on plain arrays: the tree returns row *positions*, so the label
    # lookup below must be positional no matter what index ``df`` carries.
    phenotypes = df[cell_type_labels].to_numpy()
    cell_coords = df[[x, y]].to_numpy()

    # Create tree for queries, get neighbors
    kdt = KDTree(cell_coords)
    neighbors = kdt.query_radius(cell_coords, r=radius)

    # Count phenotypes among each cell's neighbors
    neighbor_counts = []
    for i, n in enumerate(neighbors):
        # the query cell is always returned as its own neighbor; drop it
        n = n[n != i]
        p, c = np.unique(phenotypes[n], return_counts=True)
        neighbor_counts.append(dict(zip(p, c)))

    # One row per query cell; a type missing from a neighborhood counts as 0
    neighbor_df = pd.DataFrame(neighbor_counts).fillna(0)
    neighbor_df.columns = [str(col) for col in neighbor_df.columns]
    neighbor_df['total_neighbors'] = neighbor_df.sum(axis=1)
    neighbor_df['query_cell_type'] = phenotypes.tolist()

    return neighbor_df

In [None]:
# immune / tumor: per-panel cell tables; later cells read their 'CellID', 'x',
# 'y' and 'cross_cluster' columns.
# tracked: table whose '*CellID*' columns are merged onto the neighborhood
# matrices of both panels.
immune, tumor, tracked = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/E06_immune_label_propagated_spatial.csv',
        'data/E06_tumor_label_propagated_spatial.csv',
        'data/E06_tracked_indiv_leiden.csv',
    )
)

In [None]:
RADIUS = 30        # neighborhood search radius, microns
UM_PER_PX = 0.65   # factor used to convert centroid coordinates from px to microns

panels = {'immune' : immune, 'tumor' : tumor}

# ID columns of the tracking table; identical for every panel, so select once
tracked_id_cols = [col for col in tracked.columns if 'CellID' in col]

ndf_matrices = {}
for panel, df in panels.items():

    # convert X and Y centroids from px to um
    df['x_um'] = df['x'] * UM_PER_PX
    df['y_um'] = df['y'] * UM_PER_PX

    # generate the neighborhood matrix (one row per cell of this panel)
    panel_ndf = count_neighbors_by_type(
        df,
        radius = RADIUS,
        x = 'x_um',
        y = 'y_um',
        cell_type_labels = 'cross_cluster')

    # Tag each row with its CellID, then inner-merge with the tracking table:
    # this keeps only cells present in `tracked` and brings in the remaining
    # ID columns.
    panel_ndf[f'{panel}_CellID'] = list(df['CellID'])
    panel_ndf = pd.merge(
        panel_ndf,
        tracked[tracked_id_cols],
        on = f'{panel}_CellID')

    panel_ndf['panel'] = panel
    ndf_matrices[panel] = panel_ndf

In [None]:
# stack the per-panel neighborhood matrices into one frame with a fresh index
ndf_matrix = pd.concat(
    [ndf_matrices[panel_name] for panel_name in ndf_matrices],
    ignore_index = True,
)

In [None]:
# split the concatenated frame into metadata (obs) columns and count columns
obs_cols = ['total_neighbors', 'query_cell_type', 'immune_CellID', 'tumor_CellID', 'panel']
ndf_obs = ndf_matrix[obs_cols]

# A cell type that appears around only one panel's cells has no column in the
# other panel's matrix, so the concat leaves NaN there. Those are genuine
# zero counts, and NaN would make the later KMeans fit fail.
ndf_matrix = ndf_matrix.drop(columns = obs_cols).fillna(0)

In [None]:
# compute neighborhood percentages (each row divided by its own total)
# A cell with no neighbors has a total of 0, so the division yields NaN for the
# whole row; keep such cells as an all-zero composition instead, because KMeans
# cannot be fitted on NaN.
ndf_matrix = ndf_matrix.div(ndf_matrix.sum(axis = 1), axis = 0).fillna(0)

In [None]:
# sanity check: row sums should all be 1 (0 for cells without any neighbor);
# the min / max of the summary expose any row that deviates
ndf_matrix.sum(axis = 1).describe()

In [None]:
# run kmeans at different values of K to determine which to proceed with
# KMeans initialisation is random: fix the seed (same value as the final
# clustering) so the elbow curve is identical on every run.
distortions = []
K = range(5,50,5)
for k in K:
    kmeanModel = KMeans(n_clusters=k, random_state=20)
    kmeanModel.fit(ndf_matrix)
    # inertia_ = within-cluster sum of squared distances for this K
    distortions.append(kmeanModel.inertia_)

In [None]:
# plot distortion by K
fig, ax = plt.subplots()
fig.set_facecolor('white')
sns.lineplot(x = list(K), y = distortions, ax = ax)
ax.set_xlabel('Number of clusters (k)')
ax.set_ylabel('Distortion')
# uncomment to export the elbow plot (must run before plt.show())
# plt.savefig('E06_RCN_elbow.png', dpi = 500)
# plt.show() instead of fig.show(): the latter only warns on non-GUI
# (inline notebook) backends
plt.show()

In [None]:
# Number of clusters (K) used for the final KMeans run; chosen by eye from the
# elbow plot of distortion vs. K.
k_selected = 15

In [None]:
# final clustering using selected K
# Fit on the composition columns only. Dropping any cluster-label / metadata
# columns left behind by a previous execution keeps this cell safe to re-run;
# on a first run nothing is dropped.
rcn_features = ndf_matrix.drop(columns = ['kmeans', *obs_cols], errors = 'ignore')

cluster = KMeans(n_clusters = k_selected, random_state = 20)
rcn_labels = cluster.fit_predict(rcn_features)

# add the cluster labels, then re-attach the metadata columns (same row index)
ndf_matrix = pd.concat([rcn_features.assign(kmeans = rcn_labels), ndf_obs], axis = 1)

In [None]:
# which kmeans clusters are each panel enriched for?
# -> number of anchor cells per (panel, kmeans cluster)
kmeans_counts = (
    ndf_matrix[['panel','kmeans']]
    .groupby('panel')
    .value_counts()
    .reset_index()
)
kmeans_counts.columns = ['panel','kmeans','count']
kmeans_counts['kmeans'] = kmeans_counts['kmeans'].astype('category')

# display name of each panel's slide (previously ' Slide 1' carried a stray
# leading space)
kmeans_counts['slide'] = np.where(kmeans_counts['panel'] == 'immune', 'Slide 1', 'Slide 2')

In [None]:
# average neighborhood composition of the cells assigned to each RCN
# (rows: kmeans cluster, columns: cell types)
kmeans_composition = (
    ndf_matrix
    .drop(columns = obs_cols)
    .groupby('kmeans')
    .mean()
)

In [None]:
# Heatmap of mean neighborhood composition per RCN, rows and columns ordered by
# hierarchical clustering
g = sns.clustermap(
    kmeans_composition,
    cmap = "Spectral_r",
    linecolor = 'white',
    linewidth = 2,
    row_cluster = True,
    col_cluster = True,
    vmax = 0.6,
    cbar_kws = {'label' : 'Proportion'})

g.ax_heatmap.set_ylabel('')
g.fig.set_facecolor('white')

# shrink the grid to make room on the right for the anchor-cell barplot
g.fig.subplots_adjust(right=0.6)
ax2 = g.fig.add_axes([0.61, 0.22, 0.2, 0.61])

# Row order chosen by the hierarchical clustering (top to bottom), used to line
# the bars up with the heatmap rows. Taken from the row dendrogram rather than
# from the tick-label text, since seaborn may skip tick labels when rows are
# too dense to label without overlap.
rcn_ax_order = [
    str(kmeans_composition.index[row_pos])
    for row_pos in g.dendrogram_row.reordered_ind
]

sns.barplot(
    y = kmeans_counts['kmeans'].astype(str),
    x = kmeans_counts['count'],
    hue = kmeans_counts['slide'],
    orient = 'horizontal',
    order = rcn_ax_order,
    ax = ax2
)

# RCN labels live on the barplot's right-hand side; hide the heatmap's own
ax2.yaxis.tick_right()
ax2.yaxis.set_label_position("right")
ax2.set_ylabel('RCN')
ax2.set_xlabel('Anchor cell count')

g.ax_heatmap.get_yaxis().set_ticks([])
g.cax.set_aspect(10)

# Hand-built legend; the colours are the first two entries of the default
# colour cycle, which the two hue levels of the barplot receive.
legend_elements = [matplotlib.patches.Patch(facecolor='#1f77b4', edgecolor='#1f77b4', label='Slide 1'),
                    matplotlib.patches.Patch(facecolor='#ff7f0e', edgecolor='#ff7f0e', label='Slide 2')]

ax2.legend(handles=legend_elements, loc='upper left', bbox_to_anchor = (0.2,1.1))

# save through the figure object instead of pyplot's implicit current figure
g.fig.savefig('Figure_4F.png', dpi = 500)

### Spatial plots of RCN 7

In [None]:
def _rcn_with_centroids(panel_name, panel_cells):
    """Rows of `ndf_matrix` for one panel, joined with that panel's x / y centroids."""
    merged = pd.merge(
        ndf_matrix[ndf_matrix['panel'] == panel_name],
        panel_cells[['CellID','x','y']],
        left_on = f'{panel_name}_CellID',
        right_on = 'CellID')
    # cluster ids are labels, not quantities
    merged['kmeans'] = merged['kmeans'].astype('category')
    return merged


tumor_ndf_spatial = _rcn_with_centroids('tumor', tumor)
immune_ndf_spatial = _rcn_with_centroids('immune', immune)

In [None]:
plt.style.use("dark_background")
fig, axs = plt.subplots(ncols = 2)

highlight_rcn = 7  # RCN drawn in orange; all cells are drawn in grey underneath


def _scatter_rcn_highlight(spatial_df, rcn, ax):
    """Scatter every cell of one slide in grey, then overlay the cells of `rcn` in orange."""
    # compare as strings so the mask works whatever dtype the labels have
    in_rcn = spatial_df['kmeans'].astype(str) == str(rcn)
    for cells, color in ((spatial_df, 'grey'), (spatial_df[in_rcn], 'orange')):
        # column 'y' goes on the horizontal axis and 'x' on the vertical one
        sns.scatterplot(
            x = cells['y'],
            y = cells['x'],
            color = color,
            linewidth = 0,
            s = 2,
            ax = ax)


slides = (
    (axs[0], immune_ndf_spatial, 'Slide 1'),
    (axs[1], tumor_ndf_spatial, 'Slide 2'),
)

for ax, spatial_df, title in slides:
    _scatter_rcn_highlight(spatial_df, highlight_rcn, ax)

    # bare image-like panels: no ticks, no axis labels, equal aspect
    ax.tick_params(left = False, right = False , labelleft = False, labelbottom = False, bottom = False)
    ax.invert_xaxis()
    ax.set_aspect('equal')
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_title(title)

# proxy handles: the scatter markers (s = 2) are too small to read in a legend
other = Line2D([0], [0], label='Other', marker='o', markersize=10,
         markeredgecolor='k', markerfacecolor='gray', linestyle='')

rcn7 = Line2D([0], [0], label=f'RCN {highlight_rcn}', marker='o', markersize=10,
         markeredgecolor='k', markerfacecolor='orange', linestyle='')

# Put the legend outside the right-hand panel, addressing that axes explicitly
# rather than through pyplot's current axes and a leaked loop variable.
axs[1].legend(handles=[rcn7, other], loc = 'upper left', bbox_to_anchor = (1,1))

fig.tight_layout()
fig.savefig('Figure_4G.png', dpi = 500)