This notebook uses the dataset from 'Multiparameter persistent homology landscapes identify immune cell spatial patterns in tumors' by Vipond et al.   
https://doi.org/10.1073/pnas.2102166118

Download the original dataset from https://github.com/MultiparameterTDAHistology/SpatialPatterningOfImmuneCells
and run `preprocessing_regions.ipynb`.

Alternatively, precomputed ECCs and ECPs can be found as pickle files at  
https://drive.google.com/drive/folders/1RvGSG0TffSxbojCzuUWXz6jQeTn3RpOB?usp=sharing

In [None]:
import pandas as pd
import numpy as np
import pickle

from tqdm.notebook import tqdm

## Vectorizing

In [None]:
from src.bifiltration_utils import EC_at_bifiltration

def subsample_ECP(contributions, f1_range, f2_range, size=51):
    """Sample an Euler characteristic profile on a regular ``size`` x ``size`` grid.

    Parameters
    ----------
    contributions
        Forwarded unchanged as the first argument of ``EC_at_bifiltration``.
    f1_range, f2_range
        Two-element sequences ``(start, stop)``. Each axis is sampled at
        ``size`` evenly spaced values, both endpoints included.
    size : int
        Number of sample points along each filtration axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size)``. Entry ``[j, i]`` holds
        ``EC_at_bifiltration(contributions, f1_i, f2_j)``, i.e. rows follow
        the second filtration parameter and columns the first.
    """
    f1_grid = np.linspace(f1_range[0], f1_range[1], num=size)
    f2_grid = np.linspace(f2_range[0], f2_range[1], num=size)

    sampled = np.zeros((size, size))
    for col, f1_value in enumerate(f1_grid):
        for row, f2_value in enumerate(f2_grid):
            # Row index comes from f2, column index from f1.
            sampled[row, col] = EC_at_bifiltration(contributions, f1_value, f2_value)

    return sampled

In [None]:
# Number of sample points per filtration axis. Every ECP is flattened to a
# vector of length GRID_SIZE**2, so the sampling call and the reshape below
# must use the same value.
GRID_SIZE = 51


def _vectorize_marker(ecp_by_key, marker, f1_max, f2_max, size=GRID_SIZE):
    """Sample every ECP whose key contains ``marker`` on a common grid.

    Returns an array with one ``size`` x ``size`` sampled ECP per matching
    key, in the iteration order of ``ecp_by_key``. Both filtration axes are
    sampled from 0 up to the supplied maxima.
    """
    matching_keys = [k for k in ecp_by_key if marker in k]
    return np.array([
        subsample_ECP(ecp_by_key[key]['contributions'], [0, f1_max], [0, f2_max], size=size)
        for key in tqdm(matching_keys)
    ])


data = {}

for LABEL in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'N', 'O']:
    print()
    print('********')
    print(LABEL)
    print('********')

    # NOTE: pickle.load can execute arbitrary code; only open pickle files
    # from a source you trust.
    with open('pkls/ECP_global_{}_0.1.pkl'.format(LABEL), 'rb') as f:
        ECP_global = pickle.load(f)

    list_of_ECP = [ECP_global[key]['contributions'] for key in ECP_global]

    # Largest value of each filtration coordinate over all ECPs of this
    # label, so that every ECP of the label is sampled on the same grid.
    f1_max = max(max(c[0][0] for c in contributions) for contributions in list_of_ECP)
    f2_max = max(max(c[0][1] for c in contributions) for contributions in list_of_ECP)

    print('max filtrations ', f1_max, f2_max)

    print('vectorizing')
    # One stack of sampled ECPs per cell-type marker.
    cd8 = _vectorize_marker(ECP_global, "CD8", f1_max, f2_max)
    foxp3 = _vectorize_marker(ECP_global, "FoxP3", f1_max, f2_max)
    cd68 = _vectorize_marker(ECP_global, "CD68", f1_max, f2_max)

    print(cd8.shape, foxp3.shape, cd68.shape)

    # Flatten each sampled grid into a single feature vector.
    cd8 = cd8.reshape(-1, GRID_SIZE**2)
    foxp3 = foxp3.reshape(-1, GRID_SIZE**2)
    cd68 = cd68.reshape(-1, GRID_SIZE**2)

    data[LABEL] = {
        'cd8': cd8,
        'cd68': cd68,
        'foxp3': foxp3
    }

In [None]:
# Persist the per-label dictionary of vectorized ECPs so the classification
# step can be re-run without repeating the vectorization.
with open('pkls/ECP_global_VECT_0.1.pkl', 'wb') as vect_file:
    pickle.dump(data, vect_file)

## Classify the cell types using the ECP vectors

In [None]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)

# Evaluation protocol: fraction of the samples used for training in each
# random split, and the number of random splits that are averaged.
train_split = 0.8
nbr_of_repeats = 100

# Estimator instances with default hyper-parameters.
# NOTE(review): the classification loop further down rebinds ldr_classifier
# and qda_classifier to fresh instances on every split, and pca_classifier is
# not referenced in the visible cells -- confirm whether these are still needed.
pca_classifier = PCA()
ldr_classifier = LinearDiscriminantAnalysis()
qda_classifier = QuadraticDiscriminantAnalysis()

In [None]:
# Reload the vectorized ECPs from disk.
# NOTE: pickle.load can execute arbitrary code; only open pickle files from a
# source you trust.
with open('pkls/ECP_global_VECT_0.1.pkl', 'rb') as vect_file:
    data = pickle.load(vect_file)

In [None]:
# Names of the classification tasks, in the order their scores are printed.
COMPARISON_NAMES = ["CD68vsFoxP3", "CD8vsFoxP3", "CD8vsCD68", "3way"]
print(COMPARISON_NAMES)

for LABEL in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'N', 'O']:
    print()
    print('********')
    print(LABEL)
    print('********')

    cd8 = data[LABEL]['cd8']
    foxp3 = data[LABEL]['foxp3']
    cd68 = data[LABEL]['cd68']

    print(cd8.shape, foxp3.shape, cd68.shape)

    # One (feature matrix, class labels) pair per task, in the same order as
    # COMPARISON_NAMES.
    comparisons = [
        (
            np.vstack([cd68, foxp3]),
            np.array(["CD68"] * len(cd68) + ["FoxP3"] * len(foxp3)),
        ),
        (
            np.vstack([cd8, foxp3]),
            np.array(["CD8"] * len(cd8) + ["FoxP3"] * len(foxp3)),
        ),
        (
            np.vstack([cd8, cd68]),
            np.array(["CD8"] * len(cd8) + ["CD68"] * len(cd68)),
        ),
        (
            np.vstack([cd8, foxp3, cd68]),
            np.array(["CD8"] * len(cd8) + ["FoxP3"] * len(foxp3) + ["CD68"] * len(cd68)),
        ),
    ]

    for (X, y), label in zip(comparisons, COMPARISON_NAMES):
        test_accuracy_lda = []
        test_accuracy_lda_pca = []
        test_accuracy_qda_pca = []

        # Boolean mask with the first train_split fraction set to True; it is
        # shuffled in place on every repeat to draw a new random split.
        mask = np.zeros(len(y), dtype=bool)
        mask[: int(np.floor(len(y) * train_split))] = True

        for i in range(nbr_of_repeats):
            # Re-seeding on every repeat makes the sequence of splits
            # reproducible from run to run.
            np.random.seed(i)
            np.random.shuffle(mask)
            test_mask = np.logical_not(mask)

            X_train, y_train = X[mask], y[mask]
            X_test, y_test = X[test_mask], y[test_mask]

            # Fit the PCA on the training rows only. Fitting it on the full
            # matrix lets the held-out samples shape the projection, which
            # leaks test information into the PCA-based accuracies.
            pca = PCA(n_components=3).fit(X_train)
            Xr_train = pca.transform(X_train)
            Xr_test = pca.transform(X_test)

            # LDA on the raw ECP vectors.
            ldr_classifier = LinearDiscriminantAnalysis()
            ldr_classifier.fit(X_train, y_train)
            test_accuracy_lda.append(ldr_classifier.score(X_test, y_test))

            # LDA on the 3-component PCA projection.
            ldr_pcaclassifier = LinearDiscriminantAnalysis()
            ldr_pcaclassifier.fit(Xr_train, y_train)
            test_accuracy_lda_pca.append(ldr_pcaclassifier.score(Xr_test, y_test))

            # QDA on the 3-component PCA projection.
            qda_classifier = QuadraticDiscriminantAnalysis()
            qda_classifier.fit(Xr_train, y_train)
            test_accuracy_qda_pca.append(qda_classifier.score(Xr_test, y_test))

        # Three mean test accuracies per task, all tasks of a label on one
        # line: LDA, LDA after PCA, QDA after PCA.
        print('{:0.3f} {:0.3f} {:0.3f}'.format(np.mean(test_accuracy_lda),
                                              np.mean(test_accuracy_lda_pca),
                                              np.mean(test_accuracy_qda_pca)), end=' ')


['CD68vsFoxP3', 'CD8vsFoxP3', 'CD8vsCD68', '3way']

********
A
********
(74, 2601) (73, 2601) (78, 2601)
0.941 0.964 0.931 0.988 0.967 0.992 0.856 0.830 0.838 0.886 0.859 0.861 
********
B
********
(66, 2601) (55, 2601) (63, 2601)
0.922 0.869 0.867 0.992 0.985 0.992 0.962 0.945 0.911 0.940 0.865 0.869 
********
C
********
(65, 2601) (74, 2601) (73, 2601)
0.904 0.896 0.906 0.859 0.842 0.872 0.699 0.614 0.568 0.755 0.627 0.640 
********
D
********
(65, 2601) (64, 2601) (67, 2601)
0.933 0.918 0.929 0.985 0.988 0.987 0.807 0.779 0.755 0.874 0.775 0.787 
********
E
********
(57, 2601) (55, 2601) (54, 2601)
0.940 0.883 0.871 0.869 0.920 0.952 0.688 0.743 0.754 0.719 0.683 0.724 
********
F
********
(25, 2601) (28, 2601) (26, 2601)
0.933 0.925 0.963 0.830 0.847 0.901 0.850 0.921 0.855 0.824 0.847 0.878 
********
G
********
(46, 2601) (41, 2601) (46, 2601)
0.858 0.927 0.928 0.904 0.975 0.979 0.609 0.692 0.699 0.659 0.746 0.767 
********
H
********
(26, 2601) (27, 2601) (26, 2601)
0.885 0.898 0



0.370 0.460 0.384 
********
O
********
(13, 2601) (12, 2601) (11, 2601)
0.840 0.952 0.984 0.754 0.772 0.934 0.598 0.684 0.662 0.615 0.632 0.738 