This notebook uses the dataset from 'Multiparameter persistent homology landscapes identify immune cell spatial patterns in tumors' by Vipond et al.   
https://doi.org/10.1073/pnas.2102166118

Download the original dataset from https://github.com/MultiparameterTDAHistology/SpatialPatterningOfImmuneCells
and run `preprocessing_regions.ipynb`.

Alternatively, precomputed ECCs and ECPs can be found as pickle files at  
https://drive.google.com/drive/folders/1RvGSG0TffSxbojCzuUWXz6jQeTn3RpOB?usp=sharing

In [None]:
import pandas as pd
import numpy as np
import pickle

from tqdm.notebook import tqdm

## Vectorizing

In [None]:
! git clone --recursive https://github.com/dgurnari/pyEulerCurves.git
! pip install ./pyEulerCurves -q

In [None]:
from pyEulerCurves.ecc_utils import EC_at_filtration

def subsample_ECC(contributions, f_range, size=51):
    """Sample an Euler characteristic curve on an evenly spaced filtration grid.

    Parameters
    ----------
    contributions
        Contribution list, handed unchanged to ``EC_at_filtration``.
    f_range
        Indexable pair; ``f_range[0]`` and ``f_range[1]`` are the first and
        last grid values (both endpoints are included).
    size : int
        Number of grid points.

    Returns
    -------
    numpy.ndarray
        Float array of length ``size`` with ``EC_at_filtration`` evaluated at
        every grid value, in increasing grid order.
    """
    start, stop = f_range[0], f_range[1]
    grid = np.linspace(start, stop, num=size)
    return np.array(
        [EC_at_filtration(contributions, value) for value in grid],
        dtype=float,
    )

In [None]:
def _stack_sampled_eccs(ecc_by_file, marker, f_max):
    """Sample the ECC of every entry whose key contains `marker` on [0, f_max].

    Returns one row per matching key, in the iteration order of `ecc_by_file`.
    """
    matching = [name for name in ecc_by_file if marker in name]
    return np.array([subsample_ECC(ecc_by_file[name]['ecc'], [0, f_max])
                     for name in tqdm(matching)])


# Maps each label to the sampled ECC vectors of its three cell types.
data = {}

for LABEL in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'N', 'O']:
    print()
    print('********')
    print(LABEL)
    print('********')

    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('pkls/ECC_global_{}_0.1.pkl'.format(LABEL), 'rb') as f:
        ECC_global = pickle.load(f)

    if LABEL == 'E':
        # Hard-coded override of one entry's contribution list.
        # NOTE(review): the reason for this patch is not recorded here - confirm.
        ECC_global['T_E_ROI_26_locations_FoxP3.csv']['ecc'] = [(0.0, 5)]

    list_of_ECC = [ECC_global[name]['ecc'] for name in ECC_global]

    # Largest filtration value over all curves of this label; it is the shared
    # upper end of the sampling grid, so all vectors of a label are comparable.
    f_max = max(max(step[0] for step in contributions)
                for contributions in list_of_ECC)

    print('max filtration ', f_max)

    print('vectorizing')
    # One 2-D array per cell type: a row per file, a column per grid point.
    cd8 = _stack_sampled_eccs(ECC_global, "CD8", f_max)
    foxp3 = _stack_sampled_eccs(ECC_global, "FoxP3", f_max)
    cd68 = _stack_sampled_eccs(ECC_global, "CD68", f_max)

    print(cd8.shape, foxp3.shape, cd68.shape)

    data[LABEL] = {'cd8': cd8, 'cd68': cd68, 'foxp3': foxp3}

# Persist the vectors for all labels in a single file.
with open('pkls/ECC_global_VECT_0.1.pkl', 'wb') as f:
    pickle.dump(data, f)

## Classify the cell types using the ECC vectors

In [None]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)

# Fraction of the samples placed in the training mask of each repeat, and the
# number of reshuffled train/test repeats per classification task.
train_split, nbr_of_repeats = 0.8, 100

# Module-level estimator instances. The classification loop below builds fresh
# LDA/QDA estimators on every repeat and its own PCA per task.
# NOTE(review): pca_classifier is not referenced in the visible cells - confirm
# whether it is still needed.
pca_classifier = PCA()
ldr_classifier = LinearDiscriminantAnalysis()
qda_classifier = QuadraticDiscriminantAnalysis()

In [None]:
# Read back the per-label vectors from the pickle file written at the end of
# the vectorizing cell.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('pkls/ECC_global_VECT_0.1.pkl', 'rb') as f:
    data = pickle.load(f)

In [None]:
# For every label, run four classification tasks (three pairwise, one 3-way)
# and print, per task, the mean test accuracy of:
#   LDA on the raw vectors, LDA on 3 PCA components, QDA on 3 PCA components.
# Each label occupies one output line; values are separated by spaces.
for LABEL in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'N', 'O']:
    print()

    cd8 = data[LABEL]['cd8']
    foxp3 = data[LABEL]['foxp3']
    cd68 = data[LABEL]['cd68']

    # (task name, stacked feature rows, class name of every row)
    tasks = [
        (
            "CD68vsFoxP3",
            np.vstack([cd68, foxp3]),
            np.array(["CD68"] * len(cd68) + ["FoxP3"] * len(foxp3)),
        ),
        (
            "CD8vsFoxP3",
            np.vstack([cd8, foxp3]),
            np.array(["CD8"] * len(cd8) + ["FoxP3"] * len(foxp3)),
        ),
        (
            "CD8vsCD68",
            np.vstack([cd8, cd68]),
            np.array(["CD8"] * len(cd8) + ["CD68"] * len(cd68)),
        ),
        (
            "3way",
            np.vstack([cd8, foxp3, cd68]),
            np.array(["CD8"] * len(cd8) + ["FoxP3"] * len(foxp3) + ["CD68"] * len(cd68)),
        ),
    ]

    for label, X, y in tasks:
        # NOTE(review): the PCA projection is fitted on all rows of X,
        # including the rows that are later held out for testing.
        pca = PCA(n_components=3)
        pca.fit(X)
        Xr = pca.transform(X)

        test_accuracy_lda, test_accuracy_lda_pca, test_accuracy_qda_pca = [], [], []

        # Boolean training mask: the leading `train_split` share starts as
        # True; it is reshuffled in place (and carried over) on every repeat.
        mask = np.zeros(len(y), dtype=bool)
        mask[: int(np.floor(len(y) * train_split))] = True

        for i in range(nbr_of_repeats):
            np.random.seed(i)
            np.random.shuffle(mask)
            held_out = ~mask

            X_train, Xr_train, y_train = X[mask], Xr[mask], y[mask]
            X_test, Xr_test, y_test = X[held_out], Xr[held_out], y[held_out]

            # LDA on the full-length vectors.
            ldr_classifier = LinearDiscriminantAnalysis().fit(X_train, y_train)
            test_accuracy_lda.append(ldr_classifier.score(X_test, y_test))

            # LDA on the PCA-reduced vectors.
            ldr_pcaclassifier = LinearDiscriminantAnalysis().fit(Xr_train, y_train)
            test_accuracy_lda_pca.append(ldr_pcaclassifier.score(Xr_test, y_test))

            # QDA on the PCA-reduced vectors.
            qda_classifier = QuadraticDiscriminantAnalysis().fit(Xr_train, y_train)
            test_accuracy_qda_pca.append(qda_classifier.score(Xr_test, y_test))

        mean_scores = [
            round(np.mean(scores), 3)
            for scores in (test_accuracy_lda, test_accuracy_lda_pca, test_accuracy_qda_pca)
        ]
        print('{:0.3f} {:0.3f} {:0.3f}'.format(*mean_scores), end=' ')


0.938 0.967 0.945 0.994 0.973 0.990 0.894 0.840 0.840 0.896 0.858 0.865 
0.917 0.892 0.896 0.992 0.986 0.989 0.943 0.942 0.913 0.921 0.868 0.871 
0.947 0.906 0.915 0.884 0.847 0.881 0.811 0.584 0.552 0.842 0.628 0.627 
0.960 0.926 0.934 0.986 0.990 0.985 0.802 0.779 0.767 0.862 0.779 0.786 
0.941 0.886 0.876 0.867 0.929 0.949 0.806 0.735 0.751 0.842 0.702 0.754 
0.655 0.899 0.964 0.619 0.842 0.898 0.709 0.909 0.853 0.578 0.845 0.881 
0.788 0.932 0.937 0.791 0.970 0.982 0.614 0.696 0.723 0.673 0.738 0.771 
0.651 0.890 0.968 0.747 0.930 0.939 0.695 0.882 0.886 0.659 0.844 0.889 
0.788 0.912 0.928 0.716 0.908 0.900 0.693 0.470 0.531 0.716 0.619 0.621 
0.642 0.763 0.908 0.678 0.835 0.838 0.808 0.882 0.898 0.507 0.653 0.720 
0.872 0.868 0.874 0.784 0.918 0.990 0.823 0.857 0.923 0.594 0.718 0.751 
0.457 0.527 0.453 0.658 0.662 0.575 0.507 0.690 0.617 



0.462 0.388 0.392 
0.830 0.954 0.972 0.602 0.806 0.924 0.484 0.672 0.668 0.431 0.639 0.730 