# Precompute all persistence diagrams

In [None]:
%load_ext autoreload
%autoreload 2
import nibabel as nib
import numpy as np 
import matplotlib.pyplot as plt
from skimage import measure # For marching cubes
import polyscope as ps # For mesh display
from persim import plot_diagrams, PersistenceImager
import pandas as pd
import os
import sys
import skimage
import skimage.io
sys.path.append("../src")
import glob
from geomstats import *
from topostats import *
from kernels import *
from utils3d import *


def load_dictionary(metadata_path):
    """
    Load the patient metadata CSV into a dictionary keyed by patient ID

    Parameters
    ----------
    metadata_path: string
        Path to the metadata CSV file, which must have an "ID" column

    Returns
    -------
    dict: patient ID (string) -> pandas Series holding that patient's
        metadata row.  Every "M-" in an ID is expanded to "M-0" so the
        keys agree with the names used in the data file paths, and
        patient "UCSF-PDGM-0541" is left out
    """
    df = pd.read_csv(metadata_path)
    data = {}
    for _, row in df.iterrows():
        # File paths have an extra 0 in ID
        patient_id = row["ID"].replace("M-", "M-0")
        data[patient_id] = row
    # Skip Patient 541 because segmentation file is empty.  pop() with a
    # default avoids a KeyError when the metadata has no row for that patient
    data.pop("UCSF-PDGM-0541", None)
    return data

def argsort(seq):
    """
    Return the indices that put a sequence into ascending order

    Parameters
    ----------
    seq: indexable sequence with a length
        The items to order; they must be comparable with each other

    Returns
    -------
    ndarray(len(seq), dtype=int): Positions into seq, ordered by the
        items they point at.  Equal items keep their original relative
        order, since the underlying sort is stable
    """
    positions = list(range(len(seq)))
    positions.sort(key=lambda i: seq[i])
    return np.array(positions, dtype=int)

# Locations of the metadata table and of the per-patient image folders
metadata_path = "../Data/UCSF-PDGM-metadata_v2.csv"
all_data_path = "../Data/UCSF-PDGM-v3"
data = load_dictionary(metadata_path)

patients = list(data)
diagnosis = [row["Final pathologic diagnosis (WHO 2021)"] for row in data.values()]
dead = np.array([row["1-dead 0-alive"] for row in data.values()])
# Order by the dead/alive flag first, then stably re-sort that order by
# diagnosis: patients end up grouped by diagnosis, and inside each group
# they keep the order given by the dead/alive sort
idx = np.argsort(dead)
by_diagnosis = argsort([diagnosis[i] for i in idx])
idx = idx[by_diagnosis]
patients = [patients[i] for i in idx]

# What the labels actually mean
iso_names = {1: "Necrotic", 2: "Edema", 4: "Main Tumor"}
# Column order of the labels
iso_levels = [2, 4, 1]

# For every patient folder that has both a metadata entry and a tumor
# segmentation file, store per label the output of crop_binary_volume
# (key "B<label name>") and of binary_volume_2coords (key "X<label name>")
for folder_name in os.listdir(all_data_path):
    print(".", end="")
    patient_folder_path = os.path.join(all_data_path, folder_name)
    # The patient ID is the folder name without its last 6 characters
    # (presumably a "_nifti" suffix -- confirm against the dataset layout)
    patient = folder_name[:-6]
    if patient not in data:
        continue

    # Build the file name from the patient ID itself instead of a
    # fixed-width slice of the folder path, which only yields the ID when
    # the folder name is exactly 20 characters long
    tumor_seg_path = os.path.join(
        patient_folder_path, patient + "_tumor_segmentation.nii.gz"
    )
    if not os.path.exists(tumor_seg_path):
        continue
    tumor_seg_nifti = nib.load(tumor_seg_path)
    tumor_seg_mat = tumor_seg_nifti.get_fdata()

    for level in iso_levels:
        # Voxels carrying exactly this label
        binary = tumor_seg_mat == level
        level_name = iso_names[level]
        B = crop_binary_volume(binary)
        data[patient]["B{}".format(level_name)] = B
        X = binary_volume_2coords(binary)
        data[patient]["X{}".format(level_name)] = X

## Alpha Filtrations

In [None]:
# Remove every patient whose entry has no "XEdema" key, printing the
# removed IDs first and the number of patients that remain afterwards
to_delete = [p for p in data if "XEdema" not in data[p].keys()]
print(to_delete)
for p in to_delete:
    del data[p]
print(len(data))

In [None]:
# Points whose (column 1 - column 0) difference is not above this value
# are discarded from every diagram (presumably death - birth, i.e. the
# persistence -- confirm against get_alpha_filtration_3d)
persistence_cutoff = 1

for patient in data:
    print(".", end="")
    for level in iso_levels:
        name = iso_names[level]
        coords_key = "X{}".format(name)
        PDs = remove_infinite(get_alpha_filtration_3d(data[patient][coords_key]))
        for k, Ik in enumerate(PDs):
            # Empty diagrams are left untouched
            if Ik.size > 0:
                lifetimes = Ik[:, 1] - Ik[:, 0]
                PDs[k] = Ik[lifetimes > persistence_cutoff, :]
        data[patient]["{}_alpha_PDs".format(name)] = PDs

## Convolved Sublevelset Cubical Filtrations

In [None]:
# Kernel bank: gauss3d and laplacian3d for w = 3, 5, 7, followed by the
# output of get_random_3d_kernels(5, 10)
random_kernels = get_random_3d_kernels(5, 10)
fixed_kernels = [gauss3d(w=w) for w in (3, 5, 7)]
fixed_kernels += [laplacian3d(w=w) for w in (3, 5, 7)]
kernels = fixed_kernels + random_kernels


for patient in data:
    print(".", end="")
    for level in iso_levels:
        name = iso_names[level]
        key = "{}_cubical_PDs".format(name)
        # Only compute diagrams that are not stored yet, so re-running the
        # cell does not redo finished patients
        if key not in data[patient]:
            B = data[patient]["B{}".format(name)]
            PDs = []
            # Concatenate the diagrams from every kernel into one flat list
            for kernel in kernels:
                PDs.extend(get_binary_kernel_cubical_filtration(B, kernel))
            data[patient][key] = remove_infinite(PDs)

## Save Data

In [None]:
# pickle is not imported explicitly anywhere else in this notebook
import pickle

# Write each patient's entry to its own pickle file under ../preprocessed,
# creating that folder first so the save cannot fail on a missing directory
os.makedirs("../preprocessed", exist_ok=True)
for patient in data:
    filename = "../preprocessed/{}.pkl".format(patient)
    # The with-block closes each file as soon as its pickle is written
    with open(filename, "wb") as fout:
        pickle.dump(data[patient], fout)