# Compute Persistent Homology of 3-D Scans

### We have the middle segments of the 421 + 144 TCIA lung cancer samples, formatted for analysis as cubical complexes

In [1]:
import numpy as np
import matplotlib.pylab as plt
import math
import os
import gudhi as gd
import pandas as pd
import PersistenceImages.persistence_images as pimg

### Test the persistent homology pipeline

In [2]:
def BitmapToPhom(bitmap):
    """Compute cubical persistent homology of a 3-D array in dimensions 0-2.

    Parameters
    ----------
    bitmap : numpy.ndarray
        3-D array; its entries are used as the filtration values of the
        top-dimensional cells of the cubical complex.

    Returns
    -------
    tuple
        ``(phom_0, phom_1, phom_2)`` -- the persistence intervals reported by
        ``persistence_intervals_in_dimension`` for dimensions 0, 1 and 2.
    """
    l, w, h = np.shape(bitmap)

    # GUDHI reads a flat list of top-dimensional cells with the FIRST
    # coordinate varying fastest (Fortran / Perseus ordering).  A default
    # C-order flatten paired with dimensions=[l, w, h] would place voxels at
    # the wrong grid positions whenever the array is not a cube, so flatten
    # in Fortran order to keep the geometry intact.
    cubical = gd.CubicalComplex(
        dimensions=[l, w, h],
        top_dimensional_cells=bitmap.flatten(order='F'),
    )

    # Persistence has to be computed before intervals can be queried; the
    # call's return value is not needed.
    cubical.compute_persistence()

    phom_0 = cubical.persistence_intervals_in_dimension(0)
    phom_1 = cubical.persistence_intervals_in_dimension(1)
    phom_2 = cubical.persistence_intervals_in_dimension(2)

    return (phom_0, phom_1, phom_2)

In [3]:
# Smoke-test the pipeline on a single scan.
# LUNG1-192 is the one that only has 2 layers, so no 2-dimensional homology
# is expected and the printed interval list should be empty.
lung_test = np.load(
    'C:/Users/Adam/Documents/GitHub/TDA-Lung-Tumor-Classification/Tumor_data/Radiomics_Arrays/LUNG1-192.npy'
)
(phom_0, phom_1, phom_2) = BitmapToPhom(lung_test)

print(phom_2)

[]


### Read in files and normalize

In [4]:
def _load_scans(directory):
    """Load every file in *directory* with ``np.load``.

    Returns three parallel lists: the loaded arrays, each array's maximum
    and each array's minimum.
    """
    scans, maxes, mins = [], [], []
    # os.listdir returns entries in arbitrary order; sort so that the scan
    # order (and therefore the order of everything derived from it) is
    # reproducible across runs and machines.
    for file in sorted(os.listdir(directory)):
        lung = np.load(os.path.join(directory, file))
        maxes.append(np.amax(lung))
        mins.append(np.amin(lung))
        scans.append(lung)
    return scans, maxes, mins


#Radiomics Dataset
directory = os.fsencode('C:/Users/Adam/Documents/GitHub/TDA-Lung-Tumor-Classification/Tumor_data/Radiomics_Arrays/')
all_rad_scans, rad_maxes, rad_mins = _load_scans(directory)

#Radiogenomics Dataset
directory = os.fsencode('C:/Users/Adam/Documents/GitHub/TDA-Lung-Tumor-Classification/Tumor_data/Radiogenomics_Arrays/')
all_radg_scans, radg_maxes, radg_mins = _load_scans(directory)

In [5]:
#We want to normalize to [0,1] using the extrema over BOTH datasets,
#so intensities stay comparable between Radiomics and Radiogenomics scans.
grand_max = max(np.max(rad_maxes), np.max(radg_maxes))
# The global minimum is the smaller of the two dataset minima (taking the
# larger one would push some voxels below 0 after scaling).
grand_min = min(np.min(rad_mins), np.min(radg_mins))
grand_range = grand_max - grand_min

# Scans are kept in plain lists and may differ in shape, so normalize each
# array on its own instead of relying on NumPy to coerce the whole list.
all_rad_scans_normalized = [(scan - grand_min) / grand_range for scan in all_rad_scans]
all_radg_scans_normalized = [(scan - grand_min) / grand_range for scan in all_radg_scans]

  """
  


### Apply persistent homology pipeline to all files
This cell takes ~30 minutes on my desktop!

In [6]:
# Run the persistent homology pipeline on every normalized scan and collect
# the dimension-0, -1 and -2 intervals in three parallel lists per dataset.

#Radiomics Dataset
rad_phom_0s, rad_phom_1s, rad_phom_2s = [], [], []

for tumor in all_rad_scans_normalized:
    (rad_phom_0, rad_phom_1, rad_phom_2) = BitmapToPhom(tumor)
    rad_phom_0s += [rad_phom_0]
    rad_phom_1s += [rad_phom_1]
    rad_phom_2s += [rad_phom_2]


#Radiogenomics Dataset
radg_phom_0s, radg_phom_1s, radg_phom_2s = [], [], []

for tumor in all_radg_scans_normalized:
    (radg_phom_0, radg_phom_1, radg_phom_2) = BitmapToPhom(tumor)
    radg_phom_0s += [radg_phom_0]
    radg_phom_1s += [radg_phom_1]
    radg_phom_2s += [radg_phom_2]


### Save persistent homology files so I only need to run this once

In [8]:
def _as_object_array(arrays):
    """Pack a list of arrays into a 1-D object array, one entry per input array."""
    packed = np.empty(len(arrays), dtype=object)
    # Assign element by element so NumPy never tries to broadcast or stack
    # the individual interval arrays, whatever their shapes are.
    for index, array in enumerate(arrays):
        packed[index] = array
    return packed


# Each scan yields a different number of intervals, so the per-dataset lists
# are ragged.  Passing such a list straight to np.save relies on implicit
# ragged-array creation, which recent NumPy versions reject; build the object
# array explicitly instead.  The saved files hold object arrays, so reading
# them back requires np.load(..., allow_pickle=True).
_rad_homology_dir = 'C:/Users/Adam/Documents/GitHub/TDA-Lung-Tumor-Classification/Tumor_data/Radiomics_Homology/'
_radg_homology_dir = 'C:/Users/Adam/Documents/GitHub/TDA-Lung-Tumor-Classification/Tumor_data/Radiogenomics_Homology/'

for _stem, _diagrams in (
    ('rad_phom_0s', rad_phom_0s),
    ('rad_phom_1s', rad_phom_1s),
    ('rad_phom_2s', rad_phom_2s),
):
    np.save(_rad_homology_dir + _stem + '.npy', _as_object_array(_diagrams))

for _stem, _diagrams in (
    ('radg_phom_0s', radg_phom_0s),
    ('radg_phom_1s', radg_phom_1s),
    ('radg_phom_2s', radg_phom_2s),
):
    np.save(_radg_homology_dir + _stem + '.npy', _as_object_array(_diagrams))