In [None]:
import numpy as np
import pandas as pd
import scipy as sp
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.ndimage import distance_transform_edt
import multiprocessing as mp
import sys
import os
import glob
import gudhi as gd

In [None]:
# Convert a persistence list of (dimension, (birth, death)) entries to an (n, 3) array
def pdarray(pd):
    """Flatten a persistence list into an ``(n, 3)`` float array.

    Parameters
    ----------
    pd : sequence
        Entries are read as ``entry[0]``, ``entry[1][0]`` and ``entry[1][1]``.
        Elsewhere in this file the function is fed the output of GUDHI's
        ``persistence()``, i.e. ``(dimension, (birth, death))`` pairs.
        Note that inside this function the name ``pd`` refers to this
        argument, not to the pandas module.

    Returns
    -------
    numpy.ndarray
        Float64 array with one row per entry: column 0 is ``entry[0]``,
        columns 1 and 2 are ``entry[1][0]`` and ``entry[1][1]``. An empty
        input yields an array of shape ``(0, 3)``.
    """
    rows = [(entry[0], entry[1][0], entry[1][1]) for entry in pd]
    # The explicit reshape keeps the (0, 3) shape when there are no entries.
    return np.array(rows, dtype=np.float64).reshape(len(rows), 3)

In [None]:
# Define function that can be run in parallel
def _shuffle_occupied_labels(labels, seed, empty_label=2):
    """Return a copy of ``labels`` with its non-empty entries randomly permuted.

    Entries equal to ``empty_label`` keep their position; all other entries
    are shuffled among the remaining positions (C order). A private
    ``RandomState(seed)`` is used, which yields the same permutation as
    ``np.random.seed(seed)`` followed by ``np.random.shuffle`` but does not
    touch the global NumPy generator.
    """
    source = np.asarray(labels)
    # flatten() always returns a writable C-order copy, so neither the caller's
    # array nor a (possibly read-only) pandas buffer is modified.
    flat = source.flatten()
    occupied = flat != empty_label
    values = flat[occupied]
    np.random.RandomState(seed).shuffle(values)
    flat[occupied] = values
    return flat.reshape(source.shape)


def _signed_distance(labels, normal_label=0, tumor_label=1, empty_label=2):
    """Signed Euclidean distance image of a label matrix.

    Normal pixels get the (positive) distance to the nearest non-normal pixel,
    tumor pixels get minus the distance to the nearest non-tumor pixel, and
    empty pixels are set to ``+inf``. Assumes ``labels`` only contains the
    three label values listed in the signature.
    """
    to_non_normal = distance_transform_edt(labels == normal_label)
    to_non_tumor = distance_transform_edt(labels == tumor_label)
    signed = to_non_normal - to_non_tumor
    signed[labels == empty_label] = np.inf
    return signed


def computedistpd(tt):
    """Compute persistence diagrams of label-shuffled images for one iteration.

    For every ``image/*.csv`` file under the current working directory:

    1. read the label matrix (0 = normal cell, 1 = tumor cell, 2 = empty);
    2. randomly permute the non-empty labels, seeded with ``tt + 1000 * ii``
       where ``ii`` is the index of the file in the sorted file list;
    3. build the signed distance image (see ``_signed_distance``);
    4. write it as a temporary Perseus file ``sedt3/<name>_<tt>.txt``;
    5. compute cubical persistence with GUDHI and save the diagram to
       ``persistencediagram/<name>_<tt>_pd.txt`` (columns: dimension, birth,
       death), then delete the temporary Perseus file.

    Parameters
    ----------
    tt : int
        Iteration index; it is part of the random seed and of the output
        file names. The launcher in this file passes 0..99.

    Returns
    -------
    None
        Results are written to disk.
    """
    cwd = os.getcwd()
    # glob order depends on the file system; sort so that the file index used
    # in the seed maps to the same image on every run and every machine.
    file_paths = sorted(glob.glob(os.path.join(cwd, 'image', '*.csv')))

    sedt_dir = os.path.join(cwd, "sedt3")
    diagram_dir = os.path.join(cwd, "persistencediagram")
    # exist_ok makes this safe when many worker processes start at once.
    os.makedirs(sedt_dir, exist_ok=True)
    os.makedirs(diagram_dir, exist_ok=True)

    for ii, path in enumerate(file_paths):
        # NOTE(review): read_csv treats the first line as a header row, so it
        # is not part of the image -- confirm the CSV files carry a header.
        idf = pd.read_csv(path)

        shuffled = _shuffle_occupied_labels(idf.to_numpy(), seed=tt + 1000 * ii)
        signed_flat = _signed_distance(shuffled).ravel()

        stem = os.path.splitext(os.path.basename(path))[0]
        tag = stem + "_" + str(tt)
        perseus_path = os.path.join(sedt_dir, tag + ".txt")

        try:
            with open(perseus_path, "w") as fh:
                # Perseus header: dimension, then the size along each axis,
                # column count first because the values are written row by row.
                fh.write("%d\n%d\n%d\n" % (2, idf.shape[1], idf.shape[0]))
                fh.writelines("%f\n" % value for value in signed_flat)

            diagram = gd.CubicalComplex(perseus_file=perseus_path).persistence()
        finally:
            # The distance file is only an intermediate; remove it even if
            # writing or the persistence computation failed.
            if os.path.exists(perseus_path):
                os.remove(perseus_path)

        np.savetxt(os.path.join(diagram_dir, tag + "_pd.txt"),
                   pdarray(diagram), fmt='%1.6f')

# Only run when executed as the main module (not when imported by a child process)
if __name__ == '__main__':
    # Number of shuffling iterations; each one is handled by its own process
    # and is passed to computedistpd as the iteration index 0..n_iterations-1.
    n_iterations = 100
    # Run at most one worker per CPU at a time instead of all at once.
    batch_size = max(1, mp.cpu_count())

    jobs = []

    # Launch processes batch by batch and wait for each batch to finish.
    for batch_start in range(0, n_iterations, batch_size):
        batch_stop = min(batch_start + batch_size, n_iterations)
        batch = [
            mp.Process(target = computedistpd, args = (pind,))
            for pind in range(batch_start, batch_stop)
        ]
        for p in batch:
            p.start()
        for p in batch:
            p.join()
        jobs.extend(batch)

    # A worker that raised exits with a non-zero code; surface that instead of
    # silently producing an incomplete set of persistence diagrams.
    failed = [pind for pind, p in enumerate(jobs) if p.exitcode != 0]
    if failed:
        raise RuntimeError("computedistpd failed for iterations: %s" % failed)