# Compute persistent homology (PH) on "all cells": a combination of sampled ECM, cancer cells, and leukocytes.
Studying all cells together can give insight into the shape and structure of the airways.
 

In [None]:

include("../src/ECM_TDA.jl")
using .ECM_TDA


using Eirene
using Ripserer
using CSV
using TiffImages
using Images
using NPZ
using Plots
using PersistenceDiagrams
using Measures
using Distributions
using MultivariateStats
using LinearAlgebra
using Random
using StatsBase
using JLD2
using FileIO
using PersistenceDiagrams
using DelimitedFiles
using Distances
using UMAP


In [None]:
# ---- Colors for the three point types ----
c_ECM = "#249EA0"        # teal
c_cancer = "#592693"     # purple
c_leukocytes = "#FAAB36" # orange

# ---- General-purpose palette ----
c1 = "#fd5184" # pink
c2 = "#ffb602" # orange
c3 = "#3ec0c9" # blue / teal
c4 = "#d753ce" # purple
c5 = "#49a849" # green
c6 = "#F28522" # darker orange (same hex as c_stromal)
c7 = "#265BF5" # blue
c8 = "#AEF359" # lime green

# ---- Histological subtypes: plot color and marker shape for each ----
# (six subtype names are defined: acinar, lepidic, solid, papillary, im, stromal)
c_acinar = "#fd5184"    # pink
c_lepidic = "#49a849"   # green
c_solid = "#d753ce"     # purple
c_papillary = "#3ec0c9" # blue / teal
c_im = "#ffb602"        # orange
c_stromal = "#F28522"   # darker orange

m_acinar = :rect
m_lepidic = :utriangle
m_solid = :diamond
m_papillary = :star
m_im = :pentagon
m_stromal = :dtriangle



# PH analysis on combined cells (cancer, leukocytes, ECM points) to study the airways

## run PH

In [None]:
# Directory holding one CSV of cell locations per subregion.
dir = "data/4000x4000_combined/subregion_cells/"

# Names (not full paths) of the CSV files directly inside `dir`.
# Filtering on the extension keeps stray entries (hidden files, subdirectories)
# away from the file-name parsing done when the files are processed.
csv_files = filter(f -> endswith(f, ".csv") && isfile(joinpath(dir, f)), readdir(dir));

# Load the subregion centers from the three saved dictionaries and merge them.
# `merge` keeps the value of the last dictionary when a key occurs more than once.
subregion_centers = load("data/4000x4000/subregion_centers.jld2")["subregion_centers"];
subregion_centers_green = load("data/4000x4000_201222/subregion_centers_green.jld2")["subregion_centers_green"];
subregion_centers_purple = load("data/4000x4000_201222/subregion_centers_purple.jld2")["subregion_centers_purple"];
subregion_all = merge(subregion_centers, subregion_centers_green, subregion_centers_purple);

# Dimension-0 and dimension-1 persistence diagrams, keyed by file name without
# its extension. Values are whatever `RipsererPD_to_array` returns.
all_PD0 = Dict{String,Any}()
all_PD1 = Dict{String,Any}()

# Side length of each square subregion, as passed to `get_subregion_boundaries`.
subregion_size = 4000

# Input directory of sampled ECM points and output directories for the diagrams.
ecm_dir = "data/4000x4000_combined/ECM_sampled/points_CSV/"
PD0_dir = "data/4000x4000_combined/all_cells_PD/PD0/"
PD1_dir = "data/4000x4000_combined/all_cells_PD/PD1/"

# Create the output directories up front so `writedlm` does not fail when they
# do not exist yet.
mkpath(PD0_dir)
mkpath(PD1_dir)

for file in csv_files
    # File name without its extension; used as the dictionary key below.
    filename = String(split(file, ".")[1])

    # Identify the subregion from the file name: the first two "_"-separated
    # fields, with their 3-character and 2-character prefixes stripped.
    name_fields = split(file, "_")
    LTX = name_fields[1][4:end]
    Da = name_fields[2][3:end]

    # The subregion index is the run of digits at the end of the name. Parsing
    # all trailing digits (not only the last character) keeps indices >= 10 intact.
    idx_match = match(r"(\d+)$", filename)
    idx_match === nothing &&
        throw(ArgumentError("cannot parse a subregion index from file name \"$file\""))
    idx = parse(Int, idx_match.captures[1])

    # load cells
    # NOTE(review): `CSV.read(path)` without a sink argument, and the table
    # operations below, assume a CSV.jl version that returns a DataFrame —
    # confirm against the project's Manifest.
    cells = CSV.read(joinpath(dir, file));
    all_cells = cells[:, 2:3]

    # load ECM points
    cells_ECM = CSV.read(joinpath(ecm_dir, file));

    # boundaries of this subregion in the coordinates of the full image
    center_x, center_y = subregion_all[(LTX, Da)][idx]
    xmin, xmax, ymin, ymax = get_subregion_boundaries(center_x, center_y, subregion_size)

    # shift the cell coordinates so they are relative to the subregion
    # NOTE(review): the first selected column is shifted by `ymin` and the second
    # by `xmin`; confirm this matches the column order of the cell CSVs.
    n = size(all_cells, 1)
    all_cells_new = all_cells .- hcat(ones(n) * ymin, ones(n) * xmin);

    # combine cell locations and ECM points into one point cloud
    all_points = vcat(all_cells_new, cells_ECM);

    # convert to the vector-of-tuples point format passed to `ripserer`
    P = [tuple(all_points[i, :x], all_points[i, :y]) for i = 1:size(all_points, 1)]

    # Run ripser (cohomology); the first two entries of the result are converted
    # to arrays and stored as PD0 and PD1.
    PD = ripserer(P)
    PD0 = RipsererPD_to_array(PD[1])
    PD1 = RipsererPD_to_array(PD[2])

    # save each diagram as comma-delimited text under the input file's name
    writedlm(joinpath(PD0_dir, file), PD0, ",")
    writedlm(joinpath(PD1_dir, file), PD1, ",")

    # collect into the dictionaries
    all_PD0[filename] = PD0
    all_PD1[filename] = PD1
end

In [None]:
# convert to Ripser PD
# PH0 = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in all_PD0 if v != reshape(Array([0.0]), 1, 1))
# PH1 = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in all_PD1 if v != reshape(Array([0.0]), 1, 1))

# # compute PI
# PI0 = ECM_TDA.compute_PI(PH0)
# PI1 = ECM_TDA.compute_PI(PH1);

# save("data/4000x4000_combined/all_cells_PD/PD.jld2", 
#     "PD0", all_PD0, 
#     "PD1", all_PD1, 
#     "PI0", PI0, 
#     "PI1", PI1)