In [None]:
include("../src/ECM_TDA.jl")
using .ECM_TDA
using Eirene
using Ripserer
using CSV
using TiffImages
using Images
using NPZ
using Plots
using Distances
using Clustering
using PersistenceDiagrams
using Measures
using Distributions
using MultivariateStats
using LinearAlgebra
using Random
using StatsBase
using JLD2
using FileIO
using PersistenceDiagrams
using DelimitedFiles
using DataFrames
using UMAP

In [None]:
"""
    plot_ECM(group_selected, idx_files, save_name; grid_layout = nothing, size = nothing,
             right_margin = 4mm, left_margin = -7mm, bottom_margin = 0mm)

Draw, for every ROI index of every group, the ECM image followed by a scatter plot of
its sampled points, lay all panels out in one grid and save the figure to `save_name`.

# Arguments
- `group_selected`: container indexed with `1:length(group_selected)`; entry `i` is an
  iterable of ROI indices.
- `idx_files`: lookup from ROI index to ROI name. The name is split on `"_"`; its first
  three pieces are read as slide identifier (first three characters dropped), row and
  column, and are used to build the image and CSV paths.
- `save_name`: output path passed to `savefig`.

# Keywords
- `grid_layout`: layout of the final figure. Defaults to
  `grid(n_group, 2 * n_ROI)`, where `n_ROI` is the length of the first group.
- `size`: figure size in pixels. Defaults to `(250 * n_ROI * 2, 250 * n_group)`.
- `right_margin`, `left_margin`, `bottom_margin`: accepted so existing calls keep
  working; they are not applied to the figure.

# Returns
Whatever `savefig` returns.
"""
function plot_ECM(group_selected,
    idx_files,
    save_name;
    grid_layout = nothing,
    size = nothing,
    right_margin = 4mm,
    left_margin = -7mm,
    bottom_margin = 0mm)

    plot_array = []
    n_group = length(group_selected)
    n_ROI = length(group_selected[1])

    for i in 1:n_group
        for idx in group_selected[i]
            LTX, row, col, _ = split(idx_files[idx], "_")
            LTX = LTX[4:end]
            ECM_path = "/Volumes/My Passport/iris_data/LTX" * LTX * "/tile_size_4000/" * row * "_" * col * "_psr.tif"
            points_path = "/Volumes/My Passport/wholeslide_sampled_points/" * LTX * "/points_CSV/" * row * "_" * col * "_psr.csv"

            # left panel: the ECM image itself
            p_ECM = Images.load(ECM_path)
            push!(plot_array, plot(p_ECM, ticks = [], frame = :box))

            # right panel: sampled points, y axis flipped to match image coordinates
            # (CSV.File + DataFrame works across CSV.jl versions; CSV.read needs a sink)
            df = DataFrame(CSV.File(points_path))
            p2 = scatter(df[:, :x], df[:, :y],
                yflip = true,
                label = "", markersize = 1, markerstrokewidth = 0)
            push!(plot_array, p2)
        end
    end

    if grid_layout === nothing
        grid_layout = grid(n_group, n_ROI * 2)
    end

    if size === nothing
        size = (250 * n_ROI * 2, 250 * n_group)
    end

    p = plot(plot_array..., layout = grid_layout, size = size)
    return savefig(p, save_name)
end

In [None]:
# Colour palette: hex strings used as marker colours in the plots below
# (c1–c7 colour the example ROI groups R1–R7).
c1 = "#fd5184" # pink
c2 = "#ffb602" # yellow
c3 = "#3ec0c9" # teal
c4 = "#926EAE" # light purple
c5 = "#49a849" # green
c6 = "#F28522" # orange
c7 = "#265BF5" # dark blue
c8 = "#AEF359" # lime green
c9 = "#FF1DCE" # magenta / hot purple

# select the GR backend for Plots
gr()

## Perform UMAP

In [None]:
# Collect the persistence diagrams of every slide into two flat lookups
# (`all_PD0` from the "PD0" entries, `all_PD1` from the "PD1" entries).
# Keys are the slide prefix joined to the ROI key with "_".
LTXs = ["001","013", "021", "034", "041", "050", "051", "073", "079",
        "092", "100", "108", "115", "142", "143", "145", "185", "206", "210", "221"]

all_PD0 = Dict()
all_PD1 = Dict()

# slides listed in `LTXs`: data_TDA/LTX<id>/PD.jld2
for LTX in LTXs
    stored = load("data_TDA/LTX" * LTX * "/PD.jld2")
    for (target, entry) in ((all_PD0, "PD0"), (all_PD1, "PD1"))
        for (k, v) in stored[entry]
            target["LTX" * LTX * "_" * k] = v
        end
    end
end

# normal slides: data_TDA/normal_LTX<id>/PD.jld2
normal_LTXs = ["001", "021", "029", "075", "092", "115", "143"]

for LTX in normal_LTXs
    stored = load("data_TDA/normal_LTX" * LTX * "/PD.jld2")
    for (target, entry) in ((all_PD0, "PD0"), (all_PD1, "PD1"))
        for (k, v) in stored[entry]
            target["normal_LTX" * LTX * "_" * k] = v
        end
    end
end

In [None]:
# recompute coarser PI
# Convert every stored diagram with ECM_TDA.array_to_ripsererPD, skipping ROIs whose
# entry is `nothing` (identity comparison, so no `==` method is ever involved).
PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in all_PD0 if v !== nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in all_PD1 if v !== nothing);

# One PersistenceImage per dimension, built from all diagrams of that dimension.
PI0 = PersistenceImage([diagram for diagram in values(PH0_dict)], sigma=50, size = 20)
PI1 = PersistenceImage([diagram for diagram in values(PH1_dict)], sigma=50, size = 20)

# Apply the images to each ROI's diagram.
ECM_PI0 = Dict()
for (roi, diagram) in PH0_dict
    ECM_PI0[roi] = PI0(diagram)
end

ECM_PI1 = Dict()
for (roi, diagram) in PH1_dict
    ECM_PI1[roi] = PI1(diagram)
end


In [None]:
# combine features
# Feature vector of an ROI = its ECM_PI0 entry followed by its flattened ECM_PI1 entry.
# ROIs that have no ECM_PI1 entry are left out.
features = Dict()
for (roi, image0) in ECM_PI0
    haskey(ECM_PI1, roi) || continue
    features[roi] = vcat(image0, vec(ECM_PI1[roi]))
end

In [None]:
# save as dataframe
# One row per ROI: the ROI name followed by its feature coordinates
# (the table has 420 coordinate columns, so each feature vector must have 420 entries).
col_names = vcat(["ROI"],["coord_" * string(i) for i = 1:420]);
empty_cols = [[] for i = 1:421];

df = DataFrame(empty_cols, Symbol.(col_names))
for (roi, feature) in features
    push!(df, vcat([roi], feature))
end;

#CSV.write("wholeslide_tda_features.csv", df)

In [None]:
# Index <-> ROI lookup.
# The commented-out lines show how the lookup was originally built from `features`
# and saved; the saved copy is loaded instead (presumably so that indices stay fixed
# between sessions — confirm).
#ROIs = collect(keys(features))
#idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis_TDA/dim_red/idx_ROI.jld2", "idx_ROI", idx_ROI)
idx_ROI = load("analysis_TDA/dim_red/idx_ROI.jld2")["idx_ROI"];

In [None]:
# for saving idx_ROI as numpy
# ROI names ordered by index 1, 2, …, length(idx_ROI). The count is taken from the
# lookup itself instead of being hard-coded, so the export follows the data.
ROIs = Any[idx_ROI[i] for i in 1:length(idx_ROI)]

using PyCall
np = pyimport("numpy")
np.save("idx_ROI.npy", ROIs)

In [None]:
# prepare features array
# Stack the per-ROI feature vectors as columns, ordered by ROI index.
# `reduce(hcat, …)` avoids splatting thousands of vectors into a single call.
n = length(idx_ROI)
features_array = reduce(hcat, [features[idx_ROI[i]] for i = 1:n])
println("features array shape: ", size(features_array))

# Subtract from every row (feature) its mean over all columns (ROIs).
features_centered = features_array .- mean(features_array, dims = 2);

In [None]:
# compute UMAP & save
#embedding = umap(features_centered, 2; n_neighbors = 5);
#writedlm("analysis_TDA/dim_red/umap.csv", embedding, ",")

# load UMAP
# Read the header-less CSV into a plain matrix. CSV.File + DataFrame is used because
# CSV.read requires an explicit sink on current CSV.jl releases.
embedding = Matrix(DataFrame(CSV.File("analysis_TDA/dim_red/umap.csv", header = false)))

In [None]:
gr()
n = size(embedding, 2)
# Static overview of the embedding: row 1 plotted against row 2, one small grey
# marker per column.
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1,
    markercolor = "slategrey",
    hover = 1:n,
    # legend
    legend = :topright,
    label = "",
    # axes
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)
#savefig("analysis_TDA/dim_red/UMAP.pdf")

## Plot example regions in UMAP

In [None]:
plotly()
n = size(embedding, 2)
# Same overview with the plotly backend; `hover = 1:n` attaches the column index to
# each marker so individual ROIs can be identified.
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1,
    markercolor = "slategrey",
    hover = 1:n,
    # legend
    legend = :topright,
    label = "",
    # axes
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)

In [None]:
# Example ROI groups R1–R7, four ROI indices each. The indices are used as column
# indices into `embedding` and as keys into `idx_ROI`.
# NOTE(review): presumably chosen by hovering over the interactive plot — confirm.
R1 = [7637, 9170, 396, 6406]
R2 = [7047, 8489, 141, 858]
R3 = [9092, 9053, 9001, 9193]
R4 = [8080, 7729, 7948, 9008]
R5 = [8652, 8899, 8835, 8610]
R6 = [9215, 8792, 9303, 8341]
R7 = [9299, 9321, 7361, 8783];

In [None]:
# Embedding in grey with the seven example groups overlaid, one colour per group.
y = embedding
n = size(embedding, 2)  # computed here so the cell does not rely on another cell's `n`
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 1, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

# Each group gets exactly one legend entry. The original passed both `label = ""` and
# its alias `labels = "R…"`, two conflicting values for the same attribute.
scatter!(p, y[1, R1], y[2, R1], markersize = 5, markercolor = c1, label = "R1", legendfontsize = 4)
scatter!(p, y[1, R2], y[2, R2], markersize = 5, markercolor = c2, label = "R2")
scatter!(p, y[1, R3], y[2, R3], markersize = 5, markercolor = c3, label = "R3")
scatter!(p, y[1, R4], y[2, R4], markersize = 5, markercolor = c4, label = "R4")
scatter!(p, y[1, R5], y[2, R5], markersize = 5, markercolor = c5, label = "R5")
scatter!(p, y[1, R6], y[2, R6], markersize = 5, markercolor = c6, label = "R6")
scatter!(p, y[1, R7], y[2, R7], markersize = 5, markercolor = c7, label = "R7")


In [None]:
# display the indices of the first example group
R1

In [None]:
# Split one ROI name on "_" and keep its first three pieces; the same destructuring is
# used inside `plot_ECM`. Note that this assigns the notebook-level `LTX`, `row`, `col`.
LTX, row, col, _ = split(idx_ROI[7637], "_")

In [None]:
# Example ROI groups keyed by group number 1–7, the shape `plot_ECM` expects
# (it reads `group_selected[i]` for `i = 1:n_group`).
R = Dict(
    group => members for (group, members) in enumerate([
        [7637, 9170, 396, 6406],
        [7047, 8489, 141, 858],
        [9092, 9053, 9001, 9193],
        [8080, 7729, 7948, 9008],
        [8652, 8899, 8835, 8610],
        [9215, 8792, 9303, 8341],
        [9299, 9321, 7361, 8783],
    ])
)

In [None]:
# Render the example groups with the GR backend: one grid row per group, an image
# panel and a point-cloud panel per ROI, saved as a PNG.
gr()
plot_ECM(R, idx_ROI, "analysis_TDA/dim_red/example_regions.png")

In [None]:
gr()
# NOTE(review): `p1` is not defined by any earlier cell in this file. The only visible
# assignment is `p1 = plot_clusters(...)` further down, which is a single plot rather
# than a collection of eight panels. This cell looks stale — confirm before running,
# because it also overwrites the PNG written by the `plot_ECM` call in the cell above.
plot(p1..., layout = grid(1,8))
savefig("analysis_TDA/dim_red/example_regions.png")

## Highlight normal ROIs

In [None]:
# Indices of the ROIs whose name starts with "nor" (presumably the "normal_LTX…"
# entries — confirm). `startswith` also copes with names shorter than three characters,
# where slicing `v[1:3]` would throw.
normal_idx = [k for (k, v) in idx_ROI if startswith(v, "nor")]

In [None]:
gr()
n = size(embedding, 2)
# Embedding in grey with the ROIs listed in `normal_idx` drawn on top in pink.
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1,
    markercolor = "slategrey",
    alpha = 0.8,
    # legend
    legend = :topright,
    label = "",
    # axes
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)
scatter!(p, embedding[1, normal_idx], embedding[2, normal_idx];
        label = "", markercolor = :deeppink)
#savefig("analysis_TDA/dim_red/UMAP_with_normal.svg")

## Color UMAP by each LTX

In [None]:
"""
    find_indices_of_LTX(idx_ROI, LTX)

Return the indices whose ROI name carries `LTX` in characters 4–6.

# Arguments
- `idx_ROI`: iterable of `index => ROI name` pairs (e.g. a `Dict`).
- `LTX`: string compared against `ROI[4:6]`.

# Returns
A vector of the matching indices, in the iteration order of `idx_ROI`. Names shorter
than six code units never match (they are skipped instead of raising a `BoundsError`).
"""
function find_indices_of_LTX(idx_ROI, LTX)
    return [idx for (idx, ROI) in idx_ROI if ncodeunits(ROI) >= 6 && ROI[4:6] == LTX]
end

In [None]:
# Identifiers of all slides to iterate over (three-digit strings, the same list that is
# used when the persistence diagrams are loaded).
LTXs = ["001","013", "021", "034", "041", "050", "051", "073", "079",
        "092", "100", "108", "115", "142", "143", "145", "185", "206", "210", "221"]

In [None]:
# Highlight the ROIs of one slide on top of the full embedding and save it as a PDF.
LTX = "001"
gr()
n = size(embedding, 2)
indices = find_indices_of_LTX(idx_ROI, LTX);
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1,
    markercolor = "slategrey",
    hover = 1:n,
    # legend
    legend = :topright,
    label = "",
    # axes
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)
scatter!(p, embedding[1, indices], embedding[2, indices];
        label = LTX, markercolor = :deeppink, markersize = 2)
savefig(p, "analysis_TDA/dim_red/UMAP_" * LTX * ".pdf")

In [None]:
# One figure per slide: the full embedding in grey, that slide's ROIs in pink,
# saved as analysis_TDA/dim_red/UMAP_<LTX>.svg.
gr()
n = size(embedding, 2)
for LTX in LTXs
    indices = find_indices_of_LTX(idx_ROI, LTX);
    p = scatter(
        embedding[1, :],
        embedding[2, :];
        # markers
        markersize = 1,
        markercolor = "slategrey",
        hover = 1:n,
        # legend
        legend = :topright,
        label = "",
        # axes
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        xticks = [],
        yticks = [],
        framestyle = :box,
        # figure
        size = (450, 350),
        leftmargin = 5mm,
    )
    scatter!(p, embedding[1, indices], embedding[2, indices];
            label = LTX, markercolor = :deeppink, markersize = 2)
    savefig(p, "analysis_TDA/dim_red/UMAP_" * LTX * ".svg")
end


## Color the UMAP embedding by assigned cluster

In [None]:
# switch Plots back to the GR backend for the figures in this section
gr()

In [None]:
# Reload the index -> ROI lookup and the saved embedding so this section can be run
# on its own.
idx_ROI = load("analysis_TDA/dim_red/idx_ROI.jld2")["idx_ROI"];
# load
# CSV.File + DataFrame is used because CSV.read requires an explicit sink on current
# CSV.jl releases.
embedding = Matrix(DataFrame(CSV.File("analysis_TDA/dim_red/umap.csv", header = false)));

In [None]:
gr()
n = size(embedding, 2)
# Plain grey embedding, saved as SVG.
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1,
    markercolor = "slategrey",
    # legend
    legend = :topright,
    label = "",
    # axes
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)
savefig(p, "analysis_TDA/dim_red/umap.svg")

In [None]:
# Cluster label (-1 through 7) => indices of the ROIs assigned to that cluster.
clusters_all_ROIs = Dict(c => [] for c in -1:7);

# Reverse lookup: ROI name => index.
ROI_idx = Dict(v => k for (k,v) in idx_ROI);

LTXs = ["001","013", "021", "034", "041", "050", "051", "073", "079",
        "092", "100", "108", "115", "142", "143", "145", "185", "206", "210", "221"]

for LTX in LTXs
    # load assigned cluster
    df_LTX = DataFrame(CSV.File("analysis_TDA/LTX" * LTX * "/assigned_clusters.csv"))

    # Iterating rows directly avoids a row counter named `n`, which under notebook
    # scoping would overwrite the notebook-level `n` used by the plotting cells.
    for assignment in eachrow(df_LTX)
        # first column: ROI coordinate string, second column: cluster label
        coord, c = assignment
        ROI = "LTX" * LTX * "_" * coord * "_psr"
        push!(clusters_all_ROIs[c], ROI_idx[ROI])
    end
end


In [None]:
"""
    plot_clusters(embedding, clusters_all_ROIs, cluster)

Plot the whole embedding in a faint background colour and overlay the points of one
cluster in that cluster's colour.

# Arguments
- `embedding`: matrix indexed as `embedding[1, :]` (x) and `embedding[2, :]` (y).
- `clusters_all_ROIs`: lookup from cluster label to the column indices of its members.
- `cluster`: cluster label; must be a key of `clusters_all_ROIs` and one of `-1:7`
  (the labels that have a colour assigned here).

# Returns
The plot produced by `plot(p)`, which is also the current plot afterwards, so a
following `savefig(path)` saves it.

Selects the GR backend as a side effect.
"""
function plot_clusters(embedding, clusters_all_ROIs, cluster)
    # specify colors
    colors = Dict(
        -1 => "gray44",
        0 => "#780000", 
        1=> "#cb334c", 
        2 => "#f89981",
        3 => "#ffbd00",
        4 => "#02c39a",
        5 => "#429bb4",
        6 => "#7851A9",
        7 => "#32174D"
        )
    gr()

    # background: every point, faint
    p = scatter(embedding[1,:], embedding[2,:], 
            markercolor = "seashell3",
            alpha = 0.5,
            markersize = 1.5,
            markerstrokewidth = 0.1,
            label = "", 
            xticks = [], 
            yticks = [], 
            framestyle = :box,  
            #xlabel = "UMAP-1",
            #ylabel = "UMAP-2",
            guidefontsize = 15,
            leftmargin = 5mm,
            markerstrokecolor = "white",
            size = (450, 350),
            legend = :topright)

    # foreground: members of the requested cluster, drawn onto `p` explicitly
    members = clusters_all_ROIs[cluster]
    scatter!(p, embedding[1, members], embedding[2, members], 
            markercolor = colors[cluster], label = cluster, markersize = 1.5, markerstrokewidth = 0.1,
            markerstrokecolor = "white")
    return plot(p)
end

In [None]:
# One figure per cluster label; each is saved right after it is drawn by passing the
# plot handle to `savefig` explicitly.
p_unassigned = plot_clusters(embedding, clusters_all_ROIs, -1)
savefig(p_unassigned, "analysis_TDA/dim_red/cluster_unassigned.svg")

p0 = plot_clusters(embedding, clusters_all_ROIs, 0)
savefig(p0, "analysis_TDA/dim_red/cluster_0.svg")

p1 = plot_clusters(embedding, clusters_all_ROIs, 1)
savefig(p1, "analysis_TDA/dim_red/cluster_1.svg")

p2 = plot_clusters(embedding, clusters_all_ROIs, 2)
savefig(p2, "analysis_TDA/dim_red/cluster_2.svg")

p3 = plot_clusters(embedding, clusters_all_ROIs, 3)
savefig(p3, "analysis_TDA/dim_red/cluster_3.svg")

p4 = plot_clusters(embedding, clusters_all_ROIs, 4)
savefig(p4, "analysis_TDA/dim_red/cluster_4.svg")

p5 = plot_clusters(embedding, clusters_all_ROIs, 5)
savefig(p5, "analysis_TDA/dim_red/cluster_5.svg")

p6 = plot_clusters(embedding, clusters_all_ROIs, 6)
savefig(p6, "analysis_TDA/dim_red/cluster_6.svg")

p7 = plot_clusters(embedding, clusters_all_ROIs, 7)
savefig(p7, "analysis_TDA/dim_red/cluster_7.svg")

# 3 x 3 overview of all nine panels
savefig(
    Plots.plot(p_unassigned, p0, p1, p2, p3, p4, p5, p6, p7, layout = grid(3,3), size = (1200, 1000)),
    "analysis_TDA/dim_red/color_by_assigned_clusters.svg",
)

In [None]:
# All clusters in a single figure: faint background of every point, then one overlay
# per cluster label in that label's colour.

# cluster label => colour
colors = Dict(
    -1 => "gray44",
    0 => "#780000", 
    1=> "#cb334c", 
    2 => "#f89981",
    3 => "#ffbd00",
    4 => "#02c39a",
    5 => "#429bb4",
    6 => "#7851A9",
    7 => "#32174D"
    )
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :],
    embedding[2, :];
    # markers
    markersize = 1.5,
    markercolor = "seashell3",
    markerstrokecolor = "white",
    markerstrokewidth = 0.1,
    alpha = 0.5,
    # legend
    legend = :topright,
    label = "",
    # axes (labels and y-limits intentionally left off)
    xlims = (-8, 14),
    #ylims = (-12, 18),
    #xlabel = "UMAP-1",
    #ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    # figure
    size = (450, 350),
    leftmargin = 5mm,
)

for cluster in -1:7
    members = clusters_all_ROIs[cluster]
    scatter!(p, embedding[1, members], embedding[2, members];
        label = cluster, markercolor = colors[cluster], markersize = 1.5,
        markerstrokecolor = "white", markerstrokewidth = 0.1)
end
plot(p)
savefig("analysis_TDA/dim_red/assigned_colors.pdf")