# Dowker PH analysis
* This notebook performs dimensionality reduction (UMAP, PCA) and clustering analysis on Dowker persistent homology (PH) features.

In [None]:
include("../src/ECM_TDA.jl")
using .ECM_TDA

using Eirene
using Ripserer
using CSV
using TiffImages
using Images
using NPZ
using Plots
using PersistenceDiagrams
using Measures
using Distributions
using MultivariateStats
using LinearAlgebra
using Random
using StatsBase
using JLD2
using FileIO
using PersistenceDiagrams
using DelimitedFiles
using Distances
using UMAP


In [None]:
# color palettes
# Accent colors used to highlight groups of points in the plots below.
c1 = "#fd5184" # pink
c2 = "#ffb602" # yellow
c3 = "#3ec0c9" # blue
c4 = "#d753ce" # purple
c5 = "#49a849"  # green
c6 = "#F28522" # orange

# cell colors
c_ECM = "#249EA0" # teal
c_cancer = "#592693" # purple
c_leukocytes = "#FAAB36" # orange
# Fibroblasts are plotted further down (`c_fibroblast`, `celltype_colors["fibroblast"]`)
# but had no color assigned, which made those cells fail. Reuse the green accent.
# NOTE(review): placeholder choice -- swap in the project's fibroblast color if one exists.
c_fibroblast = c5

# Lookup from cell-type label (value of the `class` column) to plot color.
celltype_colors = Dict("cancer" => c_cancer,
                       "leukocytes" => c_leukocytes,
                       "fibroblast" => c_fibroblast);
# Select the GR backend for Plots.
gr()


In [None]:
# ROIs with distinctive patterns
# ROIs with distinctive patterns.
# Each `exN` is a group of four ROI names of the form "LTX<id>_Da<id>_idx<k>".
ex1 = [
    "LTX221_Da505_idx1",
    "LTX221_Da505_idx2",
    "LTX221_Da505_idx3",
    "LTX221_Da2892_idx1",
]
ex2 = [
    "LTX013_Da153_idx3",
    "LTX013_Da153_idx2",
    "LTX013_Da153_idx1",
    "LTX013_Da154_idx3",
]
ex3 = [
    "LTX092_Da252_idx3",
    "LTX092_Da379_idx4",
    "LTX092_Da378_idx4",
    "LTX092_Da440_idx1",
]
ex4 = [
    "LTX097_Da113_idx2",
    "LTX097_Da636_idx1",
    "LTX097_Da113_idx3",
    "LTX097_Da636_idx2",
]
ex5 = [
    "LTX124_Da104_idx2",
    "LTX124_Da104_idx1",
    "LTX079_Da108_idx1",
    "LTX079_Da108_idx2",
];

# Plot example

In [None]:
# Identifiers of the example ROI; they are spliced into the file names below.
LTX = "001"
Da = "232"
idx = 1

# ECM image of the ROI, materialised as a plain array.
image_path = "data/4000x4000/subregion_ECM/LTX$(LTX)_Da$(Da)_idx$(idx).tif"
img = Array(Images.load(image_path));

# Points sampled from the ECM image.
# NOTE(review): `CSV.read` is called without a sink type here and throughout the
# notebook; recent CSV.jl releases require one -- confirm the pinned CSV.jl version.
ecm_points_path = "data/4000x4000/ECM_sampled/points_CSV/LTX$(LTX)_Da$(Da)_idx$(idx).csv"
ECM_points = CSV.read(ecm_points_path)

# Cell table of the ROI (the cells below use its `class`, `x` and `y` columns).
cell_path = "data/4000x4000/subregion_cells/LTX$(LTX)_Da$(Da)_idx$(idx).csv"
cells = CSV.read(cell_path);

In [None]:
# Left panel: the ECM image in grayscale.
p1 = plot(Gray.(img);
          frame = :box,
          ticks = [],
          size = (300, 300),
          background_color = :transparent,
          foreground_color = :black)
# Right panel: the sampled ECM points, drawn with a flipped y axis
# (presumably so the orientation matches the image).
p2 = scatter(ECM_points[:, 1], ECM_points[:, 2];
             c = c_ECM,
             markersize = 2,
             label = "",
             yflip = true,
             frame = :box,
             ticks = [],
             background_color = :transparent,
             foreground_color = :black)
plot(p1, p2, size = (600, 300))

In [None]:
# One scatter panel per cell type, coloured via `celltype_colors`.
cell_types = ["cancer", "leukocytes", "fibroblast"]
plot_array = Any[
    begin
        # rows of the cell table belonging to this cell type
        cell_ct = cells[cells.class .== ct, :]
        scatter(cell_ct.x, cell_ct.y;
                label = ct,
                c = celltype_colors[ct],
                markersize = 2,
                markerstrokewidth = 0.2,
                yflip = true,
                frame = :box,
                ticks = [])
    end for ct in cell_types
]

plot(plot_array..., layout = grid(1, 3), size = (900, 300))

In [None]:
# One example Dowker computation on the ROI loaded above, between leukocytes and
# fibroblasts with a subsample size of 300. `compute_Dowker_cells` is defined
# outside this notebook; presumably the first two outputs are the dimension-0 and
# dimension-1 barcodes and the next two the sampled point sets -- verify against
# its definition. The fifth output is discarded.
W_barcode0, W_barcode1, cells1, cells2, _ = compute_Dowker_cells(
    cells;
    celltype1 = "leukocytes",
    celltype2 = "fibroblast",
    subsample_size = 300,
)

In [None]:

# Overlay the two point sets returned by the Dowker example in a single panel.
# Both layers share the same styling; only the data and the color differ.
p1 = let shared = (markersize = 2,
                   yflip = true,
                   label = "",
                   markerstrokewidth = 0.2,
                   frame = :box,
                   ticks = [])
    layer = scatter(cells1[:, 1], cells1[:, 2]; shared..., c = c_leukocytes)
    scatter!(layer, cells2[:, 1], cells2[:, 2]; shared..., c = c_fibroblast)
end

Get an idea of the number of cells of each type

In [None]:
# Per-ROI cell counts, one vector per cell type. They are filled by the loop in
# the next cell and hold integers only, so give them a concrete element type
# instead of `Vector{Any}`.
n_cancer = Int[]
n_leukocytes = Int[]
n_fibroblast = Int[]
# Labels expected in the `class` column of the cell tables.
cell_types = ["cancer", "leukocytes", "fibroblast"]

In [None]:
# Count the cells of each type in every ROI file and append the count to the
# matching vector.
# NOTE(review): `csv_files` and `dir` are not defined in the visible part of the
# notebook before this cell; they must be set (directory with a trailing
# separator, list of file names) before running it.
for file in csv_files
    cells = CSV.read(dir * file)
    for ct in cell_types
        # pick the counter that belongs to this label; unknown labels are skipped
        tally = ct == "cancer" ? n_cancer :
                ct == "leukocytes" ? n_leukocytes :
                ct == "fibroblast" ? n_fibroblast : nothing
        tally === nothing || push!(tally, count(cells.class .== ct))
    end
end

In [None]:
# Histograms of the per-ROI cell counts, one panel per cell type.
p1, p2, p3 = (histogram(counts, label = "", title = name)
              for (counts, name) in ((n_cancer, "cancer"),
                                     (n_leukocytes, "leukocytes"),
                                     (n_fibroblast, "fibroblast")))
plot(p1, p2, p3, layout = grid(1, 3), size = (900, 300))

# cancer and leukocytes

In [None]:
# Load the stored Dowker persistence diagrams for the cancer/leukocytes pair.
systems = "cancer_leukocytes"
data = load("data/4000x4000_combined/Dowker/cancer_leukocytes/PD.jld2")

# Diagrams in dimension 0 and 1, and persistence images computed from them.
PD0, PD1 = data["PD0"], data["PD1"]
PI0, PI1 = compute_PI2(PD0), compute_PI2(PD1)
# Stored maxima of the diagrams, read as-is from the file.
PD0_max, PD1_max = data["PD0_max"], data["PD1_max"];

In [None]:
# Recompute coarser persistence images (PIs) for dimensions 0 and 1 and store
# the coordinate ranges of the resulting images.

# Convert the stored diagrams; entries whose diagram is `nothing` are skipped.
# (`!==` is the identity test intended for `nothing`.)
PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD0 if v !== nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD1 if v !== nothing);

# One PersistenceImage transform per dimension, built from all diagrams of that
# dimension.
pi0 = PersistenceImage(collect(values(PH0_dict)), sigma = 50, size = 20)
pi1 = PersistenceImage(collect(values(PH1_dict)), sigma = 50, size = 20)

# Apply the transforms; keys are kept unchanged. These replace the PIs computed
# when the data was loaded.
PI0 = Dict(k => pi0(diagram) for (k, diagram) in PH0_dict)
PI1 = Dict(k => pi1(diagram) for (k, diagram) in PH1_dict)

# save the min, max coordinates of PDs (useful for plotting)
PI0_xmin, PI0_xmax = first(pi0.xs), last(pi0.xs)
PI0_ymin, PI0_ymax = first(pi0.ys), last(pi0.ys)

PI1_xmin, PI1_xmax = first(pi1.xs), last(pi1.xs)
PI1_ymin, PI1_ymax = first(pi1.ys), last(pi1.ys)

save("data/4000x4000_combined/Dowker/cancer_leukocytes/PI_ranges.jld2",
    "PI0_xmin", PI0_xmin,
    "PI0_xmax", PI0_xmax,
    "PI0_ymin", PI0_ymin,
    "PI0_ymax", PI0_ymax,
    "PI1_xmin", PI1_xmin,
    "PI1_xmax", PI1_xmax,
    "PI1_ymin", PI1_ymin,
    "PI1_ymax", PI1_ymax)

## Analysis: dim 0

Create index

In [None]:
# Assign an integer index to every ROI that has a dimension-0 persistence image.
ROIs = collect(keys(PI0))
n = length(ROIs)
idx_ROI = Dict(i => roi for (i, roi) in pairs(ROIs))

# Re-key the persistence images and diagrams by that integer index.
PI = Dict(i => PI0[roi] for (i, roi) in pairs(ROIs));
PD = Dict(i => PD0[roi] for (i, roi) in pairs(ROIs));

#save("analysis/cancer_leukocytes/dim_0/data.jld2",
#    "idx_ROI", idx_ROI,
#    "PI", PI,
#    "PD", PD);


In [None]:
# Load the saved index -> ROI mapping and rebuild the integer-keyed PI / PD
# dictionaries from it, so indices match the saved analysis outputs.
data = load("analysis/cancer_leukocytes/dim_0/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
PI = Dict(i => PI0[idx_ROI[i]] for i in 1:n);
PD = Dict(i => PD0[idx_ROI[i]] for i in 1:n);

# Inverse lookup: ROI -> integer index.
ROI_idx = Dict(roi => i for (i, roi) in idx_ROI);

In [None]:
# Recompute the PersistenceImage transform on the indexed diagrams to obtain the
# x / y coordinate ranges used when plotting persistence images.
# Entries whose diagram is `nothing` are skipped (`!==` is the identity test
# intended for `nothing`).
PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD if v !== nothing);

PIs = PersistenceImage(collect(values(PH_dict)), sigma = 50, size = 20)

# get ranges of x and y (useful for plotting)
xmin, xmax = first(PIs.xs), last(PIs.xs)
ymin, ymax = first(PIs.ys), last(PIs.ys);

### UMAP

In [None]:
# Compute a 2-dimensional UMAP embedding of the persistence images.
# The global RNG is seeded first, presumably so the embedding is reproducible.
Random.seed!(10)
# `center_PI` is defined outside this notebook; NOTE(review): confirm it returns
# one column per ROI, as the plots below index the embedding by column.
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to CSV, then read it back; `embedding` and `y` both refer
# to the array that was read from disk.
writedlm("analysis/cancer_leukocytes/dim_0/umap.csv", embedding, ",")
embedding = readdlm("analysis/cancer_leukocytes/dim_0/umap.csv", ',')
y = embedding;

In [None]:
gr()
n = size(embedding, 2)
# Plain scatter of the UMAP embedding (one point per column); `hover` is given
# the column indices.
p = scatter(embedding[1, :], embedding[2, :];
            label = "",
            legend = :topright,
            markercolor = "slategrey",
            markersize = 5,
            hover = 1:n,
            framestyle = :box,
            xticks = [],
            yticks = [],
            xlabel = "UMAP-1",
            ylabel = "UMAP-2",
            guidefontsize = 15,
            leftmargin = 5mm,
            size = (450, 350))
savefig(p, "analysis/cancer_leukocytes/dim_0/umap.pdf")
plot(p)

In [None]:
# Hand-picked column indices of the UMAP embedding, grouped into four example
# sets (three points each). They are highlighted in the annotated UMAP plot and
# looked up in `idx_ROI` for the example image grid.
U1 = [70, 155, 95]
U2 = [392, 293, 398]
U3 = [164, 371, 331]
U4 = [340, 260, 318]

In [None]:
# UMAP scatter with the example sets U1..U4 highlighted.
gr()

# Use the UMAP embedding from above (it can also be re-read from
# "analysis/cancer_leukocytes/dim_0/umap.csv").
y = embedding
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(U1, U2, U3, U4)
nonannotated = setdiff(1:n, annotated)

# background: every point that is not part of an example set
p = scatter(y[1, nonannotated], y[2, nonannotated];
            label = "",
            legend = :topright,
            markercolor = "lightgray",
            alpha = 0.6,
            markersize = markersize,
            markerstrokewidth = 3,
            xaxis = "UMAP-1",
            yaxis = "UMAP-2",
            ticks = [],
            guidefontsize = 7,
            framestyle = :box,
            size = (200, 150))

# foreground: one marker shape / color per example set
scatter!(p, y[1, U1], y[2, U1];
         markersize = markersize, markershape = :rect, markercolor = c1,
         labels = "U1", legendfontsize = legendfontsize)
scatter!(p, y[1, U2], y[2, U2];
         markersize = markersize, markershape = :utriangle, markercolor = c2,
         labels = "U2")
scatter!(p, y[1, U3], y[2, U3];
         markersize = markersize, markershape = :star, markercolor = c3,
         labels = "U3")
scatter!(p, y[1, U4], y[2, U4];
         markersize = markersize, markershape = :diamond, markercolor = c4,
         labels = "U4")

# Same path as the plain UMAP figure saved earlier, so that file is overwritten.
savefig(p, "analysis/cancer_leukocytes/dim_0/umap.pdf")
plot(p)

In [None]:
# For every ROI in the UMAP example sets, draw three panels: cancer cells,
# leukocytes and the PSRH image. One grid row per example set.
regions = Dict(1 => U1, 2 => U2, 3 => U3, 4 => U4)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# Derive the number of columns from the panels actually produced so the grid
# always matches (assumes every example set has the same number of ROIs).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/UMAP_examples.png")

Locations of exemplary images

In [None]:
# Look up the integer indices of the exemplary ROIs via `ROI_idx`.
# Note that this rebinds `ex1`..`ex4` from ROI names to integer indices.
ex1 = [ROI_idx[roi] for roi in ("LTX221_Da505_idx1", "LTX221_Da505_idx2",
                                "LTX221_Da505_idx3", "LTX221_Da2892_idx1")]
ex2 = [ROI_idx[roi] for roi in ("LTX013_Da153_idx3", "LTX013_Da153_idx2",
                                "LTX013_Da153_idx1", "LTX013_Da154_idx3")]
ex3 = [ROI_idx[roi] for roi in ("LTX092_Da252_idx3", "LTX092_Da379_idx4",
                                "LTX092_Da378_idx4", "LTX092_Da440_idx1")]
ex4 = [ROI_idx[roi] for roi in ("LTX097_Da113_idx2", "LTX097_Da636_idx1",
                                "LTX097_Da113_idx3", "LTX097_Da636_idx2")]

In [None]:
# UMAP scatter with the exemplary ROI groups ex1..ex4 highlighted.
y = embedding
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(ex1, ex2, ex3, ex4)
nonannotated = setdiff(1:n, annotated)

# background: every point that is not an exemplary ROI
p = scatter(y[1, nonannotated], y[2, nonannotated];
            label = "",
            legend = :bottomright,
            markercolor = "lightgray",
            alpha = 0.6,
            markersize = markersize,
            markerstrokewidth = 3,
            xaxis = "UMAP-1",
            yaxis = "UMAP-2",
            ticks = [],
            guidefontsize = 7,
            framestyle = :box,
            size = (200, 150))

# foreground: one marker shape / color per exemplary group
scatter!(p, y[1, ex1], y[2, ex1];
         markersize = markersize, markershape = :rect, markercolor = c1,
         labels = "ex1", legendfontsize = legendfontsize)
scatter!(p, y[1, ex2], y[2, ex2];
         markersize = markersize, markershape = :utriangle, markercolor = c2,
         labels = "ex2")
scatter!(p, y[1, ex3], y[2, ex3];
         markersize = markersize, markershape = :star, markercolor = c3,
         labels = "ex3")
scatter!(p, y[1, ex4], y[2, ex4];
         markersize = markersize, markershape = :diamond, markercolor = c4,
         labels = "ex4")
savefig(p, "analysis/cancer_leukocytes/dim_0/umap_clean_examples.pdf")
plot(p)


In [None]:
# For every exemplary ROI, draw three panels: cancer cells, leukocytes and the
# PSRH image. One grid row per exemplary group.
regions = Dict(1 => ex1, 2 => ex2, 3 => ex3, 4 => ex4)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# Derive the number of columns from the panels actually produced so the grid
# always matches (assumes every group has the same number of ROIs).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/clean_examples.png")

### PCA

In [None]:

# compute PCA
#transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA2(PI; pratio = 0.99)


# save("analysis/cancer_leukocytes/dim_0/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)


In [None]:
# Load the stored PCA results and print a short summary.
data = load("analysis/cancer_leukocytes/dim_0/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"]

println("number of components: $(length(eigenvectors))")
println("variance explained by 1 eigenvectors: $(variance_1)")
println("variance explained by 2 eigenvectors: $(variance_2)")
println("variance difference between 2 and 1:$(variance_2 - variance_1)")

In [None]:
gr()
# Plain scatter of the first two rows of the PCA coordinates, one point per
# column; `hover` is given the column indices.
p = scatter(transformed[1, :], transformed[2, :];
            c = :slategrey,
            label = "",
            hover = collect(1:size(transformed, 2)),
            ticks = [],
            frame = :box)
#savefig("analysis/" * systems * "/dim_0/pca.pdf")
plot(p)

In [None]:
# save index of example points
# Earlier selection with four points per region, kept for reference:
# R1 = [295, 45, 352, 382]
# R2 = [144, 168, 13, 389]
# R3 = [316, 289, 128, 368]
# R4 = [54, 227, 283, 138]

# Current selection: two column indices of the PCA coordinates per region.
# They are highlighted in the annotated PCA plot and looked up in `idx_ROI`
# for the example image grids.
R1 = [45, 382]
R2 = [144, 168]
R3 = [289, 316]
R4 = [54, 227]

In [None]:
# PCA scatter with the example regions R1..R4 highlighted.
gr()

# load PCA
y = load("analysis/cancer_leukocytes/dim_0/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = setdiff(1:n, annotated)

# background: every point that is not part of an example region
p = scatter(y[1, nonannotated], y[2, nonannotated];
            label = "",
            legend = :bottomleft,
            markercolor = "lightgray",
            alpha = 0.6,
            markersize = markersize,
            markerstrokewidth = 1,
            xaxis = "PC1 (70%)",
            yaxis = "PC2 (15%)",
            xticks = (0,0),
            yticks = (0,0),
            xlims = (-0.0004, 0.0004),
            guidefontsize = 7,
            framestyle = :box,
            size = (200, 150))

# foreground: one marker shape / color per region.
# NOTE(review): each call passes both `label = ""` and `labels = "R?"`, which
# conflict; decide whether legend entries are wanted and keep only one of them.
scatter!(p, y[1, R1], y[2, R1];
         label = "", markersize = markersize, markershape = :rect, markercolor = c1,
         labels = "R1", legendfontsize = legendfontsize)
scatter!(p, y[1, R2], y[2, R2];
         label = "", markersize = markersize, markershape = :utriangle, markercolor = c2,
         labels = "R2")
scatter!(p, y[1, R3], y[2, R3];
         label = "", markersize = markersize, markershape = :star, markercolor = c3,
         labels = "R3")
scatter!(p, y[1, R4], y[2, R4];
         label = "", markersize = markersize, markershape = :diamond, markercolor = c4,
         labels = "R4")
savefig(p, "analysis/cancer_leukocytes/dim_0/pca_low_dim.svg")
plot(p)

In [None]:
# For every ROI in the PCA example regions, draw three panels: cancer cells,
# leukocytes and the PSRH image. One grid row per region.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# The layout used to be hard-coded for four ROIs per region (12 columns), which
# no longer matches regions that hold two ROIs each. Derive the column count
# from the panels actually produced (assumes equally sized regions).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/PCA_examples.png")

Plot for manuscript figures

In [None]:
# Manuscript figures: for every ROI in the PCA example regions, save one
# three-panel figure (cancer cells, leukocytes, PSRH image) to its own file.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
n = length(regions)

# Make sure the output directory exists before the first `savefig`.
outdir = "analysis/cancer_leukocytes/dim_0/PCA_examples_manuscript_figures"
mkpath(outdir)

for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]
        panels = Any[]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(panels,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(panels, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))

        # One file per ROI, named after the region number and the ROI.
        fig = plot(panels..., layout = grid(1, 3), size = (250 * 3, 250))
        savefig(fig, outdir * "/R" * string(i) * "_" * string(f) * ".png")
    end
end


In [None]:
# For every ROI in the PCA example regions, collect five panels: cancer cells,
# subsampled cancer cells, leukocytes, subsampled leukocytes and the PSRH image.
# The panels are laid out and saved in the next cell.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            # all cells of this type
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))

            # subsampled cells of this type (first two columns are plotted).
            # NOTE(review): these files are read with `readdlm` (no header)
            # elsewhere in the notebook; confirm `CSV.read` does not consume
            # the first point as a header row.
            df_cell = CSV.read("data/4000x4000_combined/Dowker/" * celltype * "/" * f * ".csv")
            push!(plot_array,
                  scatter(df_cell[:, 1], df_cell[:, 2];
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end


In [None]:
# Lay out the panels collected in the previous cell, one row per region.
# The column count used to be hard-coded for four ROIs per region (5 * 4), which
# no longer matches regions that hold two ROIs each; derive it from the number of
# panels instead (assumes equally sized regions; `n` is the number of regions).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/PCA_examples2.png")

Plot more regions

In [None]:
# Alternative, larger selection of example points (four per region).
# Note that this rebinds R1..R4.
R1 = [168, 260, 144, 158]
R2 = [211, 350, 282, 395]
R3 = [45, 26, 352, 182]
R4 = [311, 264, 357, 160]

# For every selected ROI, draw three panels: cancer cells, leukocytes and the
# PSRH image. One grid row per region.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# Derive the number of columns from the panels actually produced so the grid
# always matches (assumes equally sized regions).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/PCA_examples_more_regions.png")

### eigenvectors

In [None]:
# Smallest and largest entry over the first four eigenvectors; used below as a
# shared color range.
eigenvector_min = minimum(minimum(eigenvectors[i]) for i in 1:4)
eigenvector_max = maximum(maximum(eigenvectors[i]) for i in 1:4)

println("min pixel: $(eigenvector_min)")
println("max pixel: $(eigenvector_max)")

In [None]:
# Alias for the dimension-0 maximum loaded from PD.jld2 (`PD0_max`).
# NOTE(review): `max1` is not used in the visible part of the notebook.
max1 = PD0_max

In [None]:
  # NOTE(review): this cell looks like a function body pasted in for debugging.
  # It uses `x_min`, `x_max`, `y_min`, `y_max`, `x_tick_interval`,
  # `y_tick_interval` and `kwargs`, none of which are defined in the visible part
  # of the notebook, and `plot_scale`, which is only assigned in a later cell.
  # It also indexes `PI` like a matrix, while `PI` is a Dict above.
  # scale all min, max
  x_min_scaled = x_min * plot_scale / 100
  x_max_scaled = x_max * plot_scale / 100
  y_min_scaled = y_min * plot_scale / 100
  y_max_scaled = y_max * plot_scale / 100
  
  # locate "0"
  x_loc0 = 0 - x_min
  y_loc0 = 0 - y_min
  
  # position of the value 0 as a fraction of the axis range, scaled by `n`
  x_zero_tick = x_loc0/ (x_max - x_min) * n
  y_zero_tick = y_loc0 / (y_max - y_min) * n
  
  # compute the number of ticks to draw
  x_tick_loc = x_tick_interval/x_max_scaled * plot_scale 
  y_tick_loc = y_tick_interval/y_max_scaled * plot_scale 

  # heatmap of the top-left `plot_scale` x `plot_scale` block, with ticks placed
  # from the zero position onward
  p = heatmap(PI[1:plot_scale, 1:plot_scale], 
      label = "",
      title = "",
      framestyle = :box,
      xticks = (x_zero_tick:x_tick_loc:plot_scale, Int32.(round.(0:x_tick_interval:x_max_scaled))),  # (location of ticks, tick values)
      yticks = (y_zero_tick:y_tick_loc:plot_scale, Int32.(round.(0:y_tick_interval:y_max_scaled)))
      ;kwargs...)

In [None]:
  # NOTE(review): repeats the tick computations of the previous cell without the
  # heatmap call. It uses `x_min`, `x_max`, `y_min`, `y_max`, `x_tick_interval`
  # and `y_tick_interval`, which are not defined in the visible part of the
  # notebook, and `plot_scale`, which is only assigned in a later cell.
  # scale all min, max
  x_min_scaled = x_min * plot_scale / 100
  x_max_scaled = x_max * plot_scale / 100
  y_min_scaled = y_min * plot_scale / 100
  y_max_scaled = y_max * plot_scale / 100
  
  # locate "0"
  x_loc0 = 0 - x_min
  y_loc0 = 0 - y_min
  
  # position of the value 0 as a fraction of the axis range, scaled by `n`
  x_zero_tick = x_loc0/ (x_max - x_min) * n
  y_zero_tick = y_loc0 / (y_max - y_min) * n
  
  # compute the number of ticks to draw
  x_tick_loc = x_tick_interval/x_max_scaled * plot_scale 
  y_tick_loc = y_tick_interval/y_max_scaled * plot_scale 


In [None]:
# Plot the first four eigenvectors as persistence-image heatmaps that share one
# color range, with a hand-built colorbar as a fifth, narrow panel.
gr()
plot_scale = 20 # only show plot_scale% of persistence image
ps = [plot_PI2(eigenvectors[i], xmin, xmax, ymin, ymax,
               clims = (eigenvector_min, eigenvector_max),
               xlabel = "birth",
               ylabel = "persistence",
               show_axis = false,
               left_margin = 5mm,
               bottom_margin = 7mm,
               x_tick_interval = 400,
               y_tick_interval = 400,
               legend = false # no per-panel colorbar
               ) for i in 1:4]

# Four equal panels plus a rightmost panel (5% of the width) for the colorbar.
# (A stray `Plots.GridLayout(1, 2)` expression whose result was discarded has
# been removed.)
l = @layout[grid(1,4) a{0.05w}]

n = 100 # length of colorbar (as a vector)
cbar_interval = 0.2
# Tick values: multiples of `cbar_interval` from 0 down towards the minimum
# (negated, excluding the duplicate 0) followed by multiples from 0 up to the
# maximum.
cbar_ticks = vcat(reverse(collect(0:cbar_interval: -eigenvector_min))[1:end-1] .* -1, collect(0:cbar_interval:eigenvector_max))
cbar_loc = [cbar_tickvals_to_loc(eigenvector_min, eigenvector_max, n, val) for val in cbar_ticks]

# The colorbar is an n x 1 heatmap of evenly spaced values over the color range.
p = plot(ps...,
         heatmap(collect(range(eigenvector_min, eigenvector_max, length = n)) .* ones(n,1),
                 legend = :none,
                 xticks = :none,
                 yticks = (cbar_loc, cbar_ticks)),
         layout = l,
         topmargin = 3mm,
         size = (1000, 200))
savefig(p, "analysis/cancer_leukocytes/dim_0/eigenvectors.svg")
plot(p)

<font color = "red">CAUTION</font>
* PC2 seems to encode exclusion, but not only global exclusion: a region can show global exclusion, or it can simply have few points in one of the two systems, which still registers as exclusion.
* To distinguish such cases, I reckon that combining Dowker info with PD0 of each system would help
 - Include SI figure
* PC2: colocalization. Again, multiple patterns
 

### PCA with exemplary regions

In [None]:
# Inverse lookup: ROI name -> integer index.
ROI_idx = Dict(v => k for (k, v) in idx_ROI);

# `ex1`..`ex4` hold ROI names where they are first defined, but the
# "Locations of exemplary images" cell rebinds them to integer indices, after
# which a plain `ROI_idx[i]` lookup fails. Accept both forms so this cell works
# whichever definition is current.
roi_index(roi::AbstractString) = ROI_idx[roi]
roi_index(i::Integer) = i

ex1_idx = roi_index.(ex1)
ex2_idx = roi_index.(ex2)
ex3_idx = roi_index.(ex3)
ex4_idx = roi_index.(ex4)
ex5_idx = roi_index.(ex5)

In [None]:
# PCA scatter with the exemplary ROI groups ex1..ex5 highlighted.
gr()

# load PCA
y = load("analysis/cancer_leukocytes/dim_0/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(ex1_idx, ex2_idx, ex3_idx, ex4_idx, ex5_idx)
nonannotated = setdiff(1:n, annotated)

# background: every point that is not an exemplary ROI
p = scatter(y[1, nonannotated], y[2, nonannotated];
            label = "",
            legend = :bottomleft,
            markercolor = "lightgray",
            alpha = 0.6,
            markersize = markersize,
            markerstrokewidth = 3,
            xaxis = "PC1 (83%)",
            yaxis = "PC2 (8%)",
            ticks = [],
            guidefontsize = 7,
            framestyle = :box,
            size = (200, 150))

# foreground: one marker shape / color per exemplary group.
# NOTE(review): each call passes both `label = ""` and `labels = "ex?"`, which
# conflict; decide whether legend entries are wanted and keep only one of them.
scatter!(p, y[1, ex1_idx], y[2, ex1_idx];
         label = "", markersize = markersize, markershape = :rect, markercolor = c1,
         labels = "ex1", legendfontsize = legendfontsize)
scatter!(p, y[1, ex2_idx], y[2, ex2_idx];
         label = "", markersize = markersize, markershape = :utriangle, markercolor = c2,
         labels = "ex2")
scatter!(p, y[1, ex3_idx], y[2, ex3_idx];
         label = "", markersize = markersize, markershape = :star, markercolor = c3,
         labels = "ex3")
scatter!(p, y[1, ex4_idx], y[2, ex4_idx];
         label = "", markersize = markersize, markershape = :diamond, markercolor = c4,
         labels = "ex4")
scatter!(p, y[1, ex5_idx], y[2, ex5_idx];
         label = "", markersize = markersize, markershape = :pentagon, markercolor = c5,
         labels = "ex5")
savefig(p, "analysis/cancer_leukocytes/dim_0/exemplary_regions_PCA.pdf")
plot(p)

In [None]:
# For every exemplary ROI, draw three panels: cancer cells, leukocytes and the
# PSRH image. One grid row per exemplary group.
regions = Dict(1 => ex1_idx, 2 => ex2_idx, 3 => ex3_idx, 4 => ex4_idx, 5 => ex5_idx)
gr()
plot_array = Any[]
n = length(regions)
for i in 1:n
    for idx in regions[i]
        f = idx_ROI[idx]

        # Read the ROI's cell table once and reuse it for both cell types.
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        for (celltype, col) in (("cancer", c_cancer), ("leukocytes", c_leukocytes))
            df_cell = df[df.class .== celltype, :]
            push!(plot_array,
                  scatter(df_cell.x, df_cell.y;
                          markersize = 1.5,
                          yflip = true,
                          label = "",
                          markerstrokewidth = 0.2,
                          frame = :box,
                          ticks = [],
                          aspect_ratio = :equal,
                          size = (150, 150),
                          c = col,
                          right_margin = -4mm))
        end

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# Derive the number of columns from the panels actually produced so the grid
# always matches (assumes every group has the same number of ROIs).
ncols = length(plot_array) ÷ n
p = plot(plot_array..., layout = grid(n, ncols), size = (250 * ncols, 250 * n))
savefig(p, "analysis/cancer_leukocytes/dim_0/exemplary_regions_images.png")

### Dowker complexes on examples

In [None]:
# plotting parameters
# Styling constants for the Dowker example figures; the point-cloud and PD values
# are passed as keyword arguments to the scatter / plot_PD calls in the next cells.
c_highlight = :deeppink2

### for point cloud
# marker size, marker outline width and figure size of the cell scatter plots
p_markersize = 5
p_markerstrokewidth = 0.5
p_imagesize = (300, 300)

### for PD
# forwarded to plot_PD under the keywords of the same names (PD_size as `size`,
# pd_markersize as `markersize`)
diagonal_lw = 2
pd_markersize = 10
tickfontsize = 20
PD_size = (500, 500)
inf_markerstrokewidth = 6

In [None]:
# specify example (patient LTX id, Da id and subregion index make up the file name)
LTX = "092"
Da = "252"
idx = 3


# load the cancer and leukocyte point tables of this example as comma-delimited matrices
dir = "data/4000x4000_combined/Dowker"
filename = string("LTX", LTX, "_Da", Da, "_idx", idx, ".csv")
cells_cancer = readdlm(string(dir, "/cancer/", filename), ',')
cells_leukocytes = readdlm(string(dir, "/leukocytes/", filename), ',');

In [None]:
# plot cancer and leukocytes in a single panel
# The aspect ratio and figure size are set on the figure itself so that they apply
# to what the final `plot(p)` displays.
p = scatter(cells_cancer[:,1], cells_cancer[:,2],
            markersize = p_markersize,
            yflip = true,
            label = "",
            markerstrokewidth = p_markerstrokewidth,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = p_imagesize,
            c = c_cancer)
# overlay the leukocytes on the same axes
scatter!(p, cells_leukocytes[:,1], cells_leukocytes[:,2],
         markersize = p_markersize,
         yflip = true,
         label = "",
         markerstrokewidth = p_markerstrokewidth,
         frame = :box,
         ticks = [],
         c = c_leukocytes)

plot(p)

In [None]:
gr()
# read the dimension-0 Dowker persistence diagram of the selected example
DPD0 = readdlm(string("data/4000x4000_combined/Dowker/cancer_leukocytes/PD0/LTX", LTX, "_Da", Da, "_idx", idx, ".csv"), ',')
# draw the diagram with the shared PD styling constants; rows 3 and 24 are highlighted
p = plot_PD(DPD0;
            pd_min = 0,
            pd_max = 1050,
            inf_coord = 1000,
            highlight = [3, 24],
            markersize = pd_markersize,
            diagonal_lw = diagonal_lw,
            inf_markerstrokewidth = inf_markerstrokewidth,
            tickfontsize = tickfontsize,
            size = PD_size,
            right_margin = 3mm)
plot(p)

In [None]:
P1 = cells_leukocytes
P2 = cells_cancer

# compute pairwise Euclidean distances between the rows of P1 and the rows of P2;
# the second matrix is the same table with the two roles swapped
d_leukocytes_cancer = Distances.pairwise(Euclidean(), P1, P2, dims = 1)
d_cancer_leukocytes = permutedims(d_leukocytes_cancer);

In [None]:
# plot on cancer: Dowker complex at parameter 500 drawn on the cancer cell positions
param = 500
p = plot_Dowker_complex(d_cancer_leukocytes, param, cells_cancer;
                        c = c_cancer,
                        show_unborn_vertices = true,
                        show_2simplex = false)
plot(p; size = (500, 500), yflip = true)

In [None]:
# plot on leukocytes: Dowker complex at parameter 500 drawn on the leukocyte positions
param = 500
p = plot_Dowker_complex(d_leukocytes_cancer, param, cells_leukocytes;
                        c = c_leukocytes,
                        show_unborn_vertices = true,
                        show_2simplex = false)
plot(p; size = (500, 500), yflip = true)

<b> Example with more mixture </b>

In [None]:
# specify example (patient LTX id, Da id and subregion index make up the file name)
LTX = "124"
Da = "104"
idx = 2

# load the cancer and leukocyte point tables of this example as comma-delimited matrices
dir = "data/4000x4000_combined/Dowker"
filename = string("LTX", LTX, "_Da", Da, "_idx", idx, ".csv")
cells_cancer = readdlm(string(dir, "/cancer/", filename), ',')
cells_leukocytes = readdlm(string(dir, "/leukocytes/", filename), ',');

In [None]:
# plot cancer and leukocytes in a single panel
# The aspect ratio and figure size are set on the figure itself so that they apply
# to what the final `plot(p)` displays.
p = scatter(cells_cancer[:,1], cells_cancer[:,2],
            markersize = p_markersize,
            yflip = true,
            label = "",
            markerstrokewidth = p_markerstrokewidth,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = p_imagesize,
            c = c_cancer)
# overlay the leukocytes on the same axes
scatter!(p, cells_leukocytes[:,1], cells_leukocytes[:,2],
         markersize = p_markersize,
         yflip = true,
         label = "",
         markerstrokewidth = p_markerstrokewidth,
         frame = :box,
         ticks = [],
         c = c_leukocytes)

plot(p)

In [None]:
gr()
# read the dimension-0 Dowker persistence diagram of the selected example
DPD0 = readdlm(string("data/4000x4000_combined/Dowker/cancer_leukocytes/PD0/LTX", LTX, "_Da", Da, "_idx", idx, ".csv"), ',')
# draw the diagram with the shared PD styling constants (no points highlighted here)
p = plot_PD(DPD0;
            pd_min = 0,
            pd_max = 1050,
            inf_coord = 1000,
            #highlight = [3, 24],
            markersize = pd_markersize,
            diagonal_lw = diagonal_lw,
            inf_markerstrokewidth = inf_markerstrokewidth,
            tickfontsize = tickfontsize,
            size = PD_size,
            right_margin = 3mm)
plot(p)

### SI for different types of exclusion

In [None]:
# three of the example ROI groups are reused as the "exclusion" groups
exclusion1 = ex2
exclusion2 = ex3
exclusion3 = ex4


# inverse lookup: ROI name => integer index
ROI_idx = Dict(roi => i for (i, roi) in idx_ROI);

# translate each group of ROI names into integer indices
exclusion1_idx = map(roi -> ROI_idx[roi], exclusion1)
exclusion2_idx = map(roi -> ROI_idx[roi], exclusion2)
exclusion3_idx = map(roi -> ROI_idx[roi], exclusion3);

In [None]:
# plot with regions: PCA scatter with the three exclusion groups highlighted
gr()

# load PCA
y = load("analysis/cancer_leukocytes/dim_0/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(exclusion1_idx, exclusion2_idx, exclusion3_idx)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
# background: every ROI that is not part of a highlighted group
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 1,
        label = "", 
        xaxis = "PC1 (70%)",
        yaxis = "PC2 (15%)",
        ticks = [],
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :bottomright
        #background_color=:transparent, foreground_color=:black,
        )
# Each group gets exactly one legend entry. `label` and `labels` name the same
# series attribute, so only one of them is passed.
scatter!(p, y[1,exclusion1_idx], y[2,exclusion1_idx], label = "exclusion 1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(p, y[1,exclusion2_idx], y[2,exclusion2_idx], label = "exclusion 2", markersize = markersize, markershape = :utriangle, markercolor = c2)
scatter!(p, y[1,exclusion3_idx], y[2,exclusion3_idx], label = "exclusion 3", markersize = markersize, markershape = :star, markercolor = c3)
#savefig("analysis/cancer_leukocytes/dim_0/PCA_exclusions.svg")
plot(p)

In [None]:
# Build the panel figure for the exclusion groups: one row per group and, for every
# ROI in that row, three panels (cancer cells, leukocytes, PSRH image).
regions = Dict(1 => exclusion1_idx, 2=> exclusion2_idx, 3=> exclusion3_idx)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_ROI[idx]

        # read the cell table once per ROI; both cell-type panels filter this same table
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

        # cancer
        df_cell = df[df.class .== "cancer", :]
        p_C = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_cancer,
                      right_margin = -4mm)
        push!(plot_array, p_C)

        # leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_leukocytes,
                      right_margin = -4mm)
        push!(plot_array, p_L)

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

# the 3 * 4 column count corresponds to 4 ROIs per group with 3 panels each
p = plot(plot_array..., layout = grid(n, 3 * 4), size = (250 * 3 * 4, 250 * n))
# save the assembled figure explicitly rather than whatever plot happens to be current
savefig(p, "analysis/cancer_leukocytes/dim_0/exclusion_patterns.png")

### plot example PD, PI, PCA coordinates

In [None]:
# save index of example points
# Four hand-picked integer ROI indices per group; the following cell uses each entry
# as a key into idx_ROI, PI and PD and as a column of `transformed`.
R1 = [144, 168, 13, 389]
R2 = [295, 45, 352, 382]
R3 = [316, 289, 128, 368]
R4 = [54, 227, 283, 138];

In [None]:
# For every example ROI of groups R1-R4, draw and save five panels: cancer cells,
# leukocytes, persistence diagram, persistence image and the first four PC scores.
regions = Dict("R1" => R1, "R2"=> R2, "R3"=> R3, "R4"=> R4)
examples = [R1[1], R2[1], R3[1], R4[1]]

plot_array = []
gr()

# get maximum PI pixel value for the selected examples
PI_pixel_max = maximum([maximum(PI[i]) for i in examples])
# iterate the groups in key order; Dict iteration order is otherwise unspecified and
# would make the order of plot_array vary between runs
for (R_string, R) in sort(collect(regions); by = first)
    for i in R
        f = idx_ROI[i]

        # read the cell table once per ROI; both cell-type panels filter this same table
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

        # plot cancer
        df_cell = df[df.class .== "cancer", :]
        p_C = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_cancer)

        # every savefig below names the figure it writes instead of relying on the
        # implicit "current plot"
        savefig(p_C, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/" * R_string * "_" * string(i) * "_cancer.svg")
        push!(plot_array, plot(p_C, ticks = [], frame = :box))


        # plot leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_leukocytes)

        # NOTE(review): this panel is written as .pdf while all other panels are .svg;
        # confirm whether that is intended
        savefig(p_L, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/" * R_string * "_" * string(i) * "_leukocytes.pdf")
        push!(plot_array, plot(p_L, ticks = [], frame = :box))

        # plot PD
        p_PD = plot_PD(PD[i], pd_min = 0, pd_max = PD0_max * plot_scale / 100, lw = 1, 
            tickfontsize = 9,
            labelfontsize = 12,
            markersize = 5,
            inf_coord = PD0_max,
            xlabel = "birth", ylabel = "death", size = (200,200), 
            )
        savefig(p_PD, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/" * R_string * "_" * string(i) * "_PD.svg")
        push!(plot_array, p_PD)


        # plot PI
        p_PI = plot_PI2(PI[i], xmin, xmax, ymin, ymax, 
            xlabel = "birth",
            ylabel = "persistence",
            x_tick_interval = 400,
            y_tick_interval = 400,
            show_axis = false,
            size = (200, 185),
            framestyle = :box,
            aspect_ratio = :equal,
            legend = :false # no colorbar 
            )
        savefig(p_PI, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/" * R_string * "_" * string(i) * "_PI.svg")
        push!(plot_array, p_PI)

        # plot the first four PCA coordinates of this ROI
        p_coords = plot_scores(transformed[1:4,i]; xtickfontsize = 8, ytickfontsize = 8, coord_label = "PC")
        p_coords = plot(p_coords, size = (200, 200))
        savefig(p_coords, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/" * R_string * "_" * string(i) * "_coords.svg")
        push!(plot_array, p_coords)
    end
end
#p = plot(plot_array..., layout = grid(4,5, widths=[0.20 ,0.20, 0.20, 0.20, 0.20]), size = (1000, 900) )
#savefig("analysis/cancer_leukocytes/dim_0/PCA_coordinates.png")

### Plot: global exclusion vs local exclusion

In [None]:
global_idx = [144, 389]
local_idx = [316, 289]

# For every ROI in global_idx draw three panels: cancer cells only, leukocytes only,
# and both cell types overlaid.
gr()
plot_array = []
for idx in global_idx
    f = idx_ROI[idx]
    df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

    # split the table by cell class
    df_C = df[df.class .== "cancer", :]
    df_L = df[df.class .== "leukocytes", :]

    # cancer-only panel
    p_C = scatter(df_C.x, df_C.y;
        c = c_cancer,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150),
        right_margin = -4mm)

    # leukocyte-only panel
    p_L = scatter(df_L.x, df_L.y;
        c = c_leukocytes,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150),
        right_margin = -4mm)

    # overlay panel: cancer first, leukocytes drawn on top
    p_both = scatter(df_C.x, df_C.y;
        c = c_cancer,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150))
    scatter!(p_both, df_L.x, df_L.y;
        c = c_leukocytes,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150))

    push!(plot_array, p_C, p_L, p_both)
end

# two ROIs x three panels
p = plot(plot_array...; layout = grid(2, 3), size = (250 * 3, 250 * 2))
savefig(p, "analysis/cancer_leukocytes/dim_0/global_exclusions.png")

In [None]:
global_idx = [144, 389]
local_idx = [316, 289]

# For every ROI in local_idx draw three panels: cancer cells only, leukocytes only,
# and both cell types overlaid.
gr()
plot_array = []
for idx in local_idx
    f = idx_ROI[idx]
    df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

    # split the table by cell class
    df_C = df[df.class .== "cancer", :]
    df_L = df[df.class .== "leukocytes", :]

    # cancer-only panel
    p_C = scatter(df_C.x, df_C.y;
        c = c_cancer,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150),
        right_margin = -4mm)

    # leukocyte-only panel
    p_L = scatter(df_L.x, df_L.y;
        c = c_leukocytes,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150),
        right_margin = -4mm)

    # overlay panel: cancer first, leukocytes drawn on top
    p_both = scatter(df_C.x, df_C.y;
        c = c_cancer,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150))
    scatter!(p_both, df_L.x, df_L.y;
        c = c_leukocytes,
        label = "",
        markersize = 1.5,
        markerstrokewidth = 0.2,
        yflip = true,
        aspect_ratio = :equal,
        frame = :box,
        ticks = [],
        size = (150,150))

    push!(plot_array, p_C, p_L, p_both)
end

# two ROIs x three panels
p = plot(plot_array...; layout = grid(2, 3), size = (250 * 3, 250 * 2))
savefig(p, "analysis/cancer_leukocytes/dim_0/local_exclusions.png")

### images with high and low PCs

In [None]:
# for each of the first four PCs, pick the example indices returned for that
# coordinate and write the corresponding figure to PC<i>_examples.png
for i in 1:4
    min_indices, max_indices = get_coordinate_min_max_examples(transformed, i)
    plot_low_high_PC_cancer_leukocytes_PSRH(
        min_indices,
        max_indices,
        idx_ROI,
        string("analysis/cancer_leukocytes/dim_0/PCA_coordinates/PC", i, "_examples.png"))
end

Plot example ROIs with high and low values of one PC whose coordinates along the other PCs are close to 0

In [None]:
# one histogram per PC (rows 1-4 of `transformed`), arranged in a 2 x 2 grid
plot_array = []
for i in 1:4
    p = histogram(transformed[i,:]; title = string("PC ", i), label = "")
    push!(plot_array, p)
end
plot(plot_array...; size = (800, 500), layout = grid(2,2))

In [None]:
# one threshold per PC; the helper is called once for each of the four coordinates
# and its two return values are unpacked into small_k / large_k
thresholds = [0.0005, 0.0002, 0.00005, 0.00008]
(small_1, large_1), (small_2, large_2), (small_3, large_3), (small_4, large_4) = map(
    pc -> get_small_large_coordinate_examples(transformed, pc, thresholds; n_coordinates = 4),
    1:4);

In [None]:
# One call per PC: pass the small/large example indices selected for that PC together
# with the index => ROI-name lookup and the output path PC<k>_examples_controlled.png.
plot_low_high_PC_cancer_leukocytes_PSRH(small_1, large_1, idx_ROI, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/PC1_examples_controlled.png")
plot_low_high_PC_cancer_leukocytes_PSRH(small_2, large_2, idx_ROI, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/PC2_examples_controlled.png")
plot_low_high_PC_cancer_leukocytes_PSRH(small_3, large_3, idx_ROI, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/PC3_examples_controlled.png")
plot_low_high_PC_cancer_leukocytes_PSRH(small_4, large_4, idx_ROI, "analysis/cancer_leukocytes/dim_0/PCA_coordinates/PC4_examples_controlled.png")

### Persistence vector

In [None]:
# re-key the dimension-0 diagrams by integer ROI index
PD0_new = Dict(i => PD0[idx_ROI[i]] for i in 1:length(idx_ROI));
# persistence (column 2 minus column 1) of every point, sorted in decreasing order
PD0_persistence = Dict(i => sort(pd[:,2] - pd[:,1]; rev = true) for (i, pd) in PD0_new)

# truncate all vectors to a common length: keep entries 2..l, where l is the length
# of the shortest vector (the largest entry of each vector is dropped)
l = minimum(length, values(PD0_persistence))
PD0_persistence = Dict(i => pers[2:l] for (i, pers) in PD0_persistence);

In [None]:
# subtract the mean
# one persistence vector per column, centred feature-wise (row means removed)
n = length(PD0_persistence)
PI_array = hcat([vec(PD0_persistence[i]) for i =1:n]...)
PI_centered = PI_array .- mean(PI_array, dims = 2)
# Seed the global RNG before embedding, as the dim-1 UMAP cell does, so that the
# layout (and the example indices picked from it) can be reproduced on re-runs.
Random.seed!(10)
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/cancer_leukocytes/dim_0/persistence_vectors/umap.csv", embedding, ",")
#y = embedding = readdlm("analysis/cancer_leukocytes/dim_0/persistence_vectors/umap.csv", ',');

In [None]:
# interactive (plotly) scatter of the UMAP embedding; hovering a point shows its
# column index
plotly()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:];
    hover = 1:n,
    label = "",
    legend = :topright,
    markercolor = "slategrey",
    markersize = 5,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    xticks = [],
    yticks = [],
    guidefontsize = 15,
    framestyle = :box,
    leftmargin = 5mm,
    size = (450, 350))

In [None]:
# Four hand-picked groups of integer ROI indices; the next cells use them as column
# indices of the UMAP embedding and as keys into idx_ROI.
U1 = [187, 69, 210, 175]
U2 = [353, 111, 341, 52]
U3 = [254, 352, 236, 276]
U4 = [334, 202, 177, 23]

In [None]:
# plot with regions: UMAP scatter with the groups U1-U4 highlighted
gr()

# the coordinates plotted here are the columns of the UMAP `embedding` matrix
y = embedding
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(U1, U2, U3, U4)
nonannotated = filter(i -> i ∉ annotated, 1:n)
# background: every point that is not part of a highlighted group
p = scatter(y[1,nonannotated], y[2,nonannotated];
    label = "",
    markercolor = "lightgray",
    alpha = 0.6,
    markersize = markersize,
    markerstrokewidth = 3,
    xaxis = "UMAP-1",
    yaxis = "UMAP-2",
    #xtickfontsize = 15,
    #ytickfontsize = 15,
    #xrotation = 45,
    ticks = [],
    guidefontsize = 7,
    framestyle = :box,
    size = (200, 150),
    #leftmargin = 2mm,
    legend = :bottomright
    #background_color=:transparent, foreground_color=:black,
    )
# one series per group, each with its own marker shape, colour and legend entry
scatter!(p, y[1,U1], y[2,U1]; labels = "U1", markershape = :rect, markercolor = c1, markersize = markersize, legendfontsize = legendfontsize)
scatter!(p, y[1,U2], y[2,U2]; labels = "U2", markershape = :utriangle, markercolor = c2, markersize = markersize)
scatter!(p, y[1,U3], y[2,U3]; labels = "U3", markershape = :star, markercolor = c3, markersize = markersize)
scatter!(p, y[1,U4], y[2,U4]; labels = "U4", markershape = :diamond, markercolor = c4, markersize = markersize)
#savefig("analysis/cancer_leukocytes/dim_0/persistence_vectors/umap.pdf")
plot(p)

In [None]:
# Collect the panels for the UMAP groups: for every ROI of U1-U4, three plots
# (cancer cells, leukocytes, PSRH image) are appended to plot_array.
regions = Dict(1 => U1, 2=> U2, 3=> U3, 4=> U4)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_ROI[idx]

        # read the cell table once per ROI; both cell-type panels filter this same table
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

        # cancer
        df_cell = df[df.class .== "cancer", :]
        p_C = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_cancer)
        push!(plot_array, p_C)

        # leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_leukocytes)
        push!(plot_array, p_L)

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end


In [None]:

# arrange the collected panels as n rows of 4 ROIs x 3 panels and write the figure
p = plot(plot_array...; size = (250 * 4 * 3, 250 * n), layout = grid(n, 4 * 3))
savefig(p, "analysis/cancer_leukocytes/dim_0/persistence_vectors/UMAP_examples.png")

#### PCA

In [None]:
# stack the persistence vectors as columns (one per ROI index 1..n) and subtract
# the mean of every row
n = length(PD0_persistence)
PI_array = hcat((vec(PD0_persistence[k]) for k in 1:n)...)
PI_centered = PI_array .- mean(PI_array; dims = 2);

In [None]:
# compute PCA
pratio = 0.99

# Variance explained with 1, 2 and 4 components. Only the explained-variance ratio
# of these auxiliary fits is needed, so the data are not projected with them.
variance_1 = principalratio(fit(PCA, PI_centered, maxoutdim = 1))
variance_2 = principalratio(fit(PCA, PI_centered, maxoutdim = 2))
variance_4 = principalratio(fit(PCA, PI_centered, maxoutdim = 4))

# perform PCA, keeping as many components as needed to reach `pratio`
M = fit(PCA, PI_centered, pratio = pratio)
transformed = MultivariateStats.transform(M, PI_centered)

# get eigenvectors: one projection column per retained component
n_eigenvectors = size(transformed, 1)
eigenvectors_array = projection(M)
eigenvectors = Dict(i => eigenvectors_array[:,i] for i = 1:n_eigenvectors)

save("analysis/cancer_leukocytes/dim_0/persistence_vectors/PCA.jld2",
    "transformed", transformed,
    "eigenvectors", eigenvectors,
    "variance_1", variance_1,
    "variance_2", variance_2)

println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# interactive (plotly) scatter of the first two PCA coordinates; hovering a point
# shows its column index
plotly()
y = transformed
n = size(y, 2)
p = scatter(y[1,:], y[2,:];
    hover = 1:n,
    label = "",
    legend = :topright,
    markercolor = "slategrey",
    markersize = 5,
    xticks = [],
    yticks = [],
    guidefontsize = 15,
    framestyle = :box,
    leftmargin = 5mm,
    size = (450, 350))

In [None]:
# Four hand-picked groups of integer ROI indices; the next cells use them as column
# indices of the PCA output and as keys into idx_ROI.
R1 =  [129, 134, 24, 184]
R2 = [178, 47, 26, 342]
R3 = [109, 384, 96, 66]
R4 = [350, 168, 266, 122];

In [None]:
# plot with regions: persistence-vector PCA scatter with the groups R1-R4 highlighted
gr()

# load PCA
y = load("analysis/cancer_leukocytes/dim_0/persistence_vectors/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
# background: every ROI that is not part of a highlighted group
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 3,
        label = "", 
        xaxis = "PC1 (58%)",
        yaxis = "PC2 (24%)",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        xticks = (0, 0),
        yticks = (0,0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :bottomleft
        #background_color=:transparent, foreground_color=:black,
        )
# Each group gets exactly one legend entry. `label` and `labels` name the same
# series attribute, so only one of them is passed.
scatter!(p, y[1,R1], y[2,R1], label = "R1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(p, y[1,R2], y[2,R2], label = "R2", markersize = markersize, markershape = :utriangle, markercolor = c2)
scatter!(p, y[1,R3], y[2,R3], label = "R3", markersize = markersize, markershape = :star, markercolor = c3)
scatter!(p, y[1,R4], y[2,R4], label = "R4", markersize = markersize, markershape = :diamond, markercolor = c4)
# save this figure explicitly rather than whatever plot happens to be current
savefig(p, "analysis/cancer_leukocytes/dim_0/persistence_vectors/PCA.pdf")
plot(p)

In [None]:
# Collect the panels for the PCA groups: for every ROI of R1-R4, three plots
# (cancer cells, leukocytes, PSRH image) are appended to plot_array.
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_ROI[idx]

        # read the cell table once per ROI; both cell-type panels filter this same table
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

        # cancer
        df_cell = df[df.class .== "cancer", :]
        p_C = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_cancer)
        push!(plot_array, p_C)

        # leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                      markersize = 1.5,
                      yflip = true,
                      label = "",
                      markerstrokewidth = 0.2,
                      frame = :box,
                      ticks = [],
                      aspect_ratio = :equal,
                      size = (150,150),
                      c = c_leukocytes)
        push!(plot_array, p_L)

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end


In [None]:
# arrange the collected panels as n rows of 4 ROIs x 3 panels and write the figure
p = plot(plot_array...; size = (250 * 4 * 3, 250 * n), layout = grid(n, 4 * 3))
savefig(p, "analysis/cancer_leukocytes/dim_0/persistence_vectors/PCA_examples.png")

## analysis: dim 1

In [None]:
# number the ROIs that have a dimension-1 persistence image
ROIs = collect(keys(PI1))
idx_ROI = Dict(k => name for (k, name) in enumerate(ROIs))
n = length(ROIs)

# create new PI1 with integers as keys (and the same for the diagrams)
PI = Dict(k => PI1[name] for (k, name) in enumerate(ROIs));
PD = Dict(k => PD1[name] for (k, name) in enumerate(ROIs));

# save index

# save("analysis/cancer_leukocytes/dim_1/data.jld2",
#    "idx_ROI", idx_ROI,
#    "PI", PI,
#    "PD", PD);


In [None]:
# load the saved index and rebuild the integer-keyed PI / PD dictionaries from it
data = load("analysis/cancer_leukocytes/dim_1/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
PI = Dict(k => PI1[idx_ROI[k]] for k in 1:n);
PD = Dict(k => PD1[idx_ROI[k]] for k in 1:n);

# inverse lookup: ROI name => integer index
ROI_idx = Dict(name => k for (k, name) in idx_ROI);

### UMAP

In [None]:
# Seed the global RNG first so that the embedding computed below is reproducible,
# then centre the persistence images and embed them in 2 dimensions.
Random.seed!(10)
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/cancer_leukocytes/dim_1/umap.csv", embedding, ",")
# replace the embedding with the copy stored on disk; `y` refers to the same matrix
embedding = readdlm("analysis/cancer_leukocytes/dim_1/umap.csv", ',')
y = embedding;

In [None]:
# scatter of the dimension-1 UMAP embedding (one point per column)
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:];
    hover = 1:n,
    label = "",
    legend = :topright,
    markercolor = "slategrey",
    markersize = 5,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    xticks = [],
    yticks = [],
    guidefontsize = 15,
    framestyle = :box,
    leftmargin = 5mm,
    size = (450, 350))
#savefig("analysis/cancer_leukocytes/dim_1/umap.pdf")
plot(p)

### PCA

In [None]:
# compute PCA
# PI_to_PCA2 returns five values; the first four are kept under the names used by
# the rest of this section and the fifth is discarded.
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA2(PI; pratio = 0.99)

# Saving is disabled; the next cell loads the stored result instead.
# save("analysis/cancer_leukocytes/dim_1/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)

In [None]:
# load the stored dimension-1 PCA result and unpack its four entries
data = load("analysis/cancer_leukocytes/dim_1/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"]

# report the number of components and the variance captured by the first two
println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# static (GR) scatter of the first two PCA coordinates
gr()
p = scatter(transformed[1,:], transformed[2,:];
    c = :slategrey,
    label = "",
    hover = collect(axes(transformed, 2)),
    ticks = [],
    frame = :box)
#savefig("analysis/cancer_leukocytes/dim_1/pca.pdf")
plot(p)

In [None]:
# interactive (plotly) version of the same scatter; hovering shows the column index
plotly()

p = scatter(transformed[1,:], transformed[2,:];
    c = :slategrey,
    label = "",
    hover = collect(axes(transformed, 2)),
    ticks = [],
    frame = :box)
#savefig("analysis/" * systems * "/dim_1/pca.svg")
plot(p)

In [None]:
# save index of example points
# Four hand-picked groups of integer ROI indices for the dimension-1 PCA; the
# plotting cells use them as column indices of the PCA output and as keys into idx_ROI.
R1 = [295, 182, 45, 26]
R2 = [71, 148, 55, 132]
R3 = [138, 31, 254, 335]
R4 = [153, 226, 28, 19]


In [None]:
# inverse of idx_ROI: maps each ROI name back to its integer index
ROI_idx = Dict(v => k for (k,v) in idx_ROI)

In [None]:
# display the ROI name => index lookup
ROI_idx

In [None]:
# test: override the groups with the indices of named example ROIs
R1 = [ROI_idx["LTX221_Da505_idx1"], ROI_idx["LTX221_Da505_idx2"], ROI_idx["LTX221_Da505_idx3"], ROI_idx["LTX221_Da2892_idx1"]]
R2 = [ROI_idx["LTX013_Da153_idx3"], ROI_idx["LTX013_Da153_idx2"], ROI_idx["LTX013_Da153_idx1"], ROI_idx["LTX013_Da154_idx3"]]
# typed empty vector, so that R3 concatenates with and indexes like the other
# integer index vectors
R3 = Int[]

In [None]:
# plot with regions: dimension-1 PCA scatter with the groups R1-R4 highlighted
gr()

# load PCA
#y = load("analysis/cancer_leukocytes/dim_1/PCA.jld2")["transformed"]
y= transformed
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
# background: every ROI that is not part of a highlighted group
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 3,
        label = "", 
        xaxis = "PC1 (83%)",
        yaxis = "PC2 (8%)",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        ticks = [],
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :topright
        #background_color=:transparent, foreground_color=:black,
        )
# Each group gets exactly one legend entry. `label` and `labels` name the same
# series attribute, so only one of them is passed.
scatter!(p, y[1,R1], y[2,R1], label = "R1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(p, y[1,R2], y[2,R2], label = "R2", markersize = markersize, markershape = :utriangle, markercolor = c2)
scatter!(p, y[1,R3], y[2,R3], label = "R3", markersize = markersize, markershape = :star, markercolor = c3)
scatter!(p, y[1,R4], y[2,R4], label = "R4", markersize = markersize, markershape = :diamond, markercolor = c4)
# save this figure explicitly rather than whatever plot happens to be current
savefig(p, "analysis/cancer_leukocytes/dim_1/pca.pdf")
plot(p)

In [None]:
# plot cancer, leukocytes, PSRH images
# For every example ROI of every region three panels are appended, in this
# order: cancer cells, leukocytes, PSRH image. The final grid has one row per
# region and assumes 4 example ROIs per region (3 * 4 columns).
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_ROI[idx]

        # read the ROI's cell table once; both cell-type panels filter this table
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")

        # cancer
        df_cell = df[df.class .== "cancer", :]
        p_C = scatter(df_cell.x, df_cell.y,
                         markersize = 1.5,
                         yflip = true,
                         label = "",
                         markerstrokewidth = 0.2,
                         frame = :box,
                         ticks = [],
                        aspect_ratio = :equal,
                        size = (150,150),
                         c = c_cancer) 
        push!(plot_array, p_C)

        # leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                         markersize = 1.5,
                         yflip = true,
                         label = "",
                         markerstrokewidth = 0.2,
                         frame = :box,
                         ticks = [],
                        aspect_ratio = :equal,
                        size = (150,150),
                         c = c_leukocytes) 
        push!(plot_array, p_L)

        # PSRH
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm))
    end
end

p = plot(plot_array..., layout = grid(n, 3 * 4), size = (250 * 3 * 4, 250 * n))
savefig("analysis/cancer_leukocytes/dim_1/PCA_examples.png")

In [None]:
# plot with regions
# Larger-marker version of the annotated PCA scatter (transparent background).
gr()

y = transformed
markersize = 10
legendfontsize = 15
example_idx = vcat([R1, R2, R3, R4]...)
n = size(y, 2)
non_example = [i for i = 1:n if i ∉ example_idx]
p = scatter(y[1,non_example], y[2,non_example], 
        markercolor = "slategrey",
        markersize = 5, 
       label = "", xticks = [], yticks = [], framestyle = :box,  
        background_color=:transparent, foreground_color=:black)
# a single `label` per series (previously both `label = ""` and its alias `labels` were passed)
scatter!(y[1,R1], y[2,R1], label = "R1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], label = "R2", markersize = 15, markershape = :utriangle, markercolor = c2)
scatter!(y[1,R3], y[2,R3], label = "R3", markersize = 15, markershape = :star, markercolor = c3)
scatter!(y[1,R4], y[2,R4], label = "R4", markersize = 15, markershape = :diamond, markercolor = c4)
#savefig("analysis/" * systems * "/dim_1/pca_annotated.svg")
plot(p)

In [None]:
# plot and save example regions
# One figure per region: a 4 x 6 grid, six panels for each example ROI as
# returned by the project helper `plot_Dowker_profile_cells`.
for (R_key, R_val) in regions

    plot_array = []
    for idx in R_val
        p1, p2, p3, p4, p5, p6 = plot_Dowker_profile_cells(nonempty_keys_to_original, idx_file, idx, PD1, PI1, PD1_max, "cancer", "leukocytes", c_cancer, c_leukocytes)
        append!(plot_array, [p1, p2, p3, p4, p5, p6])
    end
    p = plot(plot_array..., layout = grid(4,6), size = (1200, 800))
    # string(...) so that integer region keys (as in `Dict(1 => R1, ...)`) work as well as string keys
    savefig("analysis/" * systems * "/dim_1/example_" * string(R_key) * ".svg")
end

In [None]:
# plot eigenvectors

gr()
n_eigenvectors = 4

# one titled heatmap per leading eigenvector, stacked vertically below
plot_array = Any[plot(heatmap(eigenvectors[k]), title = "eigenvector "* string(k), label = "")
                 for k in 1:n_eigenvectors]
p = plot(plot_array...,
         layout = (n_eigenvectors, 1),
         size = (300, 200 * n_eigenvectors),
         background_color=:transparent,
         foreground_color=:black)
savefig(string("analysis/", systems, "/dim_1/eigenvectors.svg"))


In [None]:
# plot histogram of the coordinates of the eigenvectors
# One histogram per row of `transformed` (rows 1..n_eigenvectors), shown side by side.
plot_array = []
n_eigenvectors = 4
for i = 1:n_eigenvectors
    p = histogram(transformed[i,:], label = "")
    push!(plot_array, p)
end
plot(plot_array..., size = (900, 200))


In [None]:
# decide on threshold for selecting example images with high and low coordinates

# thresholds[i] is the t-quantile (t = 0.5, i.e. the median) of row i of `transformed`
thresholds = Dict()
t = 0.5
for i =1:n_eigenvectors
    # non-mutating `quantile` on a view: no throwaway copy is made just to be partially sorted
    thresholds[i] = quantile(view(transformed, i, :), t)
end

In [None]:
# plot example profiles for high eigenvector coordinates 
# For each coordinate i, pick examples with the project helper
# `get_large_coordinate_examples`, plot the first four of them (7 panels each)
# and save one 4 x 7 figure per coordinate.
for i =1:n_eigenvectors
    selected = get_large_coordinate_examples(transformed, i, thresholds, n_coordinates = n_eigenvectors);
    plot_array = []
    # NOTE(review): `selected[1:4]` throws a BoundsError if fewer than four examples are returned
    for idx in selected[1:4]
        p1, p2, p3, p4, p5, p6 = plot_Dowker_profile_cells(nonempty_keys_to_original, idx_file, idx, PD1, PI1, PD1_max, "cancer", "leukocytes", c_cancer, c_leukocytes)

        # plot the eigenvector scores
        p7 = plot_scores(transformed[:,idx], xtickfontsize = 9, ytickfontsize = 8, coord_label = "", n_coordinates = 4)
        append!(plot_array, [p1, p2, p3, p4, p5, p6, p7])
    end
    p = plot(plot_array..., layout = grid(4,7), size = (1400, 800))
    savefig("analysis/" * systems * "/dim_1/coordinate_" * string(i) * "_examples.svg")    
end

In [None]:
# subtract the mean
PI_dict = PI1_nonempty
n = length(PI_dict)
# one flattened persistence image per column; `reduce(hcat, ...)` avoids splatting n arguments
PI_array = reduce(hcat, [vec(PI_dict[i]) for i =1:n])
# centre every feature (row) across samples
PI_centered = PI_array .- mean(PI_array, dims = 2)
# seed the RNG as the other UMAP cells in this notebook do, so that the
# embedding can be regenerated
Random.seed!(10)
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# non-linear dimensionality reduction
# Interactive scatter of the 2-row `embedding`; hover text is the column index.
plotly()
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        hover = collect(1:size(embedding, 2)),
       label = "", xticks = [], yticks = [], framestyle = :box,  
        background_color=:transparent, foreground_color=:black, legend = :topright)

In [None]:
# Hand-entered column indices into `embedding` for three example regions.
# NOTE(review): these are only meaningful for the embedding they were read from — confirm after re-running UMAP.
R1 = [35, 254, 154, 53]
R2 = [125, 49, 172, 238]
R3 = [146, 151, 129, 138];

In [None]:
# UMAP scatter with the three example regions highlighted; all other points are grey.
gr()
markersize = 10
legendfontsize = 15
example_idx = vcat([R1, R2, R3]...)
n = size(embedding, 2)
non_example = [i for i = 1:n if i ∉ example_idx]
p = scatter(embedding[1,non_example], embedding[2,non_example], 
        markercolor = "slategrey",
        markersize = 5, 
       label = "", xticks = [], yticks = [], framestyle = :box,  
        background_color=:transparent, foreground_color=:black, legend = :bottomleft)
# a single `label` per series (previously both `label = ""` and its alias `labels` were passed)
scatter!(embedding[1,R1], embedding[2,R1], label = "R1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(embedding[1,R2], embedding[2,R2], label = "R2", markersize = 15, markershape = :utriangle, markercolor = c2)
scatter!(embedding[1,R3], embedding[2,R3], label = "R3", markersize = 15, markershape = :star, markercolor = c3)
savefig("analysis/" * systems * "/dim_1/umap_annotated.svg")
plot(p)

In [None]:
# save 
# Store the embedding together with the neighbour count and the example indices.
save(string("analysis/", systems, "/dim_1/umap.jld2"),
     "n_neighbors", 5,
     "embedding", embedding,
     "R1", R1,
     "R2", R2,
     "R3", R3)

In [None]:
# plot example regions
# Map region names to their example indices, then save one 4 x 6 figure per region.
regions = Dict()
for (region_name, members) in zip(("R1", "R2", "R3"), (R1, R2, R3))
    regions[region_name] = members
end

for (region_name, members) in regions
    plot_array = []
    for idx in members
        # six panels per example ROI from the project helper
        p1, p2, p3, p4, p5, p6 = plot_Dowker_profile_cells(nonempty_keys_to_original, idx_file, idx, PD1, PI1, PD1_max, "cancer", "leukocytes", c_cancer, c_leukocytes)
        append!(plot_array, [p1, p2, p3, p4, p5, p6])
    end
    p = plot(plot_array..., layout = grid(4,6), size = (1200, 800))
    savefig(string("analysis/", systems, "/dim_1/umap_example_", region_name, ".svg"))
end

## Take 2 PC from Dowker features + density

In [None]:
# load
# cached dim-0 PCA results
data = load("analysis/cancer_leukocytes/dim_0/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"];

# load PI, index
data = load("analysis/cancer_leukocytes/dim_0/data.jld2")
idx_ROI, PI, PD = data["idx_ROI"], data["PI"], data["PD"];

# reverse lookup: ROI name => integer index
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# distinct types of exclusion
# Each exN holds the integer indices of four named ROIs, looked up in `ROI_idx`.
ex1 = [ROI_idx[roi] for roi in ("LTX221_Da505_idx1", "LTX221_Da505_idx2", "LTX221_Da505_idx3", "LTX221_Da2892_idx1")]
ex2 = [ROI_idx[roi] for roi in ("LTX013_Da153_idx3", "LTX013_Da153_idx2", "LTX013_Da153_idx1", "LTX013_Da154_idx3")]
ex3 = [ROI_idx[roi] for roi in ("LTX092_Da252_idx3", "LTX092_Da379_idx4", "LTX092_Da378_idx4", "LTX092_Da440_idx1")]
ex4 = [ROI_idx[roi] for roi in ("LTX097_Da113_idx2", "LTX097_Da636_idx1", "LTX097_Da113_idx3", "LTX097_Da636_idx2")]


# the three exclusion groups plotted below are ex2, ex3 and ex4 (ex1 is not used)
exclusion1, exclusion2, exclusion3 = ex2, ex3, ex4

In [None]:
# get number of cancer cells and leukocytes
# Counts are keyed by the integer ROI index used for the columns of `transformed`.
cancer_count = Dict{Int,Int}()
leukocyte_count = Dict{Int,Int}()
n = length(PI)

for i = 1:n
    ROI = idx_ROI[i]
     # load cells
    cells = CSV.read("data/4000x4000_combined/subregion_cells/" * ROI * ".csv")

    # `cells.class` matches the column access used elsewhere in this notebook
    cancer_count[i] = count(cells.class .== "cancer")
    leukocyte_count[i] = count(cells.class .== "leukocytes")
end

# as lists
cancer_count_list = [cancer_count[i] for i=1:n]
# the plotting cells read `leukocyte_count_list`; the plural spelling is kept as an alias
leukocyte_count_list = [leukocyte_count[i] for i = 1:n]
leukocytes_count_list = leukocyte_count_list;

In [None]:
# 3D scatter: PC1, PC2 and cancer-cell count, with the three exclusion groups highlighted.
plotlyjs()

markersize = 2
# set color scheme
counts = [cancer_count[i] for i = 1:n]
min_count = 0
max_count = maximum(counts)
z = counts ./ max_count
# round(Int, ...) cannot throw the InexactError that Int(round(x, digits = 3) * 1000)
# raises when the floating-point product is not an exact integer
z = [round(Int, item * 1000) for item in z]

# Create a discrete color gradient 
my_colors = [cgrad(:haline, [0, 1])[frac] for frac ∈ range(0.0, 1.0, length = 1000)]



annotated = vcat(exclusion1, exclusion2, exclusion3)
# index the columns of the data actually plotted (not a `y` left over from another section)
nonannotated = [i for i = 1:size(transformed, 2) if i ∉ annotated]
plt3d= Plots.plot(transformed[1,nonannotated],transformed[2,nonannotated], cancer_count_list[nonannotated],
         seriestype=:scatter, 
        markersize = 2, 
        markerstrokewidth = 0,
        label ="",
        color = "lightgray",
        #alpha = 0.1,
        #opacity = 0.9,
        xlabel = "PC1",
        ylabel = "PC2",
        zlabel = "cancer count",
        )


# a single `label` per series (previously both `label = ""` and its alias `labels` were passed)
scatter!(transformed[1,exclusion1], transformed[2,exclusion1], cancer_count_list[exclusion1], label = "exclusion 1", markersize = 3, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(transformed[1,exclusion2], transformed[2,exclusion2], cancer_count_list[exclusion2], label = "exclusion 2", markersize = 3, markershape = :diamond, markercolor = c2)
scatter!(transformed[1,exclusion3], transformed[2,exclusion3], cancer_count_list[exclusion3], label = "exclusion 3", markersize = 1.5, markershape = :xcross, markercolor = c3)



display(plt3d)


In [None]:
# 3D scatter: PC1, PC2 and leukocyte count, with the three exclusion groups highlighted.
plotlyjs()

markersize = 2
# set color scheme
counts = [cancer_count[i] for i = 1:n]
min_count = 0
max_count = maximum(counts)
z = counts ./ max_count
# round(Int, ...) cannot throw the InexactError that Int(round(x, digits = 3) * 1000)
# raises when the floating-point product is not an exact integer
z = [round(Int, item * 1000) for item in z]

# Create a discrete color gradient 
my_colors = [cgrad(:plasma, [0, 1])[frac] for frac ∈ range(0.0, 1.0, length = 1000)]



annotated = vcat(exclusion1, exclusion2, exclusion3)
# index the columns of the data actually plotted (not a `y` left over from another section)
nonannotated = [i for i = 1:size(transformed, 2) if i ∉ annotated]
# `leukocytes_count_list` is the name the counting cell defines
plt3d= Plots.plot(transformed[1,nonannotated],transformed[2,nonannotated], leukocytes_count_list[nonannotated],
         seriestype=:scatter, 
        markersize = 2, 
        markerstrokewidth = 0,
        label ="",
        color = "lightgray",
        #alpha = 0.1,
        #opacity = 0.9,
        xlabel = "PC1",
        ylabel = "PC2",
        zlabel = "leukocytes count",
        )


# a single `label` per series (previously both `label = ""` and its alias `labels` were passed)
scatter!(transformed[1,exclusion1], transformed[2,exclusion1], leukocytes_count_list[exclusion1], label = "exclusion 1", markersize = 3, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(transformed[1,exclusion2], transformed[2,exclusion2], leukocytes_count_list[exclusion2], label = "exclusion 2", markersize = 3, markershape = :diamond, markercolor = c2)
scatter!(transformed[1,exclusion3], transformed[2,exclusion3], leukocytes_count_list[exclusion3], label = "exclusion 3", markersize = 1.5, markershape = :xcross, markercolor = c3)



display(plt3d)

In [None]:
# Draw the three exclusion markers (shape + colour) stacked at x = 0 and save
# them as a stand-alone figure.
plotlyjs()
scatter([0],[0], markershape = :rect, markercolor = c1, markersize = 6, ticks =[])
scatter!([0],[0.5], markershape = :diamond, markercolor = c2, markersize = 6)
scatter!([0],[1], markershape = :xcross, markercolor = c3, markersize = 6)
Plots.savefig("analysis/cancer_leukocytes/dim_0/SI_exclusion_markers.pdf")

## Cancer and leukocytes - combined dim 0 & 1 

In [None]:
# load data
# Load the cancer/leukocytes persistence diagrams, derive persistence images
# with the project helper `compute_PI2`, and merge the dim-0 and dim-1 images
# with `combine_PI0_PI1_dicts_Dowker` (both helpers are defined outside this view).
systems = "cancer_leukocytes"
data = load("data/4000x4000_combined/Dowker/cancer_leukocytes/PD.jld2")

PD0 = data["PD0"]
PD1 = data["PD1"]
PI0 = compute_PI2(PD0)
PI1 = compute_PI2(PD1)
PD0_max = data["PD0_max"]
PD1_max = data["PD1_max"];

features = combine_PI0_PI1_dicts_Dowker(PI0, PI1);

In [None]:
#ROIs = collect(keys(features))
#idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis/cancer_leukocytes/dim_01_combined/idx_ROI.jld2", "idx_ROI", idx_ROI)
# Reuse the stored index => ROI mapping and build its inverse.
idx_ROI = load("analysis/cancer_leukocytes/dim_01_combined/idx_ROI.jld2")["idx_ROI"];
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# prepare features array
n = length(idx_ROI)
# one column per ROI, in index order; `reduce(hcat, ...)` avoids splatting n arguments
features_array = reduce(hcat, [features[idx_ROI[i]] for i = 1:n])
println("features array shape: ", size(features_array))

# centre every feature (row) across ROIs
features_centered = features_array .- mean(features_array, dims = 2);

### UMAP

In [None]:
# seed the RNG as the other UMAP cells in this notebook do, so the embedding can be regenerated
Random.seed!(10)
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to CSV; the commented line reloads it instead of recomputing.
writedlm("analysis/cancer_leukocytes/dim_01_combined/umap.csv", embedding, ",")
#embedding = readdlm("analysis/cancer_leukocytes/dim_01_combined/umap.csv", ',');

In [None]:
# Static UMAP scatter (one point per column of `embedding`), saved as SVG.
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        #hover = 1:n,
        legend = :topright)
savefig("analysis/cancer_leukocytes/dim_01_combined/umap.svg")

### PCA

In [None]:
# PCA of the centred combined features via the project helper
# `centered_features_to_PCA` (defined outside this view).
# NOTE(review): `variance_4` is not used in the visible cells — confirm whether it is needed.
transformed, variance_1, variance_2, variance_4 = centered_features_to_PCA(features_centered)
println(variance_1)
println(variance_2)

In [None]:
# store the PCA coordinates as comma-separated values
writedlm("analysis/cancer_leukocytes/dim_01_combined/PCA.csv", transformed, ",")

In [None]:
# Scatter of the first two PCA coordinates, saved as SVG.
# NOTE(review): the axis percentages are hard-coded; "PCA2(80%)" exceeding
# "PCA1(65%)" suggests the second figure is cumulative — confirm against
# `variance_1` / `variance_2`.
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        xlabel = "PCA1(65%)",
        ylabel = "PCA2(80%)",
        c = :slategrey,
        size = (500, 500),
        ticks = [])
savefig("analysis/cancer_leukocytes/dim_01_combined/PCA.svg")
plot(p)

# ECM & cancer

## data preparation

In [None]:
# load data
# Load the ECM/cancer persistence diagrams and derive persistence images with
# the project helper `compute_PI2` (defined outside this view).
systems = "ECM_cancer"
data = load("data/4000x4000_combined/Dowker/ECM_cancer/PD.jld2")

PD0 = data["PD0"]
PD1 = data["PD1"]
PI0 = compute_PI2(PD0)
PI1 = compute_PI2(PD1)
PD0_max = data["PD0_max"]
PD1_max = data["PD1_max"];


In [None]:
# recompute coarser PI 
# Diagrams stored as `nothing` are skipped, so PI0 / PI1 below only contain
# ROIs that have a diagram in the respective dimension.
PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD0 if v !== nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD1 if v !== nothing);

# one PersistenceImage transform per dimension, built from all diagrams of that dimension
pi0 = PersistenceImage([ph for ph in values(PH0_dict)], sigma=50, size = 20)
pi1 = PersistenceImage([ph for ph in values(PH1_dict)], sigma=50, size = 20)


# apply the transform to every diagram
PI0 = Dict()
for (roi, ph) in PH0_dict
    PI0[roi] = pi0(ph)
end

PI1 = Dict()
for (roi, ph) in PH1_dict
    PI1[roi] = pi1(ph)
end

# save the min, max coordinates of PDs (useful for plotting)
PI0_xmin = pi0.xs[1]
PI0_xmax = pi0.xs[end]
PI0_ymin = pi0.ys[1]
PI0_ymax = pi0.ys[end]

PI1_xmin = pi1.xs[1]
PI1_xmax = pi1.xs[end]
PI1_ymin = pi1.ys[1]
PI1_ymax = pi1.ys[end]

save("data/4000x4000_combined/Dowker/ECM_cancer/PI_ranges.jld2",
    "PI0_xmin", PI0_xmin,
    "PI0_xmax", PI0_xmax,
    "PI0_ymin", PI0_ymin,
    "PI0_ymax", PI0_ymax,
    "PI1_xmin", PI1_xmin,
    "PI1_xmax", PI1_xmax,
    "PI1_ymin", PI1_ymin,
    "PI1_ymax", PI1_ymax)

## dim 0 

In [None]:
# Number the ROIs that have a dim-0 persistence image: idx_ROI maps 1..n to ROI names.
ROIs = collect(keys(PI0))
n = length(ROIs)
idx_ROI = Dict(zip(1:n, ROIs))

# create new PI0 with integers as keys
PI = Dict(i => PI0[roi] for (i, roi) in enumerate(ROIs));
PD = Dict(i => PD0[roi] for (i, roi) in enumerate(ROIs));

# save("analysis/ECM_cancer/dim_0/data.jld2",
#     "idx_ROI", idx_ROI,
#     "PI", PI,
#     "PD", PD);


In [None]:
# load PI, index
# Take the stored index => ROI mapping and re-key PI0 / PD0 by that index.
data = load("analysis/ECM_cancer/dim_0/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
PI = Dict(i => PI0[idx_ROI[i]] for i in 1:n);
PD = Dict(i => PD0[idx_ROI[i]] for i in 1:n);

# reverse lookup: ROI name => integer index
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# recompute PI to get xmin, xmax, ymin, ymax coordinates for PI plots
# entries stored as `nothing` are skipped (identity comparison with `!==`)
PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD if v !== nothing);

PIs = PersistenceImage([ph for ph in values(PH_dict)], sigma=50, size = 20)

# get ranges of x and y (useful for plotting )
xmin, xmax = first(PIs.xs), last(PIs.xs)
ymin, ymax = first(PIs.ys), last(PIs.ys);

### UMAP

In [None]:
# Seed the global RNG, centre the persistence images with the project helper
# `center_PI`, and compute a 2-D UMAP embedding with 5 neighbours.
Random.seed!(10)
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to CSV and read it straight back, so that `embedding`
# and its alias `y` hold the values stored on disk.
writedlm("analysis/ECM_cancer/dim_0/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/ECM_cancer/dim_0/umap.csv", ',');

In [None]:
# Static UMAP scatter (one point per column of `embedding`), saved as PDF.
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/ECM_cancer/dim_0/umap.pdf")
plot(p)

### PCA

In [None]:

# compute PCA
# `PI_to_PCA2` is a project helper (not visible here); its fifth return value is discarded.
# NOTE(review): `pratio = 0.99` presumably bounds the retained variance ratio — confirm in ECM_TDA.
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA2(PI; pratio = 0.99)


# Disabled: cache the PCA outputs in the JLD2 file that the next cell loads.
# save("analysis/ECM_cancer/dim_0/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)


In [None]:
# load
# Restore the cached PCA results and print a short summary of them.
data = load("analysis/ECM_cancer/dim_0/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"]

println("number of components: $(length(eigenvectors))")
println("variance explained by 1 eigenvectors: $(variance_1)")
println("variance explained by 2 eigenvectors: $(variance_2)")
println("variance difference between 2 and 1:$(variance_2 - variance_1)")

In [None]:
# Static scatter of the first two rows of `transformed` (one point per column).
gr()
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        c = :slategrey,
        ticks = [])
#savefig("analysis/ECM_cancer/dim_0/pca.pdf")
plot(p)

## dim 1

In [None]:
# Number the ROIs that have a dim-1 persistence image: idx_ROI maps 1..n to ROI names.
ROIs = collect(keys(PI1))
n = length(ROIs)
idx_ROI = Dict(zip(1:n, ROIs))

# create new PI with integers as keys
PI = Dict(i => PI1[roi] for (i, roi) in enumerate(ROIs));
PD = Dict(i => PD1[roi] for (i, roi) in enumerate(ROIs));

# save("analysis/ECM_cancer/dim_1/data.jld2",
#    "idx_ROI", idx_ROI,
#    "PI", PI,
#    "PD", PD);


In [None]:
# load PI, index
# Take the stored index => ROI mapping and re-key PI1 / PD1 by that index.
data = load("analysis/ECM_cancer/dim_1/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
PI = Dict(i => PI1[idx_ROI[i]] for i in 1:n);
PD = Dict(i => PD1[idx_ROI[i]] for i in 1:n);

# reverse lookup: ROI name => integer index
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# recompute PI to get xmin, xmax, ymin, ymax coordinates for PI plots
# entries stored as `nothing` are skipped (identity comparison with `!==`)
PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD if v !== nothing);

PIs = PersistenceImage([ph for ph in values(PH_dict)], sigma=50, size = 20)

# get ranges of x and y (useful for plotting )
xmin, xmax = first(PIs.xs), last(PIs.xs)
ymin, ymax = first(PIs.ys), last(PIs.ys);

### UMAP

In [None]:
# Seed the global RNG, centre the persistence images with the project helper
# `center_PI`, and compute a 2-D UMAP embedding with 5 neighbours.
Random.seed!(10)
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to CSV and read it straight back, so that `embedding`
# and its alias `y` hold the values stored on disk.
writedlm("analysis/ECM_cancer/dim_1/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/ECM_cancer/dim_1/umap.csv", ',');

In [None]:
# Static UMAP scatter (one point per column of `embedding`), saved as PDF.
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/ECM_cancer/dim_1/umap.pdf")
plot(p)

### PCA

In [None]:
# compute PCA
# `PI_to_PCA2` is a project helper (not visible here); its fifth return value is discarded.
# NOTE(review): `pratio = 0.99` presumably bounds the retained variance ratio — confirm in ECM_TDA.
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA2(PI; pratio = 0.99)

# Disabled: cache the PCA outputs in the JLD2 file that the next cell loads.
# save("analysis/ECM_cancer/dim_1/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)


In [None]:
# load
# Restore the cached PCA results and print a short summary of them.
data = load("analysis/ECM_cancer/dim_1/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"]

println("number of components: $(length(eigenvectors))")
println("variance explained by 1 eigenvectors: $(variance_1)")
println("variance explained by 2 eigenvectors: $(variance_2)")
println("variance difference between 2 and 1:$(variance_2 - variance_1)")

In [None]:
# Static scatter of the first two rows of `transformed`, saved as PDF.
gr()
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        c = :slategrey,
        ticks = [])
savefig("analysis/ECM_cancer/dim_1/pca.pdf")
plot(p)

## ECM and cancer - combined dim 0 & 1

In [None]:
# load data
# Reload the ECM/cancer diagrams, derive persistence images with `compute_PI2`
# and merge dim 0 and dim 1 with `combine_PI0_PI1_dicts_Dowker` (project helpers
# defined outside this view). This overwrites the PI0 / PI1 computed earlier.
systems = "ECM_cancer"
data = load("data/4000x4000_combined/Dowker/ECM_cancer/PD.jld2")

PD0 = data["PD0"]
PD1 = data["PD1"]
PI0 = compute_PI2(PD0)
PI1 = compute_PI2(PD1)
PD0_max = data["PD0_max"]
PD1_max = data["PD1_max"];

features = combine_PI0_PI1_dicts_Dowker(PI0, PI1);

In [None]:
#ROIs = collect(keys(features))
#idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis/ECM_cancer/dim_01_combined/idx_ROI.jld2", "idx_ROI", idx_ROI)
# Reuse the stored index => ROI mapping and build its inverse.
idx_ROI = load("analysis/ECM_cancer/dim_01_combined/idx_ROI.jld2")["idx_ROI"];
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# prepare features array
n = length(idx_ROI)
# one column per ROI, in index order; `reduce(hcat, ...)` avoids splatting n arguments
features_array = reduce(hcat, [features[idx_ROI[i]] for i = 1:n])
println("features array shape: ", size(features_array))

# centre every feature (row) across ROIs
features_centered = features_array .- mean(features_array, dims = 2);

#### UMAP

In [None]:
# seed the RNG as the other UMAP cells in this notebook do, so the embedding can be regenerated
Random.seed!(10)
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/ECM_cancer/dim_01_combined/umap.csv", embedding, ",")
#embedding = readdlm("analysis/ECM_cancer/dim_01_combined/umap.csv", ',');

In [None]:
# Static UMAP scatter (one point per column of `embedding`), saved as SVG.
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        #hover = 1:n,
        legend = :topright)
savefig("analysis/ECM_cancer/dim_01_combined/umap.svg")

#### PCA

In [None]:
# PCA of the centred combined features via the project helper
# `centered_features_to_PCA` (defined outside this view).
# NOTE(review): `variance_4` is not used in the visible cells — confirm whether it is needed.
transformed, variance_1, variance_2, variance_4 = centered_features_to_PCA(features_centered)
println(variance_1)
println(variance_2)

In [None]:
# store the PCA coordinates as comma-separated values
writedlm("analysis/ECM_cancer/dim_01_combined/PCA.csv", transformed, ",")

In [None]:
# Scatter of the first two PCA coordinates, saved as SVG.
# NOTE(review): the axis percentages are hard-coded; "PCA2(81%)" exceeding
# "PCA1(71%)" suggests the second figure is cumulative — confirm against
# `variance_1` / `variance_2`.
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        xlabel = "PCA1(71%)",
        ylabel = "PCA2(81%)",
        c = :slategrey,
        size = (500, 500),
        ticks = [])
savefig("analysis/ECM_cancer/dim_01_combined/PCA.svg")
plot(p)

# ECM & leukocytes

## Data preparation

In [None]:
# load data
# Load the ECM/leukocytes persistence diagrams and derive persistence images
# with the project helper `compute_PI2` (defined outside this view).
systems = "ECM_leukocytes"
data = load("data/4000x4000_combined/Dowker/ECM_leukocytes/PD.jld2")

PD0 = data["PD0"]
PD1 = data["PD1"]
PI0 = compute_PI2(PD0)
PI1 = compute_PI2(PD1)
PD0_max = data["PD0_max"]
PD1_max = data["PD1_max"];


## dim 0 

In [None]:
# Number the ROIs that have a dim-0 persistence image: idx_ROI maps 1..n to ROI names.
ROIs = collect(keys(PI0))
n = length(ROIs)
idx_ROI = Dict(zip(1:n, ROIs))

# create new PI0 with integers as keys
PI = Dict(i => PI0[roi] for (i, roi) in enumerate(ROIs));
PD = Dict(i => PD0[roi] for (i, roi) in enumerate(ROIs));

# save("analysis/ECM_leukocytes/dim_0/data.jld2",
#     "idx_ROI", idx_ROI,
#     "PI", PI,
#     "PD", PD);


In [None]:
# load PI, index
# Take the stored index => ROI mapping and re-key PI0 / PD0 by that index.
data = load("analysis/ECM_leukocytes/dim_0/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
PI = Dict(i => PI0[idx_ROI[i]] for i in 1:n);
PD = Dict(i => PD0[idx_ROI[i]] for i in 1:n);

# reverse lookup: ROI name => integer index
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# recompute PI to get xmin, xmax, ymin, ymax coordinates for PI plots
# entries stored as `nothing` are skipped (identity comparison with `!==`)
PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD if v !== nothing);

PIs = PersistenceImage([ph for ph in values(PH_dict)], sigma=50, size = 20)

# get ranges of x and y (useful for plotting )
xmin, xmax = first(PIs.xs), last(PIs.xs)
ymin, ymax = first(PIs.ys), last(PIs.ys);

### UMAP

In [None]:
# Seed the global RNG, centre the persistence images with the project helper
# `center_PI`, and compute a 2-D UMAP embedding with 5 neighbours.
Random.seed!(10)
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to CSV and read it straight back, so that `embedding`
# and its alias `y` hold the values stored on disk.
writedlm("analysis/ECM_leukocytes/dim_0/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/ECM_leukocytes/dim_0/umap.csv", ',');

In [None]:
# Static UMAP scatter (one point per column of `embedding`), saved as PDF.
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/ECM_leukocytes/dim_0/umap.pdf")
plot(p)

### PCA

In [None]:

# compute PCA
# `PI_to_PCA2` is a project helper (not visible here); its fifth return value is discarded.
# NOTE(review): `pratio = 0.99` presumably bounds the retained variance ratio — confirm in ECM_TDA.
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA2(PI; pratio = 0.99)


# Disabled: cache the PCA outputs in the JLD2 file that the next cell loads.
# save("analysis/ECM_leukocytes/dim_0/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)


In [None]:
# load
# Restore the cached PCA results and print a short summary of them.
data = load("analysis/ECM_leukocytes/dim_0/PCA.jld2")
transformed, eigenvectors = data["transformed"], data["eigenvectors"]
variance_1, variance_2 = data["variance_1"], data["variance_2"]

println("number of components: $(length(eigenvectors))")
println("variance explained by 1 eigenvectors: $(variance_1)")
println("variance explained by 2 eigenvectors: $(variance_2)")
println("variance difference between 2 and 1:$(variance_2 - variance_1)")

In [None]:
# Interactive scatter of the first two rows of `transformed`; hover text is the
# column index, which is how example points can be identified by number.
plotly()
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        c = :slategrey,
        ticks = [])
#savefig("analysis/ECM_leukocytes/dim_0/pca.pdf")
plot(p)

In [None]:
# save index of example points
# Each Rk is a hand-entered list of three column indices into `transformed`;
# they are only valid for the ordering the indices were read from.
R1 = [43, 380, 245]
R2 = [381, 237, 279]
R3 = [73, 214, 113]
R4 = [330, 205, 86]


In [None]:
# plot with regions
# Grey background points are all columns that are not in an example region;
# each region R1..R4 is overlaid with its own marker shape and colour.
gr()

y = transformed
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = [i for i = 1:n if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 1,
        label = "", 
        xaxis = "PC1 (65%)",
        yaxis = "PC2 (17%)",
        xticks = (0,0),
        yticks = (0,0),
        
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :topleft
        #background_color=:transparent, foreground_color=:black,
        )
# a single `label` per series (previously both `label = ""` and its alias `labels` were passed)
scatter!(y[1,R1], y[2,R1], label = "R1", markersize = markersize, markershape = :rect, markercolor = c1, legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], label = "R2", markersize = markersize, markershape = :utriangle, markercolor = c2)
scatter!(y[1,R3], y[2,R3], label = "R3", markersize = markersize, markershape = :star, markercolor = c3)
scatter!(y[1,R4], y[2,R4], label = "R4", markersize = markersize, markershape = :diamond, markercolor = c4)
savefig("analysis/ECM_leukocytes/dim_0/pca.svg")
plot(p)

In [None]:
# For every example ROI of every region three panels are appended, in this
# order: ECM image, leukocyte scatter, PSRH image. The final grid has one row
# per region and 9 columns, i.e. it assumes three example ROIs per region.
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)

plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        # ROI name for this column index
        f = idx_ROI[idx]
        df = CSV.read("data/4000x4000_combined/subregion_cells/" * f * ".csv")
        
        # plot ECM
        p_ECM = Images.load("data/4000x4000_combined/subregion_ECM/" * f * ".tif" )
        
        # plot leukocytes
        df_cell = df[df.class .== "leukocytes", :]
        p_L = scatter(df_cell.x, df_cell.y,
                         markersize = 1.5,
                         yflip = true,
                         label = "",
                         markerstrokewidth = 0.2,
                         frame = :box,
                         ticks = [],
                        aspect_ratio = :equal,
                        size = (150,150),
                        #background_color=:transparent, foreground_color=:black, 
                         c = c_leukocytes) 
        
        # plot psrh
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_ECM, ticks = [], frame = :box))
        push!(plot_array, plot(p_L, ticks = [], frame = :box, left_margin = -5mm))
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, left_margin = -5mm, 
        right_margin = 5mm))
    end
end

p = plot(plot_array..., layout = grid(n, 9), size = (250 * 9, 250 * n))
savefig("analysis/ECM_leukocytes/dim_0/pca_regions.png")

In [None]:
# plot the first four eigenvectors
# Four persistence-image panels share one colour scale; a fifth narrow panel
# acts as the colour bar.
gr()

# get min and max pixels of the first four eigenvectors
eigenvector_min = minimum(minimum(eigenvectors[i]) for i = 1:4)
eigenvector_max = maximum(maximum(eigenvectors[i]) for i = 1:4)

println("min pixel: ", eigenvector_min)
println("max pixel: ", eigenvector_max)

plot_scale = 20 # only show plot_scale% of persistence image
ps = [plot_PI2(eigenvectors[i], xmin, xmax, ymin, ymax, 
            clims = (eigenvector_min, eigenvector_max), 
            xlabel = "birth",
            ylabel = "persistence",
            show_axis = false,
            left_margin = 5mm,
            bottom_margin = 7mm,
            x_tick_interval = 400,
            y_tick_interval = 400,
            legend = :false # no colorbar 
            ) for i =1:4]

l = @layout[grid(1,4) a{0.05w}] # Stack a layout that rightmost one is for color bar

n = 100 # length of colorbar (as a vector)
cbar_interval = 0.2
# tick values: multiples of cbar_interval below zero (written for eigenvector_min < 0), then from 0 upwards
cbar_ticks = vcat(reverse(collect(0:cbar_interval: -eigenvector_min))[1:end-1] .* -1, collect(0:cbar_interval:eigenvector_max))
# `cbar_tickvals_to_loc` is a project helper (not visible here) that places the tick values on the colour bar
cbar_loc = [cbar_tickvals_to_loc(eigenvector_min, eigenvector_max, n, val) for val in cbar_ticks]

p = plot(ps..., 
         heatmap(collect(range(eigenvector_min, eigenvector_max, length = n)) .* ones(n,1), 
                legend=:none, 
                xticks=:none,
                yticks=(cbar_loc, cbar_ticks)),
         layout=l,
         topmargin = 3mm,
         size = (1000, 200))
savefig("analysis/ECM_leukocytes/dim_0/eigenvectors.svg")
plot(p)

## dim 1

In [None]:
ROIs = collect(keys(PI1))
idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs))
n = length(ROIs)

# create new PI with integers as keys
PI = Dict(i => PI1[ROIs[i]] for i=1:n);
PD = Dict(i => PD1[ROIs[i]] for i = 1:n);

# save("analysis/ECM_leukocytes/dim_1/data.jld2",
#    "idx_ROI", idx_ROI,
#    "PI", PI,
#    "PD", PD);


In [None]:
# load PI, index
data = load("analysis/ECM_leukocytes/dim_1/data.jld2")
idx_ROI = data["idx_ROI"]
n = length(idx_ROI)
# Re-key PI1 / PD1 by the *loaded* index, as the parallel cells of the other
# sections do; otherwise PI / PD keep whatever numbering an earlier cell gave
# them, which need not agree with the idx_ROI loaded here.
PI = Dict(i => PI1[idx_ROI[i]] for i=1:n);
PD = Dict(i => PD1[idx_ROI[i]] for i = 1:n);

ROI_idx = Dict(v => k for (k,v) in idx_ROI);

In [None]:
# recompute PI to get xmin, xmax, ymin, ymax coordinates for PI plots
# Entries of PD that are `nothing` are skipped; `!==` is the identity test
# intended for `nothing`.
PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD if v !== nothing);

# Build the persistence-image transformer from all converted diagrams.
PIs = PersistenceImage(collect(values(PH_dict)), sigma=50, size = 20)

# get ranges of x and y (useful for plotting)
xmin = first(PIs.xs)
xmax = last(PIs.xs)

ymin = first(PIs.ys)
ymax = last(PIs.ys);

### UMAP

In [None]:
# Seed the global RNG first so that the embedding computed below is the same
# on every run of this cell.
Random.seed!(10)
# Center the persistence images, then embed them in 2 dimensions with UMAP
# using 5 nearest neighbors.
PI_centered = center_PI(PI);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Write the embedding to disk as comma-delimited text ...
writedlm("analysis/ECM_leukocytes/dim_1/umap.csv", embedding, ",")
# ... and read it straight back, so `embedding` and `y` both hold the stored values.
embedding = readdlm("analysis/ECM_leukocytes/dim_1/umap.csv", ',')
y = embedding;

In [None]:
# Scatter plot of the 2-D UMAP embedding (one point per column).
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :], embedding[2, :];
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    legend = :topright,
    hover = 1:n,
    leftmargin = 5mm,
    size = (450, 350),
)
#savefig("analysis/ECM_leukocytes/dim_1/umap.pdf")
plot(p)

### PCA

In [None]:

# Run PCA on the persistence images (pratio = 0.99); the fifth return value is discarded.
(transformed, eigenvectors, variance_1, variance_2, _) = PI_to_PCA2(PI, pratio = 0.99)


# save("analysis/ECM_leukocytes/dim_1/PCA.jld2",
#     "transformed", transformed,
#     "eigenvectors", eigenvectors,
#     "variance_1", variance_1,
#     "variance_2", variance_2)


In [None]:
# Reload the stored PCA results and print a short summary.
data = load("analysis/ECM_leukocytes/dim_1/PCA.jld2")
transformed, eigenvectors, variance_1, variance_2 =
    (data[key] for key in ("transformed", "eigenvectors", "variance_1", "variance_2"))

for (message, value) in (
    ("number of components: ", length(eigenvectors)),
    ("variance explained by 1 eigenvectors: ", variance_1),
    ("variance explained by 2 eigenvectors: ", variance_2),
    ("variance difference between 2 and 1:", variance_2 - variance_1),
)
    println(message, value)
end

In [None]:
# Interactive (plotly) view of the first two PCA coordinates; hovering a point
# shows its column index.
plotly()
p = scatter(
    transformed[1, :], transformed[2, :];
    c = :slategrey,
    frame = :box,
    ticks = [],
    hover = collect(1:size(transformed, 2)),
    label = "",
)
#savefig("analysis/ECM_leukocytes/dim_1/pca.svg")
plot(p)

In [None]:
# save index of example points
# Each R* holds three integer point indices; later cells use them as column
# indices into the PCA coordinates and as keys into idx_ROI.
R1 = [43, 380, 245]
R2 = [381, 237, 279]
R3 = [330, 205, 86]
R4 = [73, 214, 113]


In [None]:
# Plot the PCA coordinates with the example points R1-R4 highlighted and
# labelled in the legend.
gr()

y = transformed
markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
# All column indices that are not highlighted examples (ascending order).
nonannotated = setdiff(1:n, annotated)
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 1,
        label = "", 
        xaxis = "PC1 (46%)",
        yaxis = "PC2 (19%)",
        xticks = (0,0),
        yticks = (0,0),
        
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :bottomleft
        #background_color=:transparent, foreground_color=:black,
        )

# One highlighted series per region. The original calls passed both
# `label = ""` and its alias `labels = "R…"`; only the intended legend label is
# kept so the result does not depend on alias resolution order.
for (R, shape, color, name) in ((R1, :rect, c1, "R1"),
                                (R2, :utriangle, c2, "R2"),
                                (R3, :star, c3, "R3"),
                                (R4, :diamond, c4, "R4"))
    scatter!(p, y[1,R], y[2,R],
             label = name,
             markersize = markersize,
             markershape = shape,
             markercolor = color,
             legendfontsize = legendfontsize)
end
# Save the figure object explicitly instead of relying on the "current" plot.
savefig(p, "analysis/ECM_leukocytes/dim_1/pca.svg")
plot(p)

In [None]:
# For every example point of every region, draw three panels side by side:
# the ECM image, the leukocyte positions, and the PSRH image.
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)

plot_array = []
n = length(regions)
for i = 1:n, idx in regions[i]
    roi_name = idx_ROI[idx]

    # cell table and the two images belonging to this ROI
    cell_table = CSV.read("data/4000x4000_combined/subregion_cells/" * roi_name * ".csv")
    ecm_img = Images.load("data/4000x4000_combined/subregion_ECM/" * roi_name * ".tif" )
    psrh_img = Images.load("data/4000x4000_combined/PSRH/" * roi_name * ".tif")

    # leukocyte positions only (y axis flipped)
    leuko = cell_table[cell_table.class .== "leukocytes", :]
    leuko_plot = scatter(leuko.x, leuko.y;
                         c = c_leukocytes,
                         markersize = 1.5,
                         markerstrokewidth = 0.2,
                         yflip = true,
                         aspect_ratio = :equal,
                         label = "",
                         frame = :box,
                         ticks = [],
                         #background_color=:transparent, foreground_color=:black,
                         size = (150,150))

    # panel order within a triple: ECM, leukocytes, PSRH
    push!(plot_array,
          plot(ecm_img, ticks = [], frame = :box),
          plot(leuko_plot, ticks = [], frame = :box, left_margin = -5mm),
          plot(psrh_img, ticks = [], frame = :box, left_margin = -5mm,
               right_margin = 5mm))
end

# n rows of 9 panels
p = plot(plot_array..., layout = grid(n, 9), size = (250 * 9, 250 * n))
savefig("analysis/ECM_leukocytes/dim_1/pca_regions.png")

In [None]:
# Plot the first four PCA eigenvectors as persistence images on one shared
# color scale, with a hand-built colorbar in a narrow fifth panel.
gr()

# Smallest / largest pixel value over the first four eigenvectors; used as the
# common color limits for every panel and for the colorbar.
eigenvector_min = minimum(minimum(eigenvectors[i]) for i = 1:4)
eigenvector_max = maximum(maximum(eigenvectors[i]) for i = 1:4)

println("min pixel: ", eigenvector_min)
println("max pixel: ", eigenvector_max)

# NOTE(review): plot_scale is not referenced anywhere in this cell.
plot_scale = 20 # only show plot_scale% of persistence image
ps = [plot_PI2(eigenvectors[i], xmin, xmax, ymin, ymax, 
            clims = (eigenvector_min, eigenvector_max), 
            xlabel = "birth",
            ylabel = "persistence",
            show_axis = false,
            left_margin = 5mm,
            bottom_margin = 7mm,
            x_tick_interval = 600,
            y_tick_interval = 400,
            legend = false # no per-panel colorbar; a shared one is drawn below
            ) for i = 1:4]

# Four equal panels plus a rightmost column (5% of the width) for the colorbar.
l = @layout[grid(1,4) a{0.05w}]

n = 100 # length of colorbar (as a vector)
cbar_interval = 0.2
# Tick values: multiples of cbar_interval on the negative side (zero dropped so
# it is not listed twice) followed by the multiples from 0 up to the maximum.
cbar_ticks = vcat(reverse(collect(0:cbar_interval: -eigenvector_min))[1:end-1] .* -1, collect(0:cbar_interval:eigenvector_max))
# Row positions of those tick values inside the n-row colorbar heatmap.
cbar_loc = [cbar_tickvals_to_loc(eigenvector_min, eigenvector_max, n, val) for val in cbar_ticks]

# The colorbar is an n x 1 heatmap spanning [eigenvector_min, eigenvector_max].
p = plot(ps..., 
         heatmap(collect(range(eigenvector_min, eigenvector_max, length = n)) .* ones(n,1), 
                legend=:none, 
                xticks=:none,
                yticks=(cbar_loc, cbar_ticks)),
         layout=l,
         topmargin = 3mm,
         size = (1000, 200))
# Save the figure object explicitly instead of relying on the "current" plot.
savefig(p, "analysis/ECM_leukocytes/dim_1/eigenvectors.svg")
plot(p)

## ECM leukocytes combined dim 0 and dim 1

In [None]:
# Read the stored Dowker persistence diagrams and derive the per-dimension
# persistence images and the combined dim-0 / dim-1 features from them.
systems = "ECM_leukocytes"
data = load("data/4000x4000_combined/Dowker/ECM_leukocytes/PD.jld2")

PD0, PD1, PD0_max, PD1_max =
    (data[key] for key in ("PD0", "PD1", "PD0_max", "PD1_max"))
PI0, PI1 = compute_PI2(PD0), compute_PI2(PD1)

features = combine_PI0_PI1_dicts_Dowker(PI0, PI1);

In [None]:
# recompute PI 
# recompute coarser PI 
# NOTE(review): this rebinds PI0 and PI1, while `features` was built earlier
# from the compute_PI2 versions and is not rebuilt here; confirm that is intended.

# Convert the stored diagrams, skipping entries that are `nothing`
# (`!==` is the identity test intended for `nothing`).
PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD0 if v !== nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD1 if v !== nothing);

# One persistence-image transformer per homology dimension, built from all
# converted diagrams of that dimension.
pi0 = PersistenceImage(collect(values(PH0_dict)), sigma=50, size = 20)
pi1 = PersistenceImage(collect(values(PH1_dict)), sigma=50, size = 20)

# Apply each transformer to every diagram. The containers stay Dict{Any,Any},
# exactly as the previous `Dict()` + fill loops produced.
PI0 = Dict{Any,Any}(k => pi0(diagram) for (k, diagram) in PH0_dict)
PI1 = Dict{Any,Any}(k => pi1(diagram) for (k, diagram) in PH1_dict)

# save the min, max coordinates of PDs (useful for plotting)
PI0_xmin = first(pi0.xs)
PI0_xmax = last(pi0.xs)
PI0_ymin = first(pi0.ys)
PI0_ymax = last(pi0.ys)

PI1_xmin = first(pi1.xs)
PI1_xmax = last(pi1.xs)
PI1_ymin = first(pi1.ys)
PI1_ymax = last(pi1.ys)

# save("data/4000x4000_combined/Dowker/ECM_leukocytes/PI_ranges.jld2",
#     "PI0_xmin", PI0_xmin,
#     "PI0_xmax", PI0_xmax,
#     "PI0_ymin", PI0_ymin,
#     "PI0_ymax", PI0_ymax,
#     "PI1_xmin", PI1_xmin,
#     "PI1_xmax", PI1_xmax,
#     "PI1_ymin", PI1_ymin,
#     "PI1_ymax", PI1_ymax)

In [None]:
#ROIs = collect(keys(features))
#idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis/ECM_leukocytes/dim_01_combined/idx_ROI.jld2", "idx_ROI", idx_ROI)

# Read the stored index -> ROI mapping and build the reverse lookup.
idx_ROI = load("analysis/ECM_leukocytes/dim_01_combined/idx_ROI.jld2")["idx_ROI"];
ROI_idx = Dict(roi => i for (i, roi) in pairs(idx_ROI));

In [None]:
# prepare features array
# One column per ROI, ordered by the integer index 1..n of idx_ROI.
n = length(idx_ROI)
# reduce(hcat, ...) concatenates the collection directly rather than splatting
# one argument per ROI into hcat.
features_array = reduce(hcat, [features[idx_ROI[i]] for i = 1:n])
println("features array shape: ", size(features_array))

# Subtract the mean taken along dims = 2 (across columns) from every column.
features_centered = features_array .- mean(features_array, dims = 2);

### UMAP

In [None]:
# Seed the global RNG before embedding, with the same seed the dim-1 UMAP cell
# uses, so that re-running this cell reproduces the same layout.
Random.seed!(10)
# 2-D UMAP embedding of the centered combined features, 5 nearest neighbors.
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/ECM_leukocytes/dim_01_combined/umap.csv", embedding, ",")
#embedding = readdlm("analysis/ECM_leukocytes/dim_01_combined/umap.csv", ',');

In [None]:
# Scatter plot of the 2-D UMAP embedding of the combined features.
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :], embedding[2, :];
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    xticks = [],
    yticks = [],
    framestyle = :box,
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    legend = :topright,
    #hover = 1:n,
    leftmargin = 5mm,
    size = (450, 350),
)
#savefig("analysis/ECM_leukocytes/dim_01_combined/umap.svg")

### PCA

In [None]:
# PCA of the centered combined features; print the first two variance figures.
(transformed, variance_1, variance_2, variance_4) = centered_features_to_PCA(features_centered)
foreach(println, (variance_1, variance_2))

In [None]:
# Store the PCA coordinates as comma-delimited text.
writedlm(
    "analysis/ECM_leukocytes/dim_01_combined/PCA.csv",
    transformed,
    ",",
)

In [None]:
# Scatter plot of the first two PCA coordinates of the combined features.
# The y label previously read "PCA2(75%)". A second component cannot explain
# more variance than the first (58%), so 75% is the cumulative figure for two
# components; PC2 alone is 75 - 58 = 17%.
# NOTE(review): 17% is derived from the rounded figures in the old labels;
# confirm against variance_2 - variance_1.
p = scatter(transformed[1,:], transformed[2,:], 
        label = "", 
        hover = collect(1:size(transformed, 2)),
        frame = :box,
        xlabel = "PCA1(58%)",
        ylabel = "PCA2(17%)",
        c = :slategrey,
        size = (500, 500),
        ticks = [])
# Save the figure object explicitly instead of relying on the "current" plot.
savefig(p, "analysis/ECM_leukocytes/dim_01_combined/PCA.svg")
plot(p)