# Cells PH analysis

This notebook performs dimensionality reduction (UMAP and PCA) on persistent homology (PH) features computed from cells (cancer cells and leukocytes).

In [None]:
include("../src/ECM_TDA.jl")
using .ECM_TDA
using Eirene
using Ripserer
using CSV
using TiffImages
using Images
using NPZ
using Plots
using PersistenceDiagrams
using Measures
using Distributions
using MultivariateStats
using LinearAlgebra
using Random
using StatsBase
using JLD2
using FileIO
using PersistenceDiagrams
using DelimitedFiles
using UMAP
using DataFrames

In [None]:
"""
    get_coordinate_min_max_examples(transformed, i; n = 4)

Return `(min_indices, max_indices)`: the column indices of `transformed` holding the
`n` smallest and the `n` largest values of row `i`. Both vectors are ordered by
increasing value of row `i`.
"""
function get_coordinate_min_max_examples(transformed, i; n = 4)
    order = sortperm(transformed[i, :])
    total = length(order)
    return order[1:n], order[(total - n + 1):total]
end

# Shared implementation of the cell-type specific plotting functions below.
# For every index in `max_indices` and then in `min_indices`, draws the cells of
# class `celltype` (colored with `color`) next to the corresponding PSRH image,
# arranges all panels on a 2 x 2n grid (n = length(min_indices)) and saves the
# figure to `save_filename`.
function _plot_low_high_PC_PSRH(min_indices, max_indices, idx_files, save_filename,
                                celltype, color)
    gr()
    plot_array = []
    n = length(min_indices)

    for indices in [max_indices, min_indices]
        for idx in indices
            f = idx_files[idx]

            # get cells; DataFrame(CSV.File(...)) works on CSV.jl versions with and
            # without the sink-less `CSV.read(path)` method
            df = DataFrame(CSV.File("data/4000x4000_combined/subregion_cells/" * f * ".csv"))

            # plot the cells of the requested class
            df_cell = df[df.class .== celltype, :]
            p = scatter(df_cell.x, df_cell.y,
                        markersize = 1.5,
                        yflip = true,
                        label = "",
                        markerstrokewidth = 0.2,
                        frame = :box,
                        ticks = [],
                        aspect_ratio = :equal,
                        size = (150,150),
                        background_color=:transparent, foreground_color=:black,
                        c = color,
                        right_margin = -5mm)
            push!(plot_array, p)

            # matching PSRH image
            p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
            push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm))
        end
    end

    # the grid width is derived from `min_indices`; `max_indices` is expected to
    # have the same length
    p = plot(plot_array..., layout = grid(2, n * 2), size = (250 * n * 2, 250 * 2))
    # save the assembled figure explicitly instead of relying on the current plot
    return savefig(p, save_filename)
end

"""
    plot_low_high_PC_cancer_PSRH(min_indices, max_indices, idx_files, save_filename)

Plot cancer cells and PSRH images side by side for the ROIs `idx_files[idx]`, first
for `idx` in `max_indices`, then for `idx` in `min_indices`, and save the combined
figure to `save_filename`. Uses the global color `c_cancer`.
"""
function plot_low_high_PC_cancer_PSRH(min_indices, max_indices, idx_files, save_filename)
    return _plot_low_high_PC_PSRH(min_indices, max_indices, idx_files, save_filename,
                                  "cancer", c_cancer)
end


"""
    plot_low_high_PC_leukocytes_PSRH(min_indices, max_indices, idx_files, save_filename)

Plot leukocytes and PSRH images side by side for the ROIs `idx_files[idx]`, first
for `idx` in `max_indices`, then for `idx` in `min_indices`, and save the combined
figure to `save_filename`. Uses the global color `c_leukocytes`.
"""
function plot_low_high_PC_leukocytes_PSRH(min_indices, max_indices, idx_files, save_filename)
    return _plot_low_high_PC_PSRH(min_indices, max_indices, idx_files, save_filename,
                                  "leukocytes", c_leukocytes)
end

"""
    get_small_large_coordinate_examples(transformed, i, thresholds;
                                        n_coordinates = nothing, lim = 4)

Select example columns of `transformed` whose `i`-th coordinate is extreme while the
other inspected coordinates stay small.

A column `k` is a "large" example when row `i` holds the maximum of column `k`, and a
"small" example when row `i` holds the minimum of column `k`. In both cases every
other coordinate `j` in `1:n` (`j != i`) must satisfy
`abs(transformed[j, k]) <= thresholds[j]`, where `n` is `n_coordinates`, or the number
of rows of `transformed` when `n_coordinates` is `nothing`.

Returns `(small_idx, large_idx)`. `small_idx` is ordered by increasing and `large_idx`
by decreasing value of coordinate `i`. Each holds at most `lim` indices; fewer are
returned when fewer columns qualify.
"""
function get_small_large_coordinate_examples(transformed, i, thresholds; n_coordinates = nothing, lim = 4)
    n = n_coordinates === nothing ? size(transformed, 1) : n_coordinates
    other_coords = [j for j in 1:n if j != i]

    # true when no other inspected coordinate of column k exceeds its threshold
    others_within_thresholds(k) = !any(abs(transformed[j, k]) > thresholds[j] for j in other_coords)

    # column k qualifies only if coordinate i is the largest / smallest entry of the column
    is_large_example(k) = findmax(transformed[:, k])[2] == i && others_within_thresholds(k)
    is_small_example(k) = findmin(transformed[:, k])[2] == i && others_within_thresholds(k)

    ascending = sortperm(transformed[i, :])
    large_idx = filter(is_large_example, reverse(ascending))
    small_idx = filter(is_small_example, ascending)

    # clamp to the number of available examples instead of throwing a BoundsError
    return small_idx[1:min(lim, end)], large_idx[1:min(lim, end)]
end

"""
    center_PI(PI)

Flatten the entries `PI[1], ..., PI[length(PI)]` into the columns of one matrix and
subtract the mean of every row, so that each feature (row) has zero mean across the
columns.
"""
function center_PI(PI)
    columns = map(k -> vec(PI[k]), 1:length(PI))
    stacked = hcat(columns...)
    row_means = mean(stacked, dims = 2)
    return stacked .- row_means
end

"""
    compute_PI2(PD; sigma = 50, size = 20)

Compute a persistence image for every entry of the dictionary `PD`.

Entries whose value is `nothing` are skipped. The remaining values are converted with
`ECM_TDA.array_to_ripsererPD`, a single `PersistenceImage` is built from all converted
diagrams with the given `sigma` and `size`, and that same object is applied to each
diagram.

Returns a `Dict` mapping each kept key of `PD` to its persistence image.
"""
function compute_PI2(PD; sigma = 50, size = 20)
    # note: the keyword `size` shadows `Base.size` inside this function
    # `!==` tests identity with `nothing`, independent of any `==` overloads of the values
    PH_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD if v !== nothing)

    # one PersistenceImage built from all diagrams, then applied to each of them
    PI = PersistenceImage([PH_dict[k] for k in keys(PH_dict)], sigma = sigma, size = size)

    PH_PI = Dict()
    for (k, diagram) in PH_dict
        PH_PI[k] = PI(diagram)
    end
    return PH_PI
end

In [None]:
# color palettes
c_ECM = "#249EA0" # teal
c_cancer = "#592693" # purple
c_leukocytes = "#FAAB36" # orange

celltype_colors = Dict("cancer" => c_cancer, 
                        "leukocytes"=> c_leukocytes);


c1 = "#fd5184" # pink
c2 = "#ffb602" # orange
c3 = "#3ec0c9" # blue / teal 
c4 = "#926EAE" # purple
c5 = "#49a849"  # green
c6 = "#F28522"
c7 = "#265BF5"
c8 = "#AEF359" # lime green
c9 = "#FF1DCE"


# plotting parameters
c_highlight = :deeppink2

### for point cloud
p_markersize = 5
p_markerstrokewidth = 0.1
p_imagesize = (300, 300)

### for PD
diagonal_lw = 2
pd_markersize = 15
tickfontsize = 25
PD_size = (500, 500)
inf_markerstrokewidth = 6

Load data

In [None]:
# load PI
#PI = load("data/4000x4000_combined/cells_PD/PI.jld2")
# PI0_cancer = PI["PI0_cancer"]
# PI1_cancer = PI["PI1_cancer"]
# PI0_leukocytes = PI["PI0_leukocytes"]
# PI1_leukocytes = PI["PI1_leukocytes"]

# load PD
PD = load("data/4000x4000_combined/cells_PD/PD.jld2")
PD0_cancer = PD["PD0_cancer"]
PD1_cancer = PD["PD1_cancer"]
PD0_leukocytes = PD["PD0_leukocytes"]
PD1_leukocytes = PD["PD1_leukocytes"]

# compute PI (with smaller size)
PI0_cancer = compute_PI2(PD0_cancer)
PI1_cancer = compute_PI2(PD1_cancer)
PI0_leukocytes = compute_PI2(PD0_leukocytes)
PI1_leukocytes = compute_PI2(PD1_leukocytes)

# load PD_max
PD_max = load("data/4000x4000_combined/cells_PD/PD_max.jld2")
cancer0_max = PD_max["cancer0_max"]
cancer1_max = PD_max["cancer1_max"]
leukocytes0_max = PD_max["leukocytes0_max"]
leukocytes1_max = PD_max["leukocytes1_max"];

In [None]:
# save("data/4000x4000_combined/cells_PD/PI.jld2",
#     "PI0_cancer", PI0_cancer,
#     "PI1_cancer", PI1_cancer,
#     "PI0_leukocytes", PI0_leukocytes,
#     "PI1_leukocytes", PI1_leukocytes)

In [None]:
# load cancer & leukocyte features
PI = load("data/4000x4000_combined/cells_PD/PI.jld2")
cancer_PI0 = PI["PI0_cancer"]
cancer_PI1 = PI["PI1_cancer"]
leukocytes_PI0 = PI["PI0_leukocytes"]
leukocytes_PI1 = PI["PI1_leukocytes"];

In [None]:
# recompute coarser PI 
PD0 = PD0_cancer
PD1 = PD1_cancer

PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD0 if v != nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD1 if v != nothing);

pi0 = PersistenceImage([PH0_dict[k] for k in keys(PH0_dict)], sigma=50, size = 20)
pi1 = PersistenceImage([PH1_dict[k] for k in keys(PH1_dict)], sigma=50, size = 20)


PI0 = Dict()
for i in keys(PH0_dict)
    PI0[i] = pi0(PH0_dict[i])
end

PI1 = Dict()
for i in keys(PH1_dict)
    PI1[i] = pi1(PH1_dict[i])
end


In [None]:
# save the min, max coordinates of PDs (useful for plotting)
PI0_xmin = pi0.xs[1]
PI0_xmax = pi0.xs[end]
PI0_ymin = pi0.ys[1]
PI0_ymax = pi0.ys[end]

PI1_xmin = pi1.xs[1]
PI1_xmax = pi1.xs[end]
PI1_ymin = pi1.ys[1]
PI1_ymax = pi1.ys[end]

save("data/4000x4000_combined/cells_PD/cancer_PI_ranges.jld2",
    "PI0_xmin", PI0_xmin,
    "PI0_xmax", PI0_xmax,
    "PI0_ymin", PI0_ymin,
    "PI0_ymax", PI0_ymax,
    "PI1_xmin", PI1_xmin,
    "PI1_xmax", PI1_xmax,
    "PI1_ymin", PI1_ymin,
    "PI1_ymax", PI1_ymax)

In [None]:
# recompute coarser PI 
PD0 = PD0_leukocytes
PD1 = PD1_leukocytes

PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD0 if v != nothing);
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k,v) in PD1 if v != nothing);

pi0 = PersistenceImage([PH0_dict[k] for k in keys(PH0_dict)], sigma=50, size = 20)
pi1 = PersistenceImage([PH1_dict[k] for k in keys(PH1_dict)], sigma=50, size = 20)


PI0 = Dict()
for i in keys(PH0_dict)
    PI0[i] = pi0(PH0_dict[i])
end

PI1 = Dict()
for i in keys(PH1_dict)
    PI1[i] = pi1(PH1_dict[i])
end

# save the min, max coordinates of PDs (useful for plotting)
PI0_xmin = pi0.xs[1]
PI0_xmax = pi0.xs[end]
PI0_ymin = pi0.ys[1]
PI0_ymax = pi0.ys[end]

PI1_xmin = pi1.xs[1]
PI1_xmax = pi1.xs[end]
PI1_ymin = pi1.ys[1]
PI1_ymax = pi1.ys[end]

save("data/4000x4000_combined/cells_PD/leukocytes_PI_ranges.jld2",
    "PI0_xmin", PI0_xmin,
    "PI0_xmax", PI0_xmax,
    "PI0_ymin", PI0_ymin,
    "PI0_ymax", PI0_ymax,
    "PI1_xmin", PI1_xmin,
    "PI1_xmax", PI1_xmax,
    "PI1_ymin", PI1_ymin,
    "PI1_ymax", PI1_ymax)


# Cancer, dimension 0

In [None]:
# create indices
#files = collect(keys(PI0_cancer))
#idx_files = Dict(i => files[i] for i=1:length(files));

#save("analysis/cancer/cancer_PI0_idx_files.jld2", "idx_files", idx_files)
idx_files = load("analysis/cancer/cancer_PI0_idx_files.jld2")["idx_files"];
file_idx = Dict(v => k for (k,v) in idx_files);

In [None]:
PI0_new = Dict(i => PI0_cancer[idx_files[i]] for i = 1:length(file_idx));

## UMAP

In [None]:
Random.seed!(10)
PI_centered = center_PI(PI0_new);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/cancer/dim_0/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/cancer/dim_0/umap.csv", ',');

In [None]:
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/cancer/dim_0/umap.pdf")
plot(p)

The following cells are based on the earlier, larger persistence images (PI size 100 x 100).

In [None]:
U1 = [401, 390, 172]
U2 = [289, 313, 378]
U3 = [400, 344, 353]
U4 = [315, 379, 203]
U5 = [322, 106, 283]
U6 = [382, 369, 181]

In [None]:
# plot the UMAP embedding with the example regions U1-U6 highlighted
gr()

# load the saved UMAP embedding
y = readdlm("analysis/cancer/dim_0/umap.csv", ',');

markersize = 3
legendfontsize = 5
n = size(y, 2)
# every highlighted region must be left out of the gray background layer,
# otherwise its points are drawn twice
annotated = vcat(U1, U2, U3, U4, U5, U6)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 0.1,
        label = "",
        xaxis = "UMAP-1",
        yaxis = "UMAP-2",
        ticks = [],
        xlims = (-15, 11),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        legend = :bottomleft,
        )
scatter!(y[1,U1], y[2,U1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "U1", legendfontsize = legendfontsize)
scatter!(y[1,U2], y[2,U2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "U2")
scatter!(y[1,U3], y[2,U3], markersize = markersize, markershape = :star, markercolor = c3, labels = "U3")
scatter!(y[1,U4], y[2,U4], markersize = markersize, markershape = :diamond, markercolor = c4, labels = "U4")
scatter!(y[1,U5], y[2,U5], markersize = markersize, markershape = :pentagon, markercolor = c5, labels = "U5")
scatter!(y[1,U6], y[2,U6], markersize = markersize, markershape = :dtriangle, markercolor = c6, labels = "U6")
savefig("analysis/cancer/dim_0/umap.pdf")
plot(p)

In [None]:
# plot cancer cell locations & PSRH images for the example ROIs of every UMAP region
regions = Dict(1 => U1, 2=> U2, 3=> U3, 4=> U4, 5 => U5, 6 => U6)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]
        # get cells; DataFrame(CSV.File(...)) works on CSV.jl versions with and
        # without the sink-less `CSV.read(path)` method
        df = DataFrame(CSV.File("data/4000x4000_combined/subregion_cells/" * f * ".csv"))

        # plot cancer
        df_cell = df[df.class .== "cancer", :]
        p = scatter(df_cell.x, df_cell.y,
                    markersize = 1.5,
                    yflip = true,
                    label = "",
                    markerstrokewidth = 0.2,
                    frame = :box,
                    ticks = [],
                    aspect_ratio = :equal,
                    size = (150,150),
                    background_color=:transparent, foreground_color=:black,
                    c = c_cancer,
                    right_margin = -5mm)

        push!(plot_array, p)

        # matching PSRH image
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm))

    end
end

In [None]:
p = plot(plot_array..., layout = grid(6, 6), size = (250 * 6, 250 * 6))
savefig("analysis/cancer/dim_0/UMAP_examples.png")

## PCA

In [None]:
transformed, eigenvectors, variance_1, variance_2, _ = PI0_to_PCA(PI0_new; pratio = 0.99)


save("analysis/cancer/dim_0/PCA.jld2",
    "transformed", transformed,
    "eigenvectors", eigenvectors,
    "variance_1", variance_1,
    "variance_2", variance_2)

println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# load
data = load("analysis/cancer/dim_0/PCA.jld2")
transformed = data["transformed"]
eigenvectors = data["eigenvectors"];

In [None]:
plotly()
y = transformed
n = size(y, 2)
p = scatter(y[1,:], y[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

In [None]:
R1 = [141, 297, 195]
R2 = [319, 55, 66]
R3 = [279, 400, 228]
R4 = [42, 315, 107]

In [None]:
# plot with regions
gr()

# load PCA
y = load("analysis/cancer/dim_0/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 0.1,
        label = "", 
        xaxis = "PC1 (95%)",
        yaxis = "PC2 (4%)",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        xticks = (0, 0),
        yticks = (0,0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :topright
        #background_color=:transparent, foreground_color=:black,
        )
scatter!(y[1,R1], y[2,R1], label = "", markersize = markersize, markershape = :rect, markercolor = c1, labels = "R1", legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], label = "", markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "R2")
scatter!(y[1,R3], y[2,R3], label = "", markersize = markersize, markershape = :star, markercolor = c3, labels = "R3")
scatter!(y[1,R4], y[2,R4], label = "", markersize = markersize, markershape = :diamond, markercolor = c4, labels = "R4")
savefig("analysis/cancer/dim_0/pca.pdf")
plot(p)

In [None]:
# plot cancer cell locations & PSRH images for the example ROIs of every PCA region
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]
        # get cells; DataFrame(CSV.File(...)) works on CSV.jl versions with and
        # without the sink-less `CSV.read(path)` method
        df = DataFrame(CSV.File("data/4000x4000_combined/subregion_cells/" * f * ".csv"))

        # plot cancer
        df_cell = df[df.class .== "cancer", :]
        p = scatter(df_cell.x, df_cell.y,
                    markersize = 1.5,
                    yflip = true,
                    label = "",
                    markerstrokewidth = 0.2,
                    frame = :box,
                    ticks = [],
                    aspect_ratio = :equal,
                    size = (150,150),
                    background_color=:transparent, foreground_color=:black,
                    c = c_cancer,
                    right_margin = -5mm)

        push!(plot_array, p)

        # matching PSRH image
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm))

    end
end

In [None]:
p = plot(plot_array..., layout = grid(4, 6), size = (250 * 6, 250 * 4))
savefig("analysis/cancer/dim_0/PCA_examples.png")

# Cancer, dimension 1

In [None]:
# create indices
#files = collect(keys(PI1_cancer))
#idx_files = Dict(i => files[i] for i=1:length(files));

#save("analysis/cancer/cancer_PI1_idx_files.jld2", "idx_files", idx_files)
idx_files = load("analysis/cancer/cancer_PI1_idx_files.jld2")["idx_files"];
file_idx = Dict(v => k for (k,v) in idx_files);

PI1_new = Dict(i => PI1_cancer[idx_files[i]] for i = 1:length(file_idx));

## UMAP

In [None]:
Random.seed!(10)
PI_centered = center_PI(PI1_new);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
#writedlm("analysis/cancer/dim_1/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/cancer/dim_1/umap.csv", ',');

In [None]:
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/cancer/dim_1/umap.pdf")
plot(p)

The following cells are based on the old, larger persistence images (PI size 100 x 100).

In [None]:
plotly()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

In [None]:
U1 = [190, 88, 91]
U2 = [327, 371, 322]
U3 = [185, 274, 234]
U4 = [50, 282, 38]
U5 = [306, 120, 317]
U6 = [319, 308, 9]

In [None]:
# plot with regions
gr()

# load the saved UMAP embedding
y = readdlm("analysis/cancer/dim_1/umap.csv", ',');

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(U1, U2, U3, U4, U5, U6)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 0.1,
        label = "", 
        xaxis = "UMAP-1",
        yaxis = "UMAP-2",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        ticks = [],
        xlims = (-11, 13),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :bottomright,
        #background_color=:transparent, foreground_color=:black,
        )
scatter!(y[1,U1], y[2,U1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "U1", legendfontsize = legendfontsize)
scatter!(y[1,U2], y[2,U2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "U2")
scatter!(y[1,U3], y[2,U3], markersize = markersize, markershape = :star, markercolor = c3, labels = "U3")
scatter!(y[1,U4], y[2,U4], markersize = markersize, markershape = :diamond, markercolor = c4, labels = "U4")
scatter!(y[1,U5], y[2,U5], markersize = markersize, markershape = :pentagon, markercolor = c5, labels = "U5")
scatter!(y[1,U6], y[2,U6], markersize = markersize, markershape = :dtriangle, markercolor = c6, labels = "U6")
savefig("analysis/cancer/dim_1/umap.pdf")
plot(p)

In [None]:
# plot cancer cell locations & PSRH images for the example ROIs of every UMAP region
regions = Dict(1 => U1, 2=> U2, 3=> U3, 4=> U4, 5 => U5, 6 => U6)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]
        # get cells; DataFrame(CSV.File(...)) works on CSV.jl versions with and
        # without the sink-less `CSV.read(path)` method
        df = DataFrame(CSV.File("data/4000x4000_combined/subregion_cells/" * f * ".csv"))

        # plot cancer
        df_cell = df[df.class .== "cancer", :]
        p = scatter(df_cell.x, df_cell.y,
                    markersize = 1.5,
                    yflip = true,
                    label = "",
                    markerstrokewidth = 0.2,
                    frame = :box,
                    ticks = [],
                    aspect_ratio = :equal,
                    size = (150,150),
                    background_color=:transparent, foreground_color=:black,
                    c = c_cancer,
                    right_margin = -5mm)

        push!(plot_array, p)

        # matching PSRH image
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm))

    end
end

In [None]:
p = plot(plot_array..., layout = grid(6, 6), size = (250 * 6, 250 * 6))
savefig("analysis/cancer/dim_1/UMAP_examples.png")

## PCA

In [None]:
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA(PI1_new; pratio = 0.99)


#save("analysis/cancer/dim_1/PCA.jld2",
#    "transformed", transformed,
#    "eigenvectors", eigenvectors,
#    "variance_1", variance_1,
#    "variance_2", variance_2)

println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# load
data = load("analysis/cancer/dim_1/PCA.jld2")
transformed = data["transformed"]
eigenvectors = data["eigenvectors"];

In [None]:
plotly()
y = transformed
n = size(y, 2)
p = scatter(y[1,:], y[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = (0,0), 
        yticks = (0,0), 
        framestyle = :box,  
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

In [None]:
R1 = [345, 240, 143]
R2 = [181, 363, 33]
R3 = [170, 57, 342]
R4 = [44, 105, 385]

In [None]:
# plot with regions
gr()

# load PCA
y = load("analysis/cancer/dim_1/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated], 
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize, 
        markerstrokewidth = 0.1,
        label = "", 
        xaxis = "PC1 (87%)",
        yaxis = "PC2 (9%)",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        xticks = (0, 0),
        yticks = (0,0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :topleft
        #background_color=:transparent, foreground_color=:black,
        )
scatter!(y[1,R1], y[2,R1], label = "", markersize = markersize, markershape = :rect, markercolor = c1, labels = "R1", legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], label = "", markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "R2")
scatter!(y[1,R3], y[2,R3], label = "", markersize = markersize, markershape = :star, markercolor = c3, labels = "R3")
scatter!(y[1,R4], y[2,R4], label = "", markersize = markersize, markershape = :diamond, markercolor = c4, labels = "R4")
savefig("analysis/cancer/dim_1/pca.pdf")
plot(p)

In [None]:
# plot cancer cell locations & PSRH images for the example ROIs of every PCA region
regions = Dict(1 => R1, 2=> R2, 3=> R3, 4=> R4)
gr()
plot_array = []
n = length(regions)
for i=1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]
        # get cells; DataFrame(CSV.File(...)) works on CSV.jl versions with and
        # without the sink-less `CSV.read(path)` method
        df = DataFrame(CSV.File("data/4000x4000_combined/subregion_cells/" * f * ".csv"))

        # plot cancer
        df_cell = df[df.class .== "cancer", :]
        p = scatter(df_cell.x, df_cell.y,
                    markersize = 1.5,
                    yflip = true,
                    label = "",
                    markerstrokewidth = 0.2,
                    frame = :box,
                    ticks = [],
                    aspect_ratio = :equal,
                    size = (150,150),
                    background_color=:transparent, foreground_color=:black,
                    c = c_cancer,
                    right_margin = -5mm)

        push!(plot_array, p)

        # matching PSRH image
        p_PSRH = Images.load("data/4000x4000_combined/PSRH/" * f * ".tif")
        push!(plot_array, plot(p_PSRH, ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm))

    end
end

In [None]:
p = plot(plot_array..., layout = grid(4, 6), size = (250 * 6, 250 * 4))
savefig("analysis/cancer/dim_1/PCA_examples.png")

## PCA coordinates

In [None]:
for i = 1:4
    min_indices, max_indices = get_coordinate_min_max_examples(transformed, i)
    plot_low_high_PC_cancer_PSRH(min_indices, max_indices, idx_files, "analysis/cancer/dim_1/PCA_coordinates/PC" * string(i) * "_examples.png")
end

Plot example ROIs whose coordinate along one PC is high or low while their coordinates along the other PCs are close to 0.

In [None]:
plot_array = []
for i = 1:4
    p = histogram(transformed[i,:], label = "", title = "PC " * string(i))
    push!(plot_array, p)
end
plot(plot_array..., layout = grid(2,2), size = (800, 500))

In [None]:
thresholds = [0.002, 0.0003, 0.00005, 0.00009]
small_1, large_1 = get_small_large_coordinate_examples(transformed, 1, thresholds; n_coordinates = 4 )
small_2, large_2 = get_small_large_coordinate_examples(transformed, 2, thresholds; n_coordinates = 4 )
small_3, large_3 = get_small_large_coordinate_examples(transformed, 3, thresholds; n_coordinates = 4 )
small_4, large_4 = get_small_large_coordinate_examples(transformed, 4, thresholds; n_coordinates = 4 );

In [None]:
plot_low_high_PC_cancer_PSRH(small_1, large_1, idx_files, "analysis/cancer/dim_1/PCA_coordinates/PC1_examples_controlled.png")
plot_low_high_PC_cancer_PSRH(small_2, large_2, idx_files, "analysis/cancer/dim_1/PCA_coordinates/PC2_examples_controlled.png")
plot_low_high_PC_cancer_PSRH(small_3, large_3, idx_files, "analysis/cancer/dim_1/PCA_coordinates/PC3_examples_controlled.png")
plot_low_high_PC_cancer_PSRH(small_4, large_4, idx_files, "analysis/cancer/dim_1/PCA_coordinates/PC4_examples_controlled.png")

# Cancer, combined dimension 0 and 1

In [None]:
# select the per-ROI feature dictionaries to combine
dicts = [PI0_cancer,
        PI1_cancer
        ];

# get keys that are present in all dictionaries
all_keys = [k for k in keys(dicts[1]) if all(d -> haskey(d, k), dicts[2:end])]

# combine all features: flatten both persistence images before stacking so that
# they always concatenate into one feature vector, even when an image is a matrix
features = Dict()
for f in all_keys
    features[f] = vcat(vec(PI0_cancer[f]), vec(PI1_cancer[f]))
end

In [None]:
ROIs = collect(keys(features))
idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis/cancer/dim_01_combined/idx_ROI.jld2", "idx_ROI", idx_ROI)
#idx_ROI = load("analysis/cancer/dim_01_combined/idx_ROI.jld2")["idx_ROI"];
#ROI_idx = Dict(v => k for (k,v) in idx_ROI);

In [None]:
# prepare features array
n = length(ROIs)
features_array = hcat([features[idx_ROI[i]] for i = 1:n]...)
println("features array shape: ", size(features_array))

features_centered = features_array .- mean(features_array, dims = 2);

In [None]:
# seed the RNG as the other UMAP cells do, so the embedding written to disk is reproducible
Random.seed!(10)
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
writedlm("analysis/cancer/dim_01_combined/umap.csv", embedding, ",")
#y = embedding = readdlm("analysis/cancer/dim_01_combined/umap.csv", ',');

In [None]:
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

# Leukocyte, dimension 0

In [None]:
# create indices
#files = collect(keys(PI0_leukocytes))
#idx_files = Dict(i => files[i] for i=1:length(files));

#save("analysis/leukocytes/leukocytes_PI0_idx_files.jld2", "idx_files", idx_files)
idx_files = load("analysis/leukocytes/leukocytes_PI0_idx_files.jld2")["idx_files"];
file_idx = Dict(v => k for (k,v) in idx_files);

In [None]:
PI0_new = Dict(i => PI0_leukocytes[idx_files[i]] for i = 1:length(file_idx));

## UMAP

In [None]:
Random.seed!(10)
PI_centered = center_PI(PI0_new);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
writedlm("analysis/leukocytes/dim_0/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/leukocytes/dim_0/umap.csv", ',');

In [None]:
gr()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)
savefig("analysis/leukocytes/dim_0/umap.pdf")
plot(p)

The following cells are based on the old, larger persistence images (PI size 100 x 100).

In [None]:
plotly()
n = size(embedding, 2)
p = scatter(embedding[1,:], embedding[2,:], 
        markercolor = "slategrey",
        markersize = 5, 
        label = "", 
        xticks = [], 
        yticks = [], 
        framestyle = :box,  
        xlabel = "UMAP-1",
        ylabel = "UMAP-2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

In [None]:
U1 = [95, 137, 254]
U2 = [329, 370, 347]
U3 = [205, 392, 175]
U4 = [269, 355, 363]
U5 = [14, 393, 400]

In [None]:
# plot the UMAP embedding with the example regions U1-U5 highlighted
gr()

# load the saved UMAP embedding
y = readdlm("analysis/leukocytes/dim_0/umap.csv", ',');

markersize = 3
legendfontsize = 5
n = size(y, 2)
# every highlighted region must be left out of the gray background layer,
# otherwise its points are drawn twice
annotated = vcat(U1, U2, U3, U4, U5)
nonannotated = [i for i = 1:size(y, 2) if i ∉ annotated]
p = scatter(y[1,nonannotated], y[2,nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 0.1,
        label = "",
        xaxis = "UMAP-1",
        yaxis = "UMAP-2",
        ticks = [],
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        legend = :bottomleft,
        )
scatter!(y[1,U1], y[2,U1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "U1", legendfontsize = legendfontsize)
scatter!(y[1,U2], y[2,U2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "U2")
scatter!(y[1,U3], y[2,U3], markersize = markersize, markershape = :star, markercolor = c3, labels = "U3")
scatter!(y[1,U4], y[2,U4], markersize = markersize, markershape = :diamond, markercolor = c4, labels = "U4")
scatter!(y[1,U5], y[2,U5], markersize = markersize, markershape = :pentagon, markercolor = c5, labels = "U5")
savefig("analysis/leukocytes/dim_0/umap.pdf")
plot(p)

In [None]:
# Build the example gallery: for every ROI in U1-U5, one scatter of its
# leukocyte positions followed by the matching PSRH image.
regions = Dict(1 => U1, 2 => U2, 3 => U3, 4 => U4, 5 => U5)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table; only rows whose class is "leukocytes" are drawn
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))
        df_cell = df[df.class .== "leukocytes", :]

        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end

In [None]:
# Assemble the gallery: 5 groups x 3 ROIs x 2 panels (cells, PSRH) = 30 panels,
# laid out as 5 rows of 6, then written to disk.
p = plot(plot_array..., layout = grid(5, 6), size = (250 * 6, 250 * 5))
savefig("analysis/leukocytes/dim_0/UMAP_examples.png")

## PCA

In [None]:
# Run PCA on the dimension-0 leukocyte features (keeping 99% of the variance via
# `pratio`) and cache the results on disk.
# NOTE(review): the dim-1 section calls `PI_to_PCA` instead - confirm that
# `PI0_to_PCA` is the intended helper here.
transformed, eigenvectors, variance_1, variance_2, _ = PI0_to_PCA(PI0_new; pratio = 0.99)


save("analysis/leukocytes/dim_0/PCA.jld2",
    "transformed", transformed,
    "eigenvectors", eigenvectors,
    "variance_1", variance_1,
    "variance_2", variance_2)

# Summary of the fit.
# NOTE(review): "number of components" assumes `eigenvectors` holds one entry per
# component - verify against the helper's return value.
println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# load the cached dim-0 PCA results (overwrites `transformed` and `eigenvectors`)
data = load("analysis/leukocytes/dim_0/PCA.jld2")
transformed = data["transformed"]
eigenvectors = data["eigenvectors"];

In [None]:
# Interactive (Plotly) view of the first two PCA coordinates. Each marker
# carries its column index as hover text; only the 0 tick is drawn on each axis.
plotly()
y = transformed
n = size(y, 2)
p = scatter(
    y[1, :],
    y[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = (0, 0),
    yticks = (0, 0),
    framestyle = :box,
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    hover = 1:n,
    legend = :topright,
)

In [None]:
# Example groups R1-R4: each holds three column indices into the PCA coordinates.
# Presumably picked by hand from the interactive plot - TODO confirm.
R1 = [35, 295, 128]
R2 = [361, 311, 355]
R3 = [36, 96, 100]
R4 = [315, 42, 356]

In [None]:
# Static (GR) PCA scatter: all ROIs in light gray, with the example groups
# R1-R4 drawn on top using distinct marker shapes and colours.
gr()

# load the saved PCA coordinates (rows 1 and 2 are plotted as PC1 and PC2)
y = load("analysis/leukocytes/dim_0/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = setdiff(1:n, annotated)
# NOTE(review): the percentages in the axis titles are hard-coded; presumably
# they come from the variance values printed after the PCA fit - verify.
p = scatter(y[1, nonannotated], y[2, nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 0.1,
        label = "",
        xaxis = "PC1 (92%)",
        yaxis = "PC2 (7%)",
        xticks = (0, 0),
        yticks = (0, 0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        legend = :topright
        )
# `labels` is an alias of `label` in Plots.jl, so the former `label = ""` next
# to `labels = "R1"` was contradictory; only the legend name is passed now.
scatter!(y[1,R1], y[2,R1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "R1", legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "R2")
scatter!(y[1,R3], y[2,R3], markersize = markersize, markershape = :star, markercolor = c3, labels = "R3")
scatter!(y[1,R4], y[2,R4], markersize = markersize, markershape = :diamond, markercolor = c4, labels = "R4")
savefig("analysis/leukocytes/dim_0/pca.pdf")
plot(p)

In [None]:
# Build the example gallery: for every ROI in R1-R4, one scatter of its
# leukocyte positions followed by the matching PSRH image.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table; only rows whose class is "leukocytes" are drawn
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))
        df_cell = df[df.class .== "leukocytes", :]

        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end

In [None]:
# Assemble the gallery: 4 groups x 3 ROIs x 2 panels (cells, PSRH) = 24 panels,
# laid out as 4 rows of 6, then written to disk.
p = plot(plot_array..., layout = grid(4, 6), size = (250 * 6, 250 * 4))
savefig("analysis/leukocytes/dim_0/PCA_examples.png")

# Leukocyte, dim 1

In [None]:
# create indices
# (the commented-out lines show how the index -> file mapping can be rebuilt and
# saved; the active code loads the stored mapping instead)
#files = collect(keys(PI1_leukocytes))
#idx_files = Dict(i => files[i] for i=1:length(files));

#save("analysis/leukocytes/leukocytes_PI1_idx_files.jld2", "idx_files", idx_files)
idx_files = load("analysis/leukocytes/leukocytes_PI1_idx_files.jld2")["idx_files"];
# inverse mapping: file name -> index
file_idx = Dict(v => k for (k,v) in idx_files);

# re-key the dim-1 PIs by integer index 1..n instead of by file name
PI1_new = Dict(i => PI1_leukocytes[idx_files[i]] for i = 1:length(file_idx));

## UMAP

In [None]:
# Fix the global RNG seed before embedding (presumably so the UMAP layout is
# reproducible), centre the PIs, and compute a 2-D UMAP with 5 neighbours.
Random.seed!(10)
PI_centered = center_PI(PI1_new);
embedding = umap(PI_centered, 2; n_neighbors = 5);

In [None]:
# Persist the embedding as CSV, then read it straight back so that `y` and
# `embedding` both hold the on-disk version.
writedlm("analysis/leukocytes/dim_1/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/leukocytes/dim_1/umap.csv", ',');

In [None]:
# Static (GR) scatter of the dim-1 UMAP embedding, saved as PDF and displayed.
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :],
    embedding[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = [],
    yticks = [],
    framestyle = :box,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    hover = 1:n,
    legend = :topright,
)
savefig("analysis/leukocytes/dim_1/umap.pdf")
plot(p)

The following plots are from the old PI size (100 x 100).

In [None]:
# Interactive (Plotly) view of the dim-1 UMAP embedding. Each marker carries
# its column index as hover text, so individual points can be identified.
plotly()
n = size(embedding, 2)
p = scatter(
    embedding[1, :],
    embedding[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = [],
    yticks = [],
    framestyle = :box,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    hover = 1:n,
    legend = :topright,
)

In [None]:
# Example groups U1-U4: each holds three column indices into the dim-1 UMAP
# embedding. Presumably picked by hand from the interactive plot - TODO confirm.
U1 = [389, 144, 231]
U2 = [316, 348, 372]
U3 = [103, 183, 396]
U4 = [305, 395, 150]

In [None]:
# Static (GR) UMAP scatter: all ROIs in light gray, with the example groups
# U1-U4 drawn on top using distinct marker shapes and colours.
gr()

# load the saved UMAP embedding (rows 1 and 2 are the two UMAP coordinates)
y = readdlm("analysis/leukocytes/dim_1/umap.csv", ',');

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(U1, U2, U3, U4)
# background points = every column index that is not in a highlighted group
nonannotated = setdiff(1:size(y, 2), annotated)
p = scatter(
    y[1, nonannotated],
    y[2, nonannotated];
    markercolor = "lightgray",
    alpha = 0.6,
    markersize = markersize,
    markerstrokewidth = 0.1,
    label = "",
    xaxis = "UMAP-1",
    yaxis = "UMAP-2",
    ticks = [],
    guidefontsize = 7,
    framestyle = :box,
    size = (200, 150),
    legend = :topright,
)
scatter!(y[1, U1], y[2, U1]; markersize = markersize, markershape = :rect, markercolor = c1, labels = "U1", legendfontsize = legendfontsize)
scatter!(y[1, U2], y[2, U2]; markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "U2")
scatter!(y[1, U3], y[2, U3]; markersize = markersize, markershape = :star, markercolor = c3, labels = "U3")
scatter!(y[1, U4], y[2, U4]; markersize = markersize, markershape = :diamond, markercolor = c4, labels = "U4")
savefig("analysis/leukocytes/dim_1/umap.pdf")
plot(p)

In [None]:
# Build the example gallery: for every ROI in U1-U4, one scatter of its
# leukocyte positions followed by the matching PSRH image.
regions = Dict(1 => U1, 2 => U2, 3 => U3, 4 => U4)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table; only rows whose class is "leukocytes" are drawn
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))
        df_cell = df[df.class .== "leukocytes", :]

        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end

In [None]:
# Assemble the gallery: 4 groups x 3 ROIs x 2 panels (cells, PSRH) = 24 panels,
# laid out as 4 rows of 6, then written to disk.
p = plot(plot_array..., layout = grid(4, 6), size = (250 * 6, 250 * 4))
savefig("analysis/leukocytes/dim_1/UMAP_examples.png")

## PCA

In [None]:
# Run PCA on the dimension-1 leukocyte features (keeping 99% of the variance via
# `pratio`) and print a summary.
# NOTE(review): the save call below is commented out, while the next cell loads
# analysis/leukocytes/dim_1/PCA.jld2 - the file on disk may stem from an earlier run.
transformed, eigenvectors, variance_1, variance_2, _ = PI_to_PCA(PI1_new; pratio = 0.99)


#save("analysis/leukocytes/dim_1/PCA.jld2",
#    "transformed", transformed,
#    "eigenvectors", eigenvectors,
#    "variance_1", variance_1,
#    "variance_2", variance_2)

println("number of components: ", length(eigenvectors))
println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# load the cached dim-1 PCA results (overwrites `transformed` and `eigenvectors`)
data = load("analysis/leukocytes/dim_1/PCA.jld2")
transformed = data["transformed"]
eigenvectors = data["eigenvectors"];

In [None]:
# Interactive (Plotly) view of the first two dim-1 PCA coordinates. Each marker
# carries its column index as hover text; only the 0 tick is drawn on each axis.
plotly()
y = transformed
n = size(y, 2)
p = scatter(
    y[1, :],
    y[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = (0, 0),
    yticks = (0, 0),
    framestyle = :box,
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    hover = 1:n,
    legend = :topright,
)

In [None]:
# Example groups R1-R4: each holds three column indices into the dim-1 PCA
# coordinates. Presumably picked by hand from the interactive plot - TODO confirm.
R1 = [160, 400, 125]
R2 = [128, 389, 35]
R3 = [239, 182, 102]
R4 = [83, 259, 127]

In [None]:
# Static (GR) PCA scatter: all ROIs in light gray, with the example groups
# R1-R4 drawn on top using distinct marker shapes and colours.
gr()

# load the saved PCA coordinates (rows 1 and 2 are plotted as PC1 and PC2)
y = load("analysis/leukocytes/dim_1/PCA.jld2")["transformed"]

markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = setdiff(1:n, annotated)
# NOTE(review): the percentages in the axis titles are hard-coded; presumably
# they come from the variance values printed after the PCA fit - verify.
p = scatter(y[1, nonannotated], y[2, nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 0.1,
        label = "",
        xaxis = "PC1 (75%)",
        yaxis = "PC2 (14%)",
        xticks = (0, 0),
        yticks = (0, 0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        legend = :topleft
        )
# `labels` is an alias of `label` in Plots.jl, so the former `label = ""` next
# to `labels = "R1"` was contradictory; only the legend name is passed now.
scatter!(y[1,R1], y[2,R1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "R1", legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "R2")
scatter!(y[1,R3], y[2,R3], markersize = markersize, markershape = :star, markercolor = c3, labels = "R3")
scatter!(y[1,R4], y[2,R4], markersize = markersize, markershape = :diamond, markercolor = c4, labels = "R4")
savefig("analysis/leukocytes/dim_1/pca.pdf")
plot(p)

In [None]:
# Build the example gallery: for every ROI in R1-R4, one scatter of its
# leukocyte positions followed by the matching PSRH image.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table; only rows whose class is "leukocytes" are drawn
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))
        df_cell = df[df.class .== "leukocytes", :]

        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end

In [None]:
# Assemble the gallery: 4 groups x 3 ROIs x 2 panels (cells, PSRH) = 24 panels,
# laid out as 4 rows of 6, then written to disk.
p = plot(plot_array..., layout = grid(4, 6), size = (250 * 6, 250 * 4))
savefig("analysis/leukocytes/dim_1/PCA_examples.png")

## Plot example ROIs with high and low PC

In [None]:
# For each of the first four principal coordinates, take the ROIs with the
# smallest and largest values (4 each, the helper's default `n`) and pass them,
# with a per-PC output path, to the plotting helper.
# NOTE(review): `plot_low_high_PC_leukocytes_PSRH` is defined outside this
# section; presumably it mirrors the cancer variant and saves to the given path.
for i = 1:4
    min_indices, max_indices = get_coordinate_min_max_examples(transformed, i)
    plot_low_high_PC_leukocytes_PSRH(min_indices, max_indices, idx_files, "analysis/leukocytes/dim_1/PCA_coordinates/PC" * string(i) * "_examples.png")
end

Plot example ROIs with high and low values of one PC, restricted to ROIs whose coordinates along the other PCs are close to 0.

In [None]:
# Histogram of each of the first four PCA coordinates across all ROIs, shown in
# a 2 x 2 grid.
plot_array = []
for i = 1:4
    p = histogram(transformed[i,:], label = "", title = "PC " * string(i))
    push!(plot_array, p)
end
plot(plot_array..., layout = grid(2,2), size = (800, 500))

In [None]:
# One threshold per principal coordinate (PC1..PC4), passed to the selection
# helper together with the coordinate of interest.
# NOTE(review): `get_small_large_coordinate_examples` is defined outside this
# section; presumably the thresholds bound how far the *other* coordinates may
# be from 0 - verify against its definition.
thresholds = [0.005, 0.001, 0.001, 0.0001]
small_1, large_1 = get_small_large_coordinate_examples(transformed, 1, thresholds; n_coordinates = 4 )
small_2, large_2 = get_small_large_coordinate_examples(transformed, 2, thresholds; n_coordinates = 4 )
small_3, large_3 = get_small_large_coordinate_examples(transformed, 3, thresholds; n_coordinates = 4 )
small_4, large_4 = get_small_large_coordinate_examples(transformed, 4, thresholds; n_coordinates = 4 );

In [None]:
# Pass the controlled small/large examples of each PC, with a per-PC
# "*_examples_controlled.png" path, to the plotting helper.
plot_low_high_PC_leukocytes_PSRH(small_1, large_1, idx_files, "analysis/leukocytes/dim_1/PCA_coordinates/PC1_examples_controlled.png")
plot_low_high_PC_leukocytes_PSRH(small_2, large_2, idx_files, "analysis/leukocytes/dim_1/PCA_coordinates/PC2_examples_controlled.png")
plot_low_high_PC_leukocytes_PSRH(small_3, large_3, idx_files, "analysis/leukocytes/dim_1/PCA_coordinates/PC3_examples_controlled.png")
plot_low_high_PC_leukocytes_PSRH(small_4, large_4, idx_files, "analysis/leukocytes/dim_1/PCA_coordinates/PC4_examples_controlled.png")

# Leukocytes, combined dimensions 0 and 1

In [None]:
# Load the leukocyte PDs for dimensions 0 and 1, compute their PIs, and load the
# stored maxima.
# load PD
PD = load("data/4000x4000_combined/cells_PD/PD.jld2")
PD0_leukocytes = PD["PD0_leukocytes"]
PD1_leukocytes = PD["PD1_leukocytes"]

# compute PI (with smaller size)
PI0_leukocytes = compute_PI2(PD0_leukocytes)
PI1_leukocytes = compute_PI2(PD1_leukocytes)

# load PD_max
# NOTE(review): the two *_max values are not used anywhere in the visible cells.
PD_max = load("data/4000x4000_combined/cells_PD/PD_max.jld2")
leukocytes0_max = PD_max["leukocytes0_max"]
leukocytes1_max = PD_max["leukocytes1_max"];

In [None]:
# Feature dictionaries to combine (dim-0 and dim-1 leukocyte PIs).
dicts = [PI0_leukocytes, PI1_leukocytes];

# Keep only the keys of the first dictionary that also occur in every other one.
all_keys = Any[k for k in keys(dicts[1]) if all(d -> haskey(d, k), dicts[2:end])]

# Stack the features of each shared key into one vector: the dim-0 PI first,
# followed by the flattened dim-1 PI.
features = Dict()
for f in all_keys
    features[f] = vcat(PI0_leukocytes[f], vec(PI1_leukocytes[f]))
end

In [None]:
# Index <-> ROI mapping for the combined dim-0/dim-1 features. The commented-out
# lines show how the mapping can be rebuilt and saved; the active code loads the
# stored mapping and builds its inverse.
#ROIs = collect(keys(features))
#idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

#save("analysis/leukocytes/dim_01_combined/idx_ROI.jld2", "idx_ROI", idx_ROI)
idx_ROI = load("analysis/leukocytes/dim_01_combined/idx_ROI.jld2")["idx_ROI"];
ROI_idx = Dict(v => k for (k,v) in idx_ROI);

In [None]:
# prepare features array
n = length(idx_ROI)
# one column per ROI, in index order 1..n
features_array = hcat([features[idx_ROI[i]] for i = 1:n]...)
println("features array shape: ", size(features_array))

# subtract each feature's (row-wise) mean taken across all ROIs
features_centered = features_array .- mean(features_array, dims = 2);

In [None]:
# 2-D UMAP of the centred combined features, with 5 neighbours.
# NOTE(review): unlike the dim-1 UMAP cell, no `Random.seed!` call precedes this
# one - confirm whether a reproducible embedding is wanted here.
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
# Persist the embedding as CSV; the commented-out line reloads it from disk.
writedlm("analysis/leukocytes/dim_01_combined/umap.csv", embedding, ",")
#y = embedding = readdlm("analysis/leukocytes/dim_01_combined/umap.csv", ',');

In [None]:
# Static (GR) scatter of the combined dim-0/dim-1 UMAP embedding. Hover text and
# saving to SVG are currently disabled.
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :],
    embedding[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = [],
    yticks = [],
    framestyle = :box,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    #hover = 1:n,
    legend = :topright,
)
#savefig("analysis/leukocytes/dim_01_combined/umap.svg")

# Combination - cancer, leukocytes, dim 0, dim 1

In [None]:
# load cancer & leukocyte features
# (precomputed PIs for both cell types, dimensions 0 and 1, from a single file)
PI = load("data/4000x4000_combined/cells_PD/PI.jld2")
cancer_PI0 = PI["PI0_cancer"]
cancer_PI1 = PI["PI1_cancer"]
leukocytes_PI0 = PI["PI0_leukocytes"]
leukocytes_PI1 = PI["PI1_leukocytes"];

In [None]:
# Feature dictionaries to combine (cancer and leukocyte PIs, dim 0 and dim 1).
dicts = [cancer_PI0, cancer_PI1, leukocytes_PI0, leukocytes_PI1];

# Keep only the keys of the first dictionary that also occur in every other one.
all_keys = Any[k for k in keys(dicts[1]) if all(d -> haskey(d, k), dicts[2:end])]

# Stack the features of each shared key into one vector, in the order
# cancer dim 0, leukocytes dim 0, cancer dim 1 (flattened), leukocytes dim 1 (flattened).
features = Dict()
for f in all_keys
    features[f] = vcat(
        cancer_PI0[f],
        leukocytes_PI0[f],
        vec(cancer_PI1[f]),
        vec(leukocytes_PI1[f]),
    )
end

In [None]:
# Enumerate the ROIs that have combined features and build an index -> ROI map.
# NOTE(review): the next cell loads a stored mapping into `idx_files` and the
# visible code never reads this `idx_ROI` again - confirm it is still needed.
ROIs = collect(keys(features))
idx_ROI = Dict(i => roi for (i, roi) in enumerate(ROIs));

In [None]:
# Load the stored index -> ROI mapping (kept under the name `idx_files`, which
# the plotting cells expect) and build its inverse.
#save("analysis/combined_cancer_leukocytes/idx_ROI.jld2", "idx_ROI", idx_ROI)
idx_files = load("analysis/combined_cancer_leukocytes/idx_ROI.jld2")["idx_ROI"];
file_idx = Dict(v => k for (k,v) in idx_files);

In [None]:
# prepare features array
n = length(idx_files)
# one column per ROI, in index order 1..n
features_array = hcat([features[idx_files[i]] for i = 1:n]...)
println("features array shape: ", size(features_array))

# subtract each feature's (row-wise) mean taken across all ROIs
features_centered = features_array .- mean(features_array, dims = 2);

In [None]:
# save features_array
# (saving is commented out; the stored array is loaded instead)
# NOTE(review): `features_centered` was computed in the previous cell from the
# in-memory array and is not recomputed after this reload - confirm the two agree.
#save("analysis/combined_cancer_leukocytes/features.jld2", "features", features_array)
features_array = load("analysis/combined_cancer_leukocytes/features.jld2")["features"];

## UMAP

In [None]:
# 2-D UMAP of the centred combined cancer + leukocyte features, with 5 neighbours.
# NOTE(review): no `Random.seed!` call precedes this cell, and the next cell
# replaces `embedding` with the CSV stored on disk.
embedding = umap(features_centered, 2; n_neighbors = 5);

In [None]:
# Writing is commented out; `y` and `embedding` are both set to the embedding
# stored on disk.
#writedlm("analysis/combined_cancer_leukocytes/umap.csv", embedding, ",")
y = embedding = readdlm("analysis/combined_cancer_leukocytes/umap.csv", ',');

In [None]:
# Static (GR) scatter of the combined cancer + leukocyte UMAP embedding, saved
# as SVG.
gr()
n = size(embedding, 2)
p = scatter(
    embedding[1, :],
    embedding[2, :];
    markercolor = "slategrey",
    markersize = 5,
    label = "",
    xticks = [],
    yticks = [],
    framestyle = :box,
    xlabel = "UMAP-1",
    ylabel = "UMAP-2",
    guidefontsize = 15,
    leftmargin = 5mm,
    size = (450, 350),
    hover = 1:n,
    legend = :topright,
)
savefig("analysis/combined_cancer_leukocytes/umap.svg")

In [None]:
# Example groups U1-U9: each holds three column indices into the combined UMAP
# embedding. Presumably picked by hand from the embedding plot - TODO confirm.
U1 = [384, 281, 24]
U2 = [231, 288, 254]
U3 = [189, 385, 347]
U4 = [357, 115, 308]
U5 = [133, 299, 170]
U6 = [171, 87, 215]
# alternative choice for U6, currently disabled
#U6 = [54, 123, 131]
U7 = [150, 101, 274]
U8 = [237, 346, 275]
U9 = [341, 370, 152]


In [None]:

# Static (GR) UMAP scatter: all ROIs in light gray, with the example groups
# U1-U9 drawn on top using distinct marker shapes and colours.
gr()

markersize = 3
legendfontsize = 5
n = size(y, 2)
# Every highlighted group has to be listed here; a group that is left out is
# drawn twice (once in gray as background, once as a coloured marker).
annotated = vcat(U1, U2, U3, U4, U5, U6, U7, U8, U9)
nonannotated = setdiff(1:n, annotated)
p = scatter(y[1, nonannotated], y[2, nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 3,
        label = "",
        xaxis = "UMAP-1",
        yaxis = "UMAP-2",
        ticks = [],
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        legend = :bottomleft,
        background_color=:transparent, foreground_color=:black,
        )
scatter!(y[1,U1], y[2,U1], markersize = markersize, markershape = :rect, markercolor = c1, labels = "U1", legendfontsize = legendfontsize)
scatter!(y[1,U2], y[2,U2], markersize = markersize, markershape = :utriangle, markercolor = c2, labels = "U2")
scatter!(y[1,U3], y[2,U3], markersize = markersize, markershape = :star, markercolor = c3, labels = "U3")
scatter!(y[1,U4], y[2,U4], markersize = markersize, markershape = :pentagon, markercolor = c4, labels = "U4")
scatter!(y[1,U5], y[2,U5], markersize = markersize, markershape = :diamond, markercolor = c5, labels = "U5")
scatter!(y[1,U6], y[2,U6], markersize = markersize, markershape = :dtriangle, markercolor = c6, labels = "U6")
scatter!(y[1,U7], y[2,U7], markersize = markersize, markershape = :star8, markercolor = c7, labels = "U7")
scatter!(y[1,U8], y[2,U8], markersize = markersize, markershape = :octagon, markercolor = c8, labels = "U8")
scatter!(y[1,U9], y[2,U9], markersize = markersize, markershape = :star4, markercolor = c9, labels = "U9")
savefig("analysis/combined_cancer_leukocytes/umap.svg")
plot(p)

In [None]:
# Build the example gallery: for every ROI in U1-U9, three panels in a row -
# cancer-cell positions, leukocyte positions, and the matching PSRH image.
regions = Dict(1 => U1, 2 => U2, 3 => U3, 4 => U4, 5 => U5, 6 => U6, 7 => U7, 8 => U8, 9 => U9)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table shared by both scatter panels
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))

        # panel 1: rows whose class is "cancer"
        df_cell = df[df.class .== "cancer", :]
        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_cancer,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # panel 2: rows whose class is "leukocytes"
        df_cell = df[df.class .== "leukocytes", :]
        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # panel 3: PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end


In [None]:
# Assemble the gallery: one row per group (`n` rows), each with 3 ROIs x 3 panels
# (cancer, leukocytes, PSRH) = 9 columns, then written to disk.
p = plot(plot_array..., layout = grid(n, 3 * 3), size = (250 * 3 * 3, 250 * n))
savefig("analysis/combined_cancer_leukocytes/UMAP_examples.png")

## PCA

In [None]:
# prepare features array
# (the in-memory construction is commented out; the stored array is loaded instead)
n = length(idx_files)
#features_array = hcat([features[idx_files[i]] for i = 1:n]...)
#println("features array shape: ", size(features_array))
features_array = load("analysis/combined_cancer_leukocytes/features.jld2")["features"];

# subtract each feature's (row-wise) mean taken across all ROIs
features_centered = features_array .- mean(features_array, dims = 2);

In [None]:
# Variance ratios preserved when the PCA is capped at 1, 2 and 4 output
# dimensions. Only the ratios are needed from these fits, so the data is not
# transformed with them (the earlier version computed three transforms that
# were immediately overwritten).
variance_1 = principalratio(fit(PCA, features_centered, maxoutdim = 1))
variance_2 = principalratio(fit(PCA, features_centered, maxoutdim = 2))
variance_4 = principalratio(fit(PCA, features_centered, maxoutdim = 4))

# perform PCA, keeping enough components to preserve 99% of the variance
M = fit(PCA, features_centered, pratio = 0.99)
transformed = MultivariateStats.transform(M, features_centered)

In [None]:
# Cache the PCA coordinates and the 1- and 2-component variance ratios, then
# print a short summary.
# NOTE(review): `variance_4` is computed in the previous cell but not stored here.
save("analysis/combined_cancer_leukocytes/PCA.jld2",
    "transformed", transformed,
    "variance_1", variance_1,
    "variance_2", variance_2)

println("variance explained by 1 eigenvectors: ", variance_1)
println("variance explained by 2 eigenvectors: ", variance_2)
println("variance difference between 2 and 1:", variance_2 - variance_1)

In [None]:
# reload the cached PCA coordinates into `y` for the plotting cells below
y = load("analysis/combined_cancer_leukocytes/PCA.jld2")["transformed"];

In [None]:
# save to csv for plotting
df_pca1 = convert(DataFrame, Array(Transpose(y)))
col_names = ["pca_coord_" * string(i) for i = 1:5];
idx_ROI_list = [idx_files[i] for i = 1:401];
rename!(df_pca1, col_names)
df_pca1[:, :idx_ROI] = idx_ROI_list;
#CSV.write("analysis/combined_cancer_leukocytes/PCA.csv", df_pca1)

In [None]:
# Interactive (Plotly) view of the first two PCA coordinates. Each marker
# carries its column index as hover text, so individual points can be identified.
plotly()
n = size(y, 2)

# `y` holds PCA coordinates here, so the axes are labelled PC1/PC2 (they were
# previously labelled "UMAP-1"/"UMAP-2", copied from the UMAP cells).
p = scatter(y[1,:], y[2,:],
        markercolor = "slategrey",
        markersize = 5,
        label = "",
        xticks = [],
        yticks = [],
        framestyle = :box,
        xlabel = "PC1",
        ylabel = "PC2",
        guidefontsize = 15,
        leftmargin = 5mm,
        size = (450, 350),
        hover = 1:n,
        legend = :topright)

In [None]:
# Example groups R1-R4: each holds three column indices into the combined PCA
# coordinates. Presumably picked by hand from the interactive plot - TODO confirm.
R1 = [384, 211, 239]
R2 = [363, 15, 221]
R3 = [67, 298, 12]
R4 = [172, 143, 134]

In [None]:
# Print each example group followed by the ROI names its indices map to.
for R in [R1, R2, R3, R4]
    println(string(R))
    foreach(f -> println(idx_files[f]), R)
end

In [None]:
# Static (GR) PCA scatter: all ROIs in light gray, with the example groups
# R1-R4 drawn on top in distinct colours.
gr()


markersize = 3
legendfontsize = 5
n = size(y, 2)
annotated = vcat(R1, R2, R3, R4)
nonannotated = setdiff(1:n, annotated)
# NOTE(review): the percentages in the axis titles are hard-coded; presumably
# they come from the variance values printed after the PCA fit - verify.
p = scatter(y[1, nonannotated], y[2, nonannotated],
        markercolor = "lightgray",
        alpha = 0.6,
        markersize = markersize,
        markerstrokewidth = 1,
        label = "",
        xaxis = "PC1 (60%)",
        yaxis = "PC2 (34%)",
        #xtickfontsize = 15,
        #ytickfontsize = 15,
        #xrotation = 45,
        xticks = (0, 0),
        yticks = (0, 0),
        guidefontsize = 7,
        framestyle = :box,
        size = (200, 150),
        #leftmargin = 2mm,
        legend = :bottomleft,
        #background_color=:transparent, foreground_color=:black,
        )
# `labels` is an alias of `label` in Plots.jl, so the former `label = ""` next
# to `labels = "R1"` was contradictory; only the legend name is passed now.
scatter!(y[1,R1], y[2,R1], markersize = markersize, markercolor = c1, labels = "R1", legendfontsize = legendfontsize)
scatter!(y[1,R2], y[2,R2], markersize = markersize, markercolor = c2, labels = "R2")
scatter!(y[1,R3], y[2,R3], markersize = markersize, markercolor = c3, labels = "R3")
scatter!(y[1,R4], y[2,R4], markersize = markersize, markercolor = c4, labels = "R4")
#savefig("analysis/combined_cancer_leukocytes/pca.svg")
plot(p)

In [None]:
# Build the example gallery: for every ROI in R1-R4, three panels in a row -
# cancer-cell positions, leukocyte positions, and the matching PSRH image.
regions = Dict(1 => R1, 2 => R2, 3 => R3, 4 => R4)
gr()
plot_array = []
n = length(regions)
for i in 1:n
    R = regions[i]
    for idx in R
        f = idx_files[idx]

        # per-ROI cell table shared by both scatter panels
        # NOTE(review): recent CSV.jl releases require a sink argument, e.g.
        # CSV.read(path, DataFrame) - confirm against the pinned CSV.jl version
        df = CSV.read(string("data/4000x4000_combined/subregion_cells/", f, ".csv"))

        # panel 1: rows whose class is "cancer"
        df_cell = df[df.class .== "cancer", :]
        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_cancer,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # panel 2: rows whose class is "leukocytes"
        df_cell = df[df.class .== "leukocytes", :]
        p = scatter(
            df_cell.x,
            df_cell.y;
            markersize = 1.5,
            yflip = true,
            label = "",
            markerstrokewidth = 0.2,
            frame = :box,
            ticks = [],
            aspect_ratio = :equal,
            size = (150, 150),
            background_color = :transparent,
            foreground_color = :black,
            c = c_leukocytes,
            right_margin = -5mm,
        )
        push!(plot_array, p)

        # panel 3: PSRH image of the same ROI
        p_PSRH = Images.load(string("data/4000x4000_combined/PSRH/", f, ".tif"))
        push!(
            plot_array,
            plot(p_PSRH; ticks = [], frame = :box, bottom_margin = -3mm, right_margin = 5mm),
        )
    end
end


In [None]:
# Assemble the gallery: one row per group (`n` rows), each with 3 ROIs x 3 panels
# (cancer, leukocytes, PSRH) = 9 columns, then written to disk.
p = plot(plot_array..., layout = grid(n, 3 * 3), size = (250 * 3 * 3, 250 * n))
savefig("analysis/combined_cancer_leukocytes/PCA_examples.png")