# Image resolution sensitivity analysis

In [2]:
include("../src/ECM_TDA.jl")
using .ECM_TDA

using Images
using CSV
using Plots
using JLD2
using Random
using StatsBase
using Distributions
using DataFrames
using PersistenceDiagrams
using LinearAlgebra
using Measures
using StatsPlots

In [3]:
# Hex color string; not referenced in the visible cells
# (presumably the plotting color used for ECM figures -- TODO confirm).
c_ECM = "#259ea1" 

"#259ea1"

## Comparison: PI difference between two ROIs
* To get an idea of what the PI difference means, we compute the difference in PI between two ROIs:
* ROI1: LTX050_Da316_idx2
* ROI2: LTX001_Da232_idx1

In [None]:
# Load the persistence diagrams (PDs) saved by the main analysis.
PD = load("../main_analysis/data/4000x4000_combined/ECM_PD/PD.jld2")
PD0 = PD["PD0_ECM"]
PD1 = PD["PD1_ECM"]

# Convert each stored diagram with `ECM_TDA.array_to_ripsererPD`, skipping ROIs whose
# diagram was stored as `nothing`.
PH0_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD0 if !isnothing(v));
PH1_dict = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in PD1 if !isnothing(v));

# Recompute coarser persistence images (PIs): one transform per homology dimension,
# built from all available diagrams so every ROI shares the same grid.
PI0 = PersistenceImage([ph for ph in values(PH0_dict)], sigma = 50, size = 20)
PI1 = PersistenceImage([ph for ph in values(PH1_dict)], sigma = 50, size = 20)

# ROI name => persistence image, for dimension 0 and dimension 1 respectively.
ECM_PI0_original = Dict(k => PI0(ph) for (k, ph) in PH0_dict)
ECM_PI1_original = Dict(k => PI1(ph) for (k, ph) in PH1_dict);


In [None]:
# The two ROIs whose dimension-1 persistence images are compared.
ex1 = "LTX001_Da232_idx1"
ex2 = "LTX050_Da316_idx2"

# Look the PIs up through ex1/ex2 so the names are defined in exactly one place.
pi1 = ECM_PI1_original[ex1]
pi2 = ECM_PI1_original[ex2];

# load PD
original_PD1 = load("../main_analysis/data/4000x4000_combined/ECM_PD/PD.jld2")["PD1_ECM"]

# dimension-1 persistence diagrams of the same two ROIs
ex1_pd1 = original_PD1[ex1]
ex2_pd1 = original_PD1[ex2];

In [None]:
# Difference in PI between the two ROIs, measured as the norm of the element-wise
# difference of the two images (for matrices, LinearAlgebra's `norm` is the Frobenius norm).
# Serves as a reference scale for the differences reported further below.
norm(pi1-pi2)

# Run analysis for 100 randomly selected ECM images
1. Compute the distances for each factor
2. Compute the average distance from different samples (as comparison)

In [None]:
# get all ROIs: files directly inside `dir` (the first `walkdir` entry is
# (root, dirs, files)). Only ".csv" names are kept, because later cells strip the last
# four characters of each name to recover the ROI identifier.
dir = "../main_analysis/data/4000x4000_combined/ECM_sampled/points_CSV/"
csv_files = filter(name -> endswith(name, ".csv"), first(walkdir(dir))[3])

# randomly select 100 ROIs
# (no RNG seed is set here, so the draw differs between runs; the selection that is
# actually used is read back from "100_ROIs.jld2" -- see the commented `save` below)
ROIs = sample(csv_files, 100; replace = false);

# parameters for sampling points from ECM image
low = 0.00063768
high = 0.4025769
min_sample = 100
max_sample = 5000

# directories
ECM_directory = "../main_analysis/data/4000x4000_combined/subregion_ECM/"
sample_CSV_directory = "image_resolution_sensitivity/";

# factors to consider: 1 (no downsampling) followed by 4, 8, 12, 16
factors = [1; 4:4:16];

#save("100_ROIs.jld2", "ROI", ROIs)

In [10]:
# load the 100 ROIs
# Reads the entry stored under the key "ROI" in "100_ROIs.jld2" and overwrites any
# `ROIs` drawn in the current session, so later cells work on the saved selection.
ROIs = load("100_ROIs.jld2")["ROI"];

In [None]:
# for each factor, create downsampled images & compute PD
#
# For every selected ROI: load its ECM image, and for each scale factor shrink the image
# by 1/f, enlarge it again, sample points from the result and compute persistence
# diagrams. One file "PD_PI/<ROI>_PD.jld2" is written per ROI, holding two dictionaries
# ("PD0", "PD1") keyed by factor; a value is `nothing` when no points were sampled.

# Create the output directory up front so the `save` at the end of the first (long)
# iteration cannot fail on a missing folder.
mkpath("PD_PI/")

# Iterate over the ROIs themselves rather than a hard-coded 1:100.
for roi in ROIs
    filename = first(splitext(roi))   # ROI name without its file extension

    # Deliberately untyped: a later cell adds the String key "default" next to the
    # integer factor keys after loading these dictionaries back.
    PD0_ECM = Dict()
    PD1_ECM = Dict()
    img = Array(Images.load(ECM_directory * filename * ".tif"))

    for f in factors
        # get downsampled images
        img_small = imresize(img, ratio = 1 / f)
        # NOTE(review): the target size is size(img_small) .* f, which matches size(img)
        # only if the downsampled size is exactly size(img) / f -- confirm whether
        # resizing back to size(img) was intended.
        img_re = imresize(img_small, size(img_small) .* f)

        # mean inverted intensity of the re-enlarged image, passed to n_ECM_samples
        img_mean_inv = 1 - mean(Float64.(img_re))

        # compute number of points to sample
        n_sample = round(Int, ECM_TDA.n_ECM_samples(img_mean_inv, low, high,
                                                   min_sample, max_sample))
        if n_sample != 0
            # sample points (only the first returned value is used)
            resampled, _, _, _ = sample_ECM_points(img_re, vicinity = 100,
                                                   n_points = 5, n_samples = n_sample)

            # tabulate the sampled points (CSV caching is disabled below)
            points_df = DataFrame(resampled, [:x, :y])
            # CSV.write("sampled_ECM/" * filename *  "_factor_" * string(f) * ".csv", df)

            #df = CSV.read("sampled_ECM/"* filename * "_factor_" * string(f) * ".csv")
            # Names distinct from the notebook-level PD0 / PD1 / df, so this loop does
            # not overwrite those globals when run interactively.
            pd0, pd1 = run_PH(points_df)

            PD0_ECM[f] = pd0
            PD1_ECM[f] = pd1
        else
            PD0_ECM[f] = nothing
            PD1_ECM[f] = nothing
        end
    end
    # save
    save("PD_PI/" * filename * "_PD.jld2", "PD0", PD0_ECM, "PD1", PD1_ECM)
end

In [36]:
# the PD here have factors as additional key
# Loads the saved PDs of one ROI and lists the keys of its "PD1" dictionary
# (one entry per scale factor).
example_PD = load("PD_PI/LTX001_Da256_idx1_PD.jld2")
keys(example_PD["PD1"])

KeySet for a Dict{Any, Any} with 5 entries. Keys:
  4
  16
  12
  8
  1

Compute persistence images for all 100 ROIs and their factors

In [None]:
# Compute persistence images (PIs) for every selected ROI: one PI per scale factor plus
# one for the PD of the default analysis (stored under the key "default"). Results are
# written to "PD_PI/<ROI>_PI.jld2" under "PI0" and "PI1"; both are `nothing` when either
# dimension has no usable diagram.
#
# NOTE(review): PI0_ymin, PI0_ymax, PI0_xmin, PI0_xmax and the PI1_* counterparts are not
# defined in the visible notebook cells; they must be defined before this cell runs.

# PDs used in the default analysis, loaded once instead of twice per ROI.
default_PD = load("../main_analysis/data/4000x4000_combined/ECM_PD/PD.jld2")
default_PD0 = default_PD["PD0_ECM"]
default_PD1 = default_PD["PD1_ECM"]

# compute PI (using the same parameters of PIs in the original 400 ROIs)
# The transforms do not depend on the ROI, so they are built once outside the loop.
PI0_fixed = PersistenceImage((PI0_ymin, PI0_ymax), (PI0_xmin, PI0_xmax),
                             sigma = 50, size = (20, 1))
PI1_fixed = PersistenceImage((PI1_ymin, PI1_ymax), (PI1_xmin, PI1_xmax),
                             sigma = 50, size = (20, 20))

for roi in ROIs
    roi_name = roi[1:end-4]   # drop the trailing ".csv"

    roi_PD = load("PD_PI/" * roi_name * "_PD.jld2")
    roi_PD0 = roi_PD["PD0"]
    roi_PD1 = roi_PD["PD1"]
    # add the PD used in the default analysis
    roi_PD0["default"] = default_PD0[roi_name]
    roi_PD1["default"] = default_PD1[roi_name]

    # convert array to Ripserer PD, dropping entries stored as `nothing`
    PH0 = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in roi_PD0 if !isnothing(v))
    PH1 = Dict(k => ECM_TDA.array_to_ripsererPD(v) for (k, v) in roi_PD1 if !isnothing(v))

    if !isempty(PH0) && !isempty(PH1)
        # Dict{Any,Any} keeps the saved container type unchanged (keys mix integer
        # factors and the String "default").
        ECM_PI0 = Dict{Any,Any}(k => PI0_fixed(ph) for (k, ph) in PH0)
        ECM_PI1 = Dict{Any,Any}(k => PI1_fixed(ph) for (k, ph) in PH1)

        save("PD_PI/" * roi_name * "_PI.jld2", "PI0", ECM_PI0, "PI1", ECM_PI1)
    else
        save("PD_PI/" * roi_name * "_PI.jld2", "PI0", nothing, "PI1", nothing)
    end
end

compute the difference in persistence images
* Here, we compute the structural similarity (SSIM) and Frobenius norm between persistence images.
* In Supp Fig, we report the differences via Frobenius norm

In [None]:
# Build a long table with one row per (factor, ROI): the norm of the difference between
# the dimension-1 PI at that factor and the ROI's "default" PI, plus their SSIM.
df = DataFrame(factor = Int[], roi = String[], diff = Float64[], SSIM = Float64[])

excluded_rois = ["LTX097_Da113_idx2.csv"]   # outlier

# Load each ROI's PIs once (instead of once per factor), preserving the order of ROIs.
# ROIs with no stored PIs, or without a "default" PI to compare against, are skipped.
roi_PI1 = Pair{String,Any}[]
for roi in ROIs
    roi in excluded_rois && continue
    roi_name = roi[1:end-4]   # drop the trailing ".csv"
    pis = load("PD_PI/" * roi_name * "_PI.jld2")["PI1"]
    (isnothing(pis) || !haskey(pis, "default")) && continue
    push!(roi_PI1, roi_name => pis)
end

# Rows are ordered by factor first, then by ROI.
for f in factors
    for (roi_name, pis) in roi_PI1
        haskey(pis, f) || continue
        reference = pis["default"]
        ssim = assess_ssim(reference, pis[f])
        push!(df, (f, roi_name, norm(reference - pis[f]), ssim))
    end
end

In [None]:
# Scale factors to consider: 1 (no downsampling), then every multiple of 4 up to 128.
factors = [1; 4:4:128];

In [None]:
# display the current list of scale factors
factors

In [None]:
# print the average difference for each factor
# `means` stays aligned with `factors`; a factor with no rows in `df` yields NaN
# instead of failing on the mean of an empty selection.
means = map(factors) do f
    diffs = df[df.factor .== f, :diff]
    isempty(diffs) ? NaN : mean(diffs)
end

In [None]:
# Box plot of the persistence-image difference per scale factor, restricted to
# factors below 60, drawn with the GR backend.
gr()
# `df.factor` instead of `df[:factor]`: single-symbol indexing of a DataFrame is not
# supported by current DataFrames.jl.
df_sub = df[df.factor .< 60, :]
@df df_sub boxplot(:factor, :diff, legend=false, 
                #ylims = [0, 0.005], 
                size = (900, 300),
                frame = :box,
                leftmargin = 5mm,
                bottommargin = 5mm,
                xlabel = "1 / scale factor",
                ylabel = "persistence image difference",
                c = "lightgrey")
#plot!(factors, means, lw = 5, c = "#FF007F")
#savefig("sensitivity_plot.png")