# Plotting Betti curves

This notebook contains all the code used to create plots of Betti curves within the main text and supplement.

In [None]:
# Import packages
#
# Records the wall-clock start time, loads every package used by the notebook, and
# includes the project's helper functions.

script_start_time = time()
println("\nimporting packages...")

using Pkg
using Statistics
using LinearAlgebra
using Eirene
using StatsBase
using JLD
using Plots
# Install ColorSchemes only when it cannot be found in the active environment, so that
# re-running this cell does not contact the registry or re-resolve the environment.
if Base.find_package("ColorSchemes") === nothing
    Pkg.add("ColorSchemes")
end
using ColorSchemes
using JSON

# NOTE(review): `read_config`, used in the parameter cell, is presumably defined here.
include("helper_functions.jl")
println("packages and functions imported")
printstyled("Elapsed time = $(time() - script_start_time) seconds \n \n", color = :yellow)

## Set main parameters

The following will extract parameters set within the configuration file.

In [None]:
# Colors: one hex string per Betti dimension, stored as a 1x4 row matrix.
betti_colors = ["#233A4C" "#406372" "#66919C" "#99C2C6"]

# Read from config file
config_file = "config101220.json"
config = read_config(joinpath(homedir(), "configs", config_file))

# Parameters for all graphs
const NNODES = config["NNODES"]
const MAXDIM = config["MAXDIM"]
const NREPS = config["NREPS"]
const DATE_STRING = config["DATE_STRING"]


### Locate data and find nametags
read_dir = "../processed_data/results/$(NNODES)nodes"

# Keep only Betti result files from this run date, excluding any "dsi" files.
betti_files = filter(readdir(read_dir)) do x
    occursin("_bettis", x) && occursin(DATE_STRING, x) && !occursin("dsi", x)
end

println("Located the following graph files:")
for betti_file in betti_files
    println(betti_file)
end


# Extract the nametags - a nametag is the type of experiment/graph. For example "forward",
# "randomized", "thresh_025_threshold" are all nametags.
# The nametag is the part of the file name between "<DATE_STRING>_" and "_bettis".
nametags = unique(String[
    split(split(betti_file, "$(DATE_STRING)_")[2], "_bettis")[1] for betti_file in betti_files
])

# Report how many files carry each nametag.
for nametag in nametags
    println(nametag)
    println(count(x -> occursin(nametag, x), betti_files))
end

nametags

## Plot Betti curves for each value of $\rho_T$ (for example, Fig. 3a)

The code below loops over all files (after an optional filtering step) and, for each nametag, plots the Betti curves of every included file that carries that nametag. For example, if one filters to include only the $m$-clique and clique models, the result is one pdf for each value of $\rho_T$, one for "forward", one for "randomized", and so on.

Be sure to update the filters within the first few lines to avoid having dozens of plots within the same file!

In [None]:
# Plot Betti curves for each type of run (nametag)
#
# For every nametag: stack the Betti curves of all matching files into one array, average
# over the first (replicate) axis, and save one pdf with one subplot per file.

###### ------ Optional filtering step ----- ########
# Start from every located file, then narrow down. Each filter below operates on
# `betti_files_main`, so any of them can be commented in or out independently without
# leaving the variable undefined.
betti_files_main = betti_files
# betti_files_main = filter(x -> occursin("Triangle", x), betti_files_main)
betti_files_main = filter(x -> !occursin("clique", x), betti_files_main)
betti_files_main = filter(x -> !occursin("Triangle", x), betti_files_main)

# Set colors
betti_colors = [["#243a4c"] ["#406372"] ["#66939e"] ["#9bc3c6"]]

# Loop over nametags and generate plots
# Declared before the loop so the most recent averages remain inspectable afterwards.
bettisAllAvg = []
for nametag in nametags

    # Find betti_files with a given nametag
    betti_files_nametag = filter(x -> occursin("$(nametag)", x), betti_files_main)

    # The filters above may have removed every file for this nametag; nothing to plot then.
    if isempty(betti_files_nametag)
        println("No files left for nametag $(nametag) after filtering; skipping")
        continue
    end

    # Subplot titles: the text before the first underscore of each file name.
    # (Named differently from the global `model_names` used by the overlay cell so that
    # running this cell does not overwrite it.)
    panel_names = String[split(file_name, "_")[1] for file_name in betti_files_nametag]

    nModels = length(betti_files_nametag)
    nEdges = binomial(NNODES, 2)
    bettisAll = zeros(NREPS, nEdges, MAXDIM, nModels)

    # Loop over files and create a large array for plotting
    for (i, betti_file_nametag) in enumerate(betti_files_nametag)

        # Read in Betti curves
        betti_dict = load("$(read_dir)/$(betti_file_nametag)")
        if maximum(betti_dict["bettisArray"]) == 0
            print("no bettis")
        end
        bettisAll[:, :, :, i] = betti_dict["bettisArray"]

    end

    # Calculate statistics over the replicate axis.
    bettisAllAvg = dropdims(mean(bettisAll, dims=1), dims=1)
    bettisAllStd_upper = dropdims(std(bettisAll, dims=1), dims=1)
    # Cap the lower ribbon at the mean so the shaded band never drops below zero.
    bettisAllStd_lower = min.(bettisAllStd_upper, bettisAllAvg)

    # Check size
    println("Size bettisAllAvg is $(size(bettisAllAvg))")

    # Plot highest dimension first
    p1a = plot(bettisAllAvg[:, MAXDIM, :],
        layout = 12,
        size = (3000, 2000),
        title = permutedims(panel_names),  # 1 x nModels: one title per subplot
        ribbon = (bettisAllStd_lower[:, MAXDIM, :], bettisAllStd_upper[:, MAXDIM, :]),
        c = betti_colors[MAXDIM],
        fontfamily = :Times)

    # Plot the rest of the dimensions on top
    for d in (MAXDIM-1):-1:1
        plot!(bettisAllAvg[:, d, :], legend = nothing, xlabel = "Edges added", ylabel="Beta_k",
            ribbon = (bettisAllStd_lower[:, d, :], bettisAllStd_upper[:, d, :]),
            c = betti_colors[d],
            # Tick positions are edge counts; tick labels are the fraction of edges added.
            xticks=([(i/5)*nEdges for i=0:5], [(i/5) for i=0:5]), ylims=[0, Inf],
            framestyle = :box,
            fontfamily = :Times,
            guidefontcolor= :pink,
            tickfontcolor= :blue)

    end

    # If the file is of threshold type, add the threshold line
    if occursin("threshold", nametag)

        # Get threshold edge: the integer that follows "edge" in the nametag
        thresh_string = split(split(nametag, "edge")[2], "_")[1]
        threshold_edge = parse(Int, thresh_string)
        println("processing threshold edge $(threshold_edge)")

        # Plot the vertical line (one column per subplot)
        vline!(fill(threshold_edge, 1, nModels), color=:gray)

    end


    savefig("../figures/all_bettis_$(nametag)_$(DATE_STRING).pdf")

end






## Plot Betti curves with all values of $\rho_T$ overlaid (for example, Fig. 3c)

Next we create plots showing the Betti curves for all values of $\rho_T$ overlaid on one subplot. Importantly, these can only be created for files that have the "thresh" tag.

Consider filtering the files to include only the main models, for example.

In [None]:
 ##### -------- Optional filtering -------- ######
# Keep only "thresh" files, then drop the model families that should not be overlaid.
thresh_files = filter(x -> occursin("thresh", x), betti_files)
thresh_files = filter(x -> !occursin("noiseOnly", x), thresh_files)
thresh_files = filter(x -> !occursin("randomized", x), thresh_files)
thresh_files = filter(x -> !occursin("Triangle", x), thresh_files)
thresh_files = filter(x -> !occursin("clique", x), thresh_files)
# Redundant while the "clique" filter above is active (every "cliques" name contains
# "clique"); kept so the two filters can be toggled independently.
thresh_files = filter(x -> !occursin("cliques", x), thresh_files)

# Define save tag to mark the saved pdf based on filtering choices
save_tag = "main"


# Locate the thresh nametags: the part of each file name between "<DATE_STRING>_" and
# "_bettis".
thresh_nametags = unique(String[
    split(split(thresh_file, "$(DATE_STRING)_")[2], "_bettis")[1] for thresh_file in thresh_files
])

# Extract values of rho_T and save them in the rho array.
# The digits following "thresh" are turned into a float by inserting a decimal point
# after the first zero (e.g. "025" -> 0.25). Only the FIRST zero is rewritten, so tags
# with several zeros such as "005" or "050" parse to 0.05 / 0.5 instead of failing.
rhos = Float64[
    parse(Float64, replace(split(split(tag, "thresh")[2], "_")[1], "0" => "0."; count = 1))
    for tag in thresh_nametags
]

rho_order = sortperm(rhos)
println(rhos)
println(rho_order)
println(rhos[rho_order])

# Sort thresh_nametags by increasing rho_T
thresh_nametags = thresh_nametags[rho_order]

# Extract the model names: everything before "_thresh" in each file name
model_names = unique(String[split(thresh_file, "_thresh")[1] for thresh_file in thresh_files])
model_names_short = [split(model_name, "_")[1] for model_name in model_names]


In [None]:
# Generate one pdf file with all Betti curves across rho_T for all models.
#
# For each model, the mean Betti curves for every value of rho_T are drawn on the same
# subplot, with line opacity increasing with rho_T. All subplots are then combined into
# one figure and saved.

# Preparation
nEdges = binomial(NNODES, 2)
plot_array3 = Any[]

# Loop over graph model names
for model_name in model_names

    p3 = plot()
    model = split(model_name, "_")[1]

    # Loop over values of rho_T
    for thresh_nametag in thresh_nametags

        # Load data
        bettis_dict = load("$(read_dir)/$(model_name)_$(thresh_nametag)_bettis.jld")
        bettis = bettis_dict["bettisArray"]

        # Extract rho_T as a float. Only the first zero is rewritten to "0." so that tags
        # containing several zeros (e.g. "005") still parse.
        rho = split(split(thresh_nametag, "thresh")[2], "_")[1]
        rho = parse(Float64, replace(rho, "0" => "0."; count = 1))

        # Calculate mean Betti curves (average over the first axis)
        mean_bettis = dropdims(mean(bettis, dims=1), dims=1)

        # Peak height and first peak location of each mean curve. These are only needed
        # by the optional scatter call below.
        max_val_k = dropdims(maximum(mean_bettis, dims=1), dims=1)
        max_loc = zeros(1, MAXDIM)  # sized by MAXDIM rather than a hard-coded 4
        for dim in 1:MAXDIM
            max_loc[1, dim] = argmax(view(mean_bettis, :, dim))
        end

        plot!(p3, 1:nEdges, mean_bettis, c=betti_colors, alpha = (rho+0.1), legend =false, lw=4,
            xticks=([(i/5)*nEdges for i=0:5], [(i/5) for i=0:5]), ylims=[0, Inf], framestyle = :box)

        # Uncomment the following to add circles at the Betti curve peaks
#         scatter!(p3, max_loc, transpose(max_val_k),lw=0, m=:dot, markeralpha = (rho+0.1), alpha = (rho+0.1),
#         markerstrokecolor=:match, markerstrokewidth=0, c=betti_colors,
#         xticks=([(i/5)*nEdges for i=0:5],[(i/5) for i=0:5]), ylims=[0,Inf], framestyle = :box)

    end

    title!(p3, "$(model)")
    xlabel!(p3, "rho")
    ylabel!(p3, "Beta_k")

    # Add plot to array of plots
    push!(plot_array3, p3)

end

plot(plot_array3..., size = (3000, 2000))
savefig("../figures/all_bcurves$(save_tag)_$(DATE_STRING).pdf")


In [None]:
## Write to json
# Serializes `big_dict` as JSON into ../templates/foo4_triangle.json, overwriting any
# existing file (mode "w"). The do-block closes the file handle when it finishes.
# NOTE(review): `big_dict` is not defined anywhere in the visible notebook; this cell
# will raise an UndefVarError unless it is created elsewhere first - confirm its source.
open("../templates/foo4_triangle.json", "w") do f
    JSON.print(f, big_dict)
end
println("done saving")