# Write data for use in interactive visualizations

This notebook formats data from the main results and saves them for easy use in the [supplementary site](https://asizemore.github.io/noise_and_tda_supplement/). See also the [supplementary site repo](https://github.com/asizemore/noise_and_tda_supplement) for more details.

In [None]:
# Load every package the notebook relies on and report how long that takes

script_start_time = time()
println("\nimporting packages...")

# Standard library
using Pkg, Statistics, LinearAlgebra
# Topology / statistics
using Eirene, StatsBase
# File formats and tabular data
using CSV, JLD, DataFrames, JSON, MAT

# Project-local helpers (presumably where read_config comes from - confirm)
include("helper_functions.jl")
println("packages and functions imported")
printstyled("Elapsed time = $(time() - script_start_time) seconds \n \n"; color = :yellow)

In [None]:
### Set parameters

# Hex colour palette, stored as a 1x4 row of strings
betti_colors = [["#243a4c"] ["#406372"] ["#66939e"] ["#9bc3c6"]]

# Read from config file
config_file = "config101220.json"
config = read_config("$(homedir())/configs/$(config_file)")

# Parameters for all graphs
const NNODES = config["NNODES"]
const MAXDIM = config["MAXDIM"]
const NREPS = config["NREPS"]
const DATE_STRING = config["DATE_STRING"]


### Locate data and find nametags
read_dir = "../processed_data/results/$(NNODES)nodes"

# Keep the Betti result files stamped with this run's date, skipping "dsi" files
betti_files = filter(readdir(read_dir)) do fname
    occursin("_bettis", fname) && occursin(DATE_STRING, fname) && !occursin("dsi", fname)
end

println("Located the following graph files:")
foreach(println, betti_files)


# Locate the nametags: the text between "<DATE_STRING>_" and "_bettis" in each
# file name. Built with a comprehension rather than by reassigning a global
# inside a loop, so this also works when run as a plain script.
nametags = unique([
    split(split(betti_file, "$(DATE_STRING)_")[2], "_bettis")[1]
    for betti_file in betti_files
])


# Report every nametag matched by fewer than 12 files (name, then file count).
# NOTE(review): 12 presumably equals the number of models - confirm.
for nametag in nametags
    nfiles = count(x -> occursin(nametag, x), betti_files)
    if nfiles < 12
        println(nametag)
        println(nfiles)
    end
end

nametags

In [None]:
# Writing to JSON
using JSON

## Filter to include JUST the threshold files
# A file is dropped when its name contains any of these tags
# ("clique" also matches names containing "cliques").
excluded_tags = ("randomized", "clique", "forward", "Triangle", "noiseOnly")
thresh_files = filter(betti_files) do fname
    occursin("threshold", fname) && !any(tag -> occursin(tag, fname), excluded_tags)
end

# print(thresh_files)

## Filter to only include every kth data point - for faster visualization loading
k = 4

# Extract model names: the text before the first underscore of each file name,
# de-duplicated in first-seen order
model_names = unique([split(thresh_file, "_")[1] for thresh_file in thresh_files])

# Use betti_names for the main networks
betti_names = ["IID","assortative","coreperiphery", "cosineGeometric","disassortative", "discreteUniform","dotProduct", "geometricConf", "randomGeometric", "ringLattice", "rmsd", "squaredEuclidean" ];

# betti_names[i] is used as the output label of model_names[i], so the two lists
# have to line up one-to-one.
if length(model_names) != length(betti_names)
    @warn "model_names and betti_names differ in length; output labels may not match the models" model_names betti_names
end

# Nested output: label => (threshold edge => ("dim<d>"/"std<d>" => curve))
bettis_dict = Dict{String,Any}()

# Wide table of the full (un-subsampled) mean curves, one row per edge.
# NOTE(review): this cell fills df but does not write it anywhere.
df = DataFrame(edge = collect(1:binomial(NNODES,2)))

# Loop over models and store in a dictionary
for (i, model_name) in enumerate(model_names)

    model_dict = Dict{String,Any}()

    # Run through all the threshold files and extract Betti curves.
    # Match on the exact leading token so that a model whose name is a prefix
    # of another model's name cannot pick up that model's files.
    model_thresh_files = filter(x -> split(x, "_")[1] == model_name, thresh_files)

    for model_thresh_file in model_thresh_files

        # Load from the directory the file list was read from, rather than a
        # hard-coded node count
        bettisArray = load(joinpath(read_dir, model_thresh_file), "bettisArray")

        # Mean and standard deviation over the first array dimension
        # (presumably the replicate axis - confirm)
        bettisArrayAvg = dropdims(mean(bettisArray, dims=1), dims=1)
        bettisArrayStd = dropdims(std(bettisArray, dims=1), dims=1)

        # Keep only every kth datapoint
        nEdges = size(bettisArray, 2)
        keep_data = 1:k:nEdges

        # Extract the rho value and edge number from the file name, which is
        # expected to contain "_thresh<rho>_edge<number>_"
        thresh_edge = parse(Int, split(split(model_thresh_file, "edge")[2], "_")[1])
        rho = replace(split(split(model_thresh_file, "_edge")[1], "_thresh")[2], "." => "")

        rho_dict = Dict{String,Any}()
        for dim in 1:MAXDIM
            rho_dict["dim$(dim)"] = bettisArrayAvg[keep_data, dim]
            rho_dict["std$(dim)"] = bettisArrayStd[keep_data, dim]
            df[!, "$(model_name)_thresh$(rho)_dim$(dim)"] = bettisArrayAvg[:, dim]
        end

        model_dict["$(thresh_edge)"] = rho_dict

    end


    bettis_dict["$(betti_names[i])"] = model_dict  ### Use this line for main models
#     bettis_dict["$(model_name)"] = model_dict  ### Use this line for clique, triangle models


    println("finished $(model_name)")

end

bettis_dict


In [None]:
# Serialise the nested Betti dictionary to the templates folder as JSON;
# the file name records the subsampling step k
open(io -> JSON.print(io, bettis_dict), "../templates/main_k$(k)_stdev.json", "w")
println("done saving")

## Loading and wrapping classification mat files


In [None]:
# Locate the classification .mat files for one experiment

model_names = [
    "IID", "assortative", "coreperiphery", "cosineGeometric",
    "disassortative", "discreteUniform", "dotProduct", "geometricConf",
    "randomGeometric", "ringLattice", "rmsd", "squaredEuclidean",
]

mat_dir = "../processed_data/classification/"

# Experiment to select. For example "all", "postnoise", "noiseOnly", "crossover", etc.
experiment = "all"

# Keep files whose name contains ".mat", the experiment tag and "NO",
# and does not contain "Rand"
mat_files = filter(readdir(mat_dir)) do fname
    occursin(".mat", fname) &&
        occursin(experiment, fname) &&
        occursin("NO", fname) &&
        !occursin("Rand", fname)
end

In [None]:
# Format the classification accuracy results as a data frame

# Prepare the true_predicted column: one "<b>_<a>" label per pair of models,
# with b varying fastest, which matches a column-major walk over a matrix whose
# rows are indexed by b and columns by a.
df = DataFrame(TP = ["$(b)_$(a)" for a in model_names for b in model_names])

# Fill with results: one column per .mat file, named after its threshold
for mat_file in mat_files

    # Read in file and extract threshold (third "_"-separated token of the
    # file name, with any "." removed)
    mat = matread(joinpath(mat_dir, mat_file))["C"]
    thresh = "thresh$(replace(split(split(mat_file, ".mat")[1], "_")[3], "."=>""))"

    # The matrix must have one row and one column per model to line up with TP
    nmodels = length(model_names)
    size(mat) == (nmodels, nmodels) || throw(DimensionMismatch(
        "expected a $(nmodels)x$(nmodels) matrix in $(mat_file), got size $(size(mat))"))

    # Store in df. vec flattens column by column, the same order that stacking
    # the matrix columns produces, without depending on a fixed model count.
    println(thresh)
    df[!, thresh] = vec(mat)
end
df

In [None]:
# Save the classification table as CSV in the templates folder
df |> CSV.write("../templates/classification_no_101220.csv")