In [1]:
using Distributions
using AlphaStableDistributions
using Statistics
using Plots
using StatsPlots
using StatsFuns
using NPZ

In [2]:
# Locate the Julia sources relative to the current working directory:
# two levels up from pwd(), then src/julia.
parent_folder = dirname(dirname(pwd()))
# joinpath inserts the separator of the running OS, so the includes below
# resolve on Windows as well as on Linux/macOS.
source_path = joinpath(parent_folder, "src", "julia")

# Load the project code in its numbered order.
include(joinpath(source_path, "01_priors.jl"))
include(joinpath(source_path, "02_diffusion.jl"))
include(joinpath(source_path, "03_experiment.jl"))
include(joinpath(source_path, "04_datasets.jl"));

In [3]:
"Helper function to save simulated datasets as npy files."
function save_to_npy(
    dataset::Array{Float64, 4},
    name::String;
    target_folder::AbstractString="c:\\Users\\lasse\\documents\\hierarchical_model_comparison_project\\data\\03_levy_flight_application",
)
    # `dataset` is written to "<target_folder>/<name>.npy" via NPZ's npzwrite.
    # `target_folder` is an optional keyword; its default is the directory that
    # used to be hard-coded here, so existing two-argument calls are unaffected.
    # Pass a different folder to run the notebook on another machine.
    target_path = joinpath(target_folder, "$name.npy")

    return npzwrite(target_path, dataset)
end;

# Generate training and validation data sets

In [4]:
# Settings
# n_clusters is passed unchanged to every generate_levy_batch call.
n_clusters = 40
# goal picks the configuration used below; the values handled there are
# "pretraining", "finetuning" and "validation".
goal = "finetuning";

In [5]:
# Choose the simulation settings that belong to `goal`.
#   n_datasets_per_model, n_trials : passed to generate_levy_batch
#   indices_names, datasets_names  : base names of the .npy files that are saved
# A single if/elseif/else chain is used so that an unrecognised goal stops here
# with a clear message instead of leaving the four variables undefined (or, in
# a re-run notebook session, silently keeping the values of a previous run).
if goal == "pretraining"

    n_datasets_per_model = 10000
    n_trials = 100

    indices_names = "train_indices_100_trials"
    datasets_names = "train_datasets_100_trials"

elseif goal == "finetuning"

    n_datasets_per_model = 2000
    n_trials = 900

    indices_names = "train_indices_900_trials"
    datasets_names = "train_datasets_900_trials"

elseif goal == "validation"

    n_datasets_per_model = 2000
    n_trials = 900

    indices_names = "val_indices_900_trials"
    datasets_names = "val_datasets_900_trials"

else

    throw(ArgumentError(
        "unknown goal \"$goal\"; expected \"pretraining\", \"finetuning\" or \"validation\"",
    ))

end;

In [6]:
# Model 1 of 4: basic diffusion model
indices_m1, datasets_m1 = generate_levy_batch(
    1, n_datasets_per_model, n_clusters, n_trials
);

In [7]:
# Model 2 of 4: basic Lévy flight model
indices_m2, datasets_m2 = generate_levy_batch(
    2, n_datasets_per_model, n_clusters, n_trials
);

In [8]:
# Model 3 of 4: full diffusion model
indices_m3, datasets_m3 = generate_levy_batch(
    3, n_datasets_per_model, n_clusters, n_trials
);

In [9]:
# Model 4 of 4: full Lévy flight model
indices_m4, datasets_m4 = generate_levy_batch(
    4, n_datasets_per_model, n_clusters, n_trials
);

In [10]:
# Stack the four per-model batches along the first dimension
# ([a; b] is the literal syntax for vcat(a, b)).
indices = [indices_m1; indices_m2; indices_m3; indices_m4]
datasets = [datasets_m1; datasets_m2; datasets_m3; datasets_m4];

In [11]:
# save_to_npy() only accepts an Array{Float64, 4}: pad indices with trailing
# singleton dimensions up to four, then convert every element to Float64.
indices = map(Float64, cat(indices; dims=4));

Simulation times

pre-training: 60 minutes

fine-tuning: 110 minutes

## Save generated datasets

In [12]:
# Write the indices first, then the datasets, each under the file name
# chosen by the goal-specific settings.
for (array, file_name) in ((indices, indices_names), (datasets, datasets_names))
    save_to_npy(array, file_name)
end