In [1]:
using Pkg; Pkg.activate("/Users/adrocampos/covid19/env_2")
using Plots
using CSV
using DataFrames
using OrdinaryDiffEq
using Optim
using DataFrames
using DiffEqFlux
using Flux
using Random
using LinearAlgebra
using Dates
using ModelingToolkit
using DataDrivenDiffEq
using SciMLBase
using Plots
using JLD
using Interpolations
using DataDrivenSparse

[32m[1m  Activating[22m[39m project at `~/covid19/env_2`


In [2]:
function mse(pred, x_test)
    sum(abs2, x_test .- pred) / size(x_test, 2)
end

mse (generic function with 1 method)

In [3]:
mobility = ["border", "inv_dist", "neighbor"][2]
data_dir = "/Users/adrocampos/covid19/synth_data/"
file_name = "SIR_" * string(10) * "_regions_" * mobility * "_" * string(init) * ".csv"
    
csv_reader = CSV.File(data_dir * file_name, types=Float64) 
df = DataFrame(csv_reader)
X = Matrix(df)[:,2:end] ## Filtering out t
mask = (1:10:5001) ## Defines resolution of input data
X = X[mask, :]'
println(size(X))

## Selecting training and testing sets
tspan_train = 1:251
tspan_test  = 1:501
    

init = 16

mse_sindy_all_regions = Float64[]
mse_uode_all_regions  = Float64[]


for target_region in 1:10

    ####################################################################
    ############################## Set up ##############################
    ####################################################################
    
    folder_name = "SIR_" * string(10) * "_regions_" * mobility * "_" * string(tspan_train[end]) *  "/"
    save_dir = "/Users/adrocampos/covid19/SIR_discovery/" * folder_name * "init_" * lpad(init,2,"0")  * "/region_" * lpad(target_region,2,"0") * "/"
    println("save_dir = ", save_dir)

    universe = range(1, size(X)[1], step=1)
    index_target = (target_region - 1) * 3 + 1
    targets = [index_target, index_target+1, index_target+2]
    adjacents = setdiff(universe , targets)
    
    x_target = X[targets,:]
    x_adjacent = X[adjacents,:]
    
    x_test  = x_target[:,tspan_test]
    x_train = x_target[:,tspan_train]
    
    u0 = x_train[:, 1]
    
    ## Linear interpolation of the SIR model of adjacent region
    interpolation_adjacent = interpolate(Array(x_adjacent), BSpline(Linear()))

    
    #####################################################################
    ########################## Sindy Model ##############################
    #####################################################################
    
    res_UODE_2 = load(save_dir * "3_UODE_params_2.jld", "UODE_params")
    uode_pred  = load(save_dir * "3_UODE_pred.jld", "UODE_pred")
    ann_input  = load(save_dir * "4_ann_input.jld", "ann_input")
    ann_output = load(save_dir * "4_ann_output.jld", "ann_output")

    problem = DirectDataDrivenProblem(ann_input, ann_output)

    @variables v[1:27]
    h = monomial_basis(v,2)
    basis = Basis(h, v)
    
    λs = exp10.(-5:0.1:5)
    opt = STLSQ(λs)
    sindy_res = solve(problem, basis, opt, progress=true, normalize=false, denoise=true)
    
    res_basis  = get_basis(sindy_res)
    res_params = get_parameter_values(res_basis)
    
    println(res_basis)
    
    function approx(du, u, p, t)
    
        β = res_UODE_2[1]
        γ = res_UODE_2[2]
    
        S, I, R = u
        N = sum(u)
    
        adjacent_SIR = interpolation_adjacent[:,t]
        si = sindy_res(adjacent_SIR./sum(adjacent_SIR), p) 
    
        du[1] = (-β * I / N) * S - si[1]
        du[2] =  (β * I / N) * S + si[1] - (γ * I)   
        du[3] =  (γ * I)      
    
    end
    
    sindy_prob = ODEProblem(approx, u0, (1,501), res_params)
    pred_sindy = solve(sindy_prob, Tsit5(), u0=u0, tspan=(1,501), saveat=1)

    ## Saving results
    save_dir_sindy = "/Users/adrocampos/covid19/sindy/init_" * lpad(init,2,"0") * "/"
    save(save_dir_sindy * "4_SINDY_pred_region_"   * lpad(target_region,2,"0") * ".jld", "SINDY_pred", Array(pred_sindy))
    save(save_dir_sindy * "4_SINDY_params_region_" * lpad(target_region,2,"0") * ".jld", "SINDY_params", Array(res_params))
    
    plt = plot(pred_sindy', labels=["SInDy pred S" "SInDy pred I" "SInDy pred R"], lw=2, ls=:dot, palette=:Dark2_3)
    plt = plot!(x_test', labels=["S" "I" "R"], lw=1, palette=:Dark2_3, linealpha=.5)
    plt = plot!(x_train', labels=["Train S" " Train I" "Train R"], lw=3, palette=:Dark2_3)
    plt = plot!(legend=:right)
    savefig(plt, save_dir_sindy * "4_SINDY_pred_region_" * lpad(target_region,2,"0") * ".pdf")  

end

LoadError: ArgumentError: "/Users/adrocampos/covid19/synth_data/SIR_10_regions_inv_dist_init.csv" is not a valid file or doesn't exist