In [1]:
using CSV
using DataFrames
using Dates
using Distributions
using Serialization
using LinearAlgebra: diagm
using JSON

In [2]:
# Set the COLUMNS environment variable for this session.
# NOTE(review): presumably this stops wide table output from being truncated — confirm.
ENV["COLUMNS"] = 1000;

In [3]:
# Directory locations, relative to the working directory the notebook runs in.
# `outputdatapath` is where the final serialized result is written.
# NOTE(review): `data_dir` is presumably the location of the input CSV files — confirm.
data_dir = "../data/";
outputdatapath = "../data/";

In [4]:
# Scenario / bed-type combinations to process: the main loop further down builds one
# dataset per (scenario, bedtype) pair taken from these two lists.
SCENARIOS = [:moderate];
BEDTYPES  = [:allbeds, :icu, :acute];

In [5]:
# Length-of-stay distribution for each bed type, looked up as `los_dist[bedtype]`.
# Distributions.jl's `Gamma(α, θ)` takes the shape α first and the scale θ second.
# NOTE(review): the time unit is presumably days (the forecast grid is daily) — confirm.
los_dist = (
    allbeds = Gamma(2.244, 4.4988),
    acute = Gamma(2.601, 3.8046),
    icu = Gamma(1.77595, 5.9512),
);

In [6]:
# Read the raw input tables into DataFrames.
# The paths are built from `data_dir` so the input location is configured in one
# place instead of being repeated as a literal in every call.
hist_data = DataFrame(CSV.File(joinpath(data_dir, "data_historical.csv")));
initial_data = DataFrame(CSV.File(joinpath(data_dir, "data_initial.csv")));
forecast_data = DataFrame(CSV.File(joinpath(data_dir, "data_forecast.csv")));
capacity_data = DataFrame(CSV.File(joinpath(data_dir, "capacity.csv")));
metadata = DataFrame(CSV.File(joinpath(data_dir, "hospital_meta.csv")));

In [7]:
# Forecast horizon: one entry per calendar day from the earliest to the latest date
# found in the forecast table, both ends included. `T` is the number of days.
start_date = minimum(forecast_data.date);
end_date   = maximum(forecast_data.date);
date_range = collect(start_date : Day(1) : end_date);
T = length(date_range);
@show (start_date, end_date);

(start_date, end_date) = (Date("2021-01-01"), Date("2021-06-30"))


In [8]:
# Keep only the hospitals flagged `in_forecast` and order them by the `hospital`
# column; both calls modify `metadata` in place. The resulting id order
# (`hospital_ids`) is the hospital ordering used when the data arrays are built below.
filter!(row -> row.in_forecast, metadata)
sort!(metadata, :hospital)
hospital_ids = metadata.hospital_id
N = length(hospital_ids)
@show N;

N = 45


In [9]:
# Full and abbreviated labels for the capacity level.
# NOTE(review): neither name is referenced in the visible part of this notebook —
# presumably kept for downstream consumers; confirm before removing.
capacity_names_full = ["Base Capacity"];
capacity_names_abbrev = ["baselinecap"];

In [10]:
"""
    load_capacity(hospitals, bedtype, capacity_levels=[:baseline])

Look up the bed capacity of every hospital in `hospitals` for the given `bedtype`.

Capacities are read from the global `capacity_data` table, using its `hospital_id`,
`capacity_icu`, `capacity_acute` and `capacity_allbeds` columns.

# Arguments
- `hospitals`: hospital ids; the output follows this order.
- `bedtype`: bed type whose `string` form is `"icu"`, `"acute"` or `"allbeds"`.
- `capacity_levels`: a `Symbol` to get a vector, or an array of levels to get a
  matrix with one column per level. The table holds a single capacity figure per
  hospital and bed type, so every column contains the same values.

# Returns
A vector with one entry per hospital, or a matrix with one row per hospital and
`length(capacity_levels)` columns.

# Throws
- `KeyError` if a hospital id or the bed type is not found.
- `ErrorException` if `capacity_levels` is neither a `Symbol` nor an array.
"""
function load_capacity(hospitals, bedtype, capacity_levels=[:baseline])
    beds_dict = Dict(row.hospital_id => Dict(
        "icu" => row.capacity_icu,
        "acute" => row.capacity_acute,
        "allbeds" => row.capacity_allbeds,
    ) for row in eachrow(capacity_data))

    if !(capacity_levels isa Symbol || capacity_levels isa AbstractArray)
        error("Invalid capacity_levels")
    end

    # Use the `hospitals` argument rather than a global id list, so the function
    # returns capacities for exactly the hospitals the caller asked for.
    key = string(bedtype)
    column = [beds_dict[h][key] for h in hospitals]

    if capacity_levels isa Symbol
        return column
    end

    # One identical column per requested level.
    return repeat(column, 1, length(capacity_levels))
end;

In [11]:
"""
    estimate_admitted(active, los_dist)

Recover an admissions series from a series of active-patient counts.

This is the inverse of `estimate_active`: with `initial = active[1]`, it solves the
lower-triangular linear system

    active[t] = initial - sum(discharged[1:t]) + sum(L[t-t′+1] * admitted[t′] for t′ in 1:t)

for `admitted`, where `L[k+1] = 1 - cdf(los_dist, k)` and
`discharged[k+1] = initial * pdf(los_dist, k)`.

# Arguments
- `active`: vector of active-patient counts, one entry per time step.
- `los_dist`: length-of-stay distribution supporting `pdf` and `cdf`.

# Returns
A vector of estimated admissions with the same length as `active`.
"""
function estimate_admitted(active, los_dist)
    T = length(active)

    initial = active[1]
    discharged = initial .* (pdf.(los_dist, 0:T-1))

    L = 1.0 .- cdf.(los_dist, 0:T)

    # Fill the upper triangle with a zero of L's own element type. An integer `0`
    # literal would widen the comprehension to an abstractly typed Matrix{Real}.
    A = [(t′ ≤ t) ? L[t-t′+1] : zero(eltype(L)) for t in 1:T, t′ in 1:T]

    # Running total of the initial cohort's discharges, computed once instead of
    # re-summing a fresh slice for every time step.
    discharged_so_far = cumsum(discharged)
    b = [active[t] - (initial - discharged_so_far[t]) for t in 1:T]

    admitted = A \ b

    return admitted
end;

"""
    estimate_active(initial::Real, admitted::Array{<:Real,1}, los_dist)

Project the number of active patients at one location.

For every time step the result is the initial count, minus the cumulative
`initial * pdf(los_dist, k)` terms up to that step, plus each earlier admission
weighted by `1 - cdf(los_dist, elapsed)`.

# Arguments
- `initial`: active-patient count at the start.
- `admitted`: admissions per time step.
- `los_dist`: length-of-stay distribution supporting `pdf` and `cdf`.

# Returns
A vector with one projected active count per entry of `admitted`.
"""
function estimate_active(initial::Real, admitted::Array{<:Real,1}, los_dist)
    horizon = length(admitted)

    survival = 1.0 .- cdf.(los_dist, 0:horizon)
    initial_outflow = initial .* (pdf.(los_dist, 0:horizon-1))

    occupancy = map(1:horizon) do step
        # Part of the starting cohort that is still present at this step.
        carried_over = initial - sum(initial_outflow[1:step])
        # Contribution of every admission made up to and including this step.
        from_admissions = sum(survival[step-prev+1] * admitted[prev] for prev in 1:step)
        carried_over + from_admissions
    end

    return occupancy
end;

In [12]:
"""
    estimate_active(initial::Array{<:Real,1}, admitted::Array{<:Real,2}, los_dist)

Project active patients for several locations at once.

Row `i` of the result is the single-location `estimate_active` applied to
`initial[i]` and row `i` of `admitted`.

# Returns
A `Float64` matrix with the same size as `admitted`.
"""
function estimate_active(initial::Array{<:Real,1}, admitted::Array{<:Real,2}, los_dist)
    nlocations, nsteps = size(admitted)
    projected = Array{Float64,2}(undef, nlocations, nsteps)
    for loc in axes(admitted, 1)
        # Row slices are copied to plain vectors, which is what the
        # single-location method accepts.
        series = admitted[loc, :]
        projected[loc, :] = estimate_active(initial[loc], series, los_dist)
    end
    return projected
end;

In [13]:
# A value is "bad" when it is `nothing`, `missing`, NaN or infinite.
# `nothing` and `missing` are handled by dispatch; everything else is tested numerically.
isbad(::Nothing) = true
isbad(::Missing) = true
isbad(x) = isnan(x) || isinf(x)

# Logical complement of `isbad`.
isnbad(x) = !isbad(x);

In [14]:
# Return a copy of `xs` containing only the entries that are not "bad" (see `isbad`).
function skipbad(xs)
    return filter(x -> !isbad(x), xs)
end;

In [15]:
# First entry of `xs` that is not "bad" (see `isnbad`).
function firstval(xs)
    pos = findfirst(isnbad, xs)
    return xs[pos]
end

# Last entry of `xs` that is not "bad" (see `isnbad`).
function lastval(xs)
    pos = findlast(isnbad, xs)
    return xs[pos]
end;

In [16]:
"""
    interpolate_missing(xs::Array{Union{Float64,Missing},2})

Fill the gaps in every row of `xs` independently, using the vector method of
`interpolate_missing`.

# Returns
A `Float64` matrix with the same size as `xs`.
"""
function interpolate_missing(xs::Array{Union{Float64,Missing},2})
    filled = similar(xs, Float64)
    for r in axes(xs, 1)
        # Each row is copied out as a plain vector for the vector method.
        series = xs[r, :]
        filled[r, :] = interpolate_missing(series)
    end
    return filled
end;

"""
    interpolate_missing(xs::Array{Union{Float64,Missing},1})

Replace every "bad" entry of `xs` (see `isbad`) by linear interpolation between the
nearest good entries on either side. A bad entry with a good neighbour on only one
side takes that neighbour's value. The input is not modified.

# Returns
A filled copy of `xs`, or a `Float64` vector of zeros of the same length when every
entry is bad.
"""
function interpolate_missing(xs::Array{Union{Float64,Missing},1})
    all(isbad, xs) && return zeros(Float64, length(xs))

    filled = copy(xs)
    for i in eachindex(filled)
        isbad(filled[i]) || continue

        lo = findprev(isnbad, filled, i)
        hi = findnext(isnbad, filled, i)

        # At a boundary only one neighbour exists; use it for both ends.
        if isnothing(lo)
            lo = hi
        end
        if isnothing(hi)
            hi = lo
        end

        slope = if lo == hi
            0
        else
            (filled[hi] - filled[lo]) / (hi - lo)
        end
        filled[i] = (slope * (i - lo)) + filled[lo]
    end
    return filled
end;

In [17]:
# Assemble the inputs for one (scenario, bedtype) pair from the global tables.
#
# Reads the admissions column for `bedtype` from `forecast_data` and the active
# column for `bedtype` from `initial_data`, arranges them by `hospital_ids` (rows)
# and `date_range` (columns), and projects the active-patient counts with
# `estimate_active` and the bed type's length-of-stay distribution.
#
# Returns a NamedTuple with the fields `scenario`, `bedtype`, `los_dist`, `active`,
# `admitted`, `admitted_uncertainty` (15% of `admitted`), `beds` (capacity vector)
# and `capacity` (capacity matrix with a single baseline column).
#
# `scenario` is validated and stored in the result; it is not used to select data.
function load_data(scenario, bedtype)
    @assert(bedtype in [:icu, :acute, :allbeds])
    @assert(scenario in [:optimistic, :moderate, :pessimistic, :catastrophic])

    admissions_col = "admissions_$(bedtype)"
    active_col = "active_$(bedtype)"

    admissions_lookup = Dict(
        (row.hospital_id, row.date) => row[admissions_col]
        for row in eachrow(forecast_data)
    )
    initial_lookup = Dict(
        row.hospital_id => row[active_col]
        for row in eachrow(initial_data)
    )

    # Hospitals along the rows, dates along the columns.
    admitted = [admissions_lookup[(h, d)] for h in hospital_ids, d in date_range]
    initial = [initial_lookup[h] for h in hospital_ids]

    los = los_dist[bedtype]

    return (
        scenario = scenario,
        bedtype = bedtype,

        los_dist = los,

        active = estimate_active(initial, admitted, los),
        admitted = admitted,
        admitted_uncertainty = 0.15 .* admitted,

        beds = load_capacity(hospital_ids, bedtype, :baseline),
        capacity = load_capacity(hospital_ids, bedtype, [:baseline,]),
    )
end;

In [18]:
# Build one dataset per (scenario, bedtype) pair, keyed by that pair.
maindata = Dict()
for scenario in SCENARIOS
    for bedtype in BEDTYPES
        @show (scenario, bedtype)
        maindata[(scenario, bedtype)] = load_data(scenario, bedtype)
    end
end

(scenario, bedtype) = (:moderate, :allbeds)
(scenario, bedtype) = (:moderate, :icu)
(scenario, bedtype) = (:moderate, :acute)


In [19]:
# One NamedTuple of descriptive fields per row of `metadata`.
# `index` is the position of the hospital's id within `hospital_ids`; the HSA and
# HRR ids are stored as strings.
hospital_meta = map(eachrow(metadata)) do row
    (
        name = row.hospital,
        id = row.hospital_id,
        index = findfirst(==(row.hospital_id), hospital_ids),
        state = row.state,
        state_abbrev = row.state_abbrev,
        zipcode = row.zipcode,
        city = row.city,
        county = row.county,
        lat = row.lat,
        long = row.long,
        hsa_name = row.hsa_name,
        hsa_id = string(row.hsa_id),
        hrr_name = row.hrr_name,
        hrr_id = string(row.hrr_id),
        healthcare_region = row.healthcare_region,
    )
end;

In [20]:
# Names and ids of the hospitals, in the same order as `hospital_meta`.
hospital_names = map(site -> site.name, hospital_meta);
hospital_identifiers = map(site -> site.id, hospital_meta);

In [21]:
# Map each hospital id to its coordinates as a (lat, long) NamedTuple.
hospital_positions = Dict(
    site.id => (lat = site.lat, long = site.long) for site in hospital_meta
);

In [22]:
# Bundle everything that gets written to disk into a single NamedTuple:
# location ids, names and metadata (all in the same hospital order), the forecast
# date bounds, the id => (lat, long) lookup, and the per-(scenario, bedtype) datasets.
completedata = (
    location_ids = hospital_identifiers,
    location_names = hospital_names,
    location_meta = hospital_meta,
    start_date = start_date,
    end_date = end_date,
    locations_latlong = hospital_positions,
    casesdata = maindata,
);

In [23]:
# Write the bundle to "data.jlser" inside `outputdatapath` using Julia's Serialization
# standard library.
serialize(joinpath(outputdatapath, "data.jlser"), completedata);