In [1]:
using CSV
using DataFrames
using Dates
using Distributions
using Serialization
using LinearAlgebra: diagm
using JSON

In [2]:
# Set the COLUMNS environment variable for this session.
# NOTE(review): presumably this widens printed tables so wide DataFrames are not
# truncated in the notebook — confirm.
ENV["COLUMNS"] = 1000;

In [3]:
# Directory used to locate the forecast CSV.
data_dir = "../data/";
# Directory the serialized result is written to (currently the same location as `data_dir`).
outputdatapath = "../data/";

In [4]:
# Date tag interpolated into the forecast CSV file name (yyyy_mm_dd by appearance).
forecast_date = "2020_12_10";

In [5]:
# Hospital capacity table (columns used below: `hospital`, `capacity_icu`,
# `capacity_acute`). The path is built from `data_dir`, the same way the forecast
# table is located, so both follow a single directory setting.
capacity_data = DataFrame(CSV.File(joinpath(data_dir, "capacity_maryland.csv")));

In [6]:
# Scenario and bed-type symbols; the data-building loop visits every
# (scenario, bedtype) pair formed from these two lists.
SCENARIOS = [:optimistic, :moderate, :pessimistic];
BEDTYPES  = [:allbeds, :icu, :acute];

In [7]:
# Length-of-stay distribution for each bed type, looked up as `los_dist[bedtype]`.
# `acute` and `allbeds` share the same Weibull parameters.
# NOTE(review): Distributions.jl's `Weibull` takes (shape, scale); the scale is
# presumably in days, matching the daily date grid — confirm.
los_dist = (
    icu = Weibull(1.58, 13.32),
    acute = Weibull(1.38, 12.88),
    allbeds = Weibull(1.38, 12.88),
);

In [8]:
# Modelling window: first and last calendar day (both inclusive).
start_date = Date(2020, 3, 27);
end_date = Date(2021, 4, 1);

# One entry per day of the window, materialised as a vector; `T` is the number of days.
date_range = collect(range(start_date; step = Day(1), stop = end_date));
T = length(date_range);

In [9]:
# Hospital names from the capacity table, sorted; the per-hospital arrays built
# later iterate over this vector, so its order is their row order.
hospitals = sort(capacity_data.hospital);
# Number of hospitals.
N = length(hospitals);

In [10]:
# N×N adjacency mask: every hospital is adjacent to every other hospital, but not to itself.
adj = BitArray(i != j for i in 1:N, j in 1:N);

In [11]:
# N×N matrix with Inf on the diagonal and 0.0 everywhere else.
# NOTE(review): for a distance matrix this looks inverted (infinite self-distance, zero
# distance between distinct hospitals) — presumably a placeholder; confirm with consumers.
dist_matrix = [i == j ? Inf : 0.0 for i in 1:N, j in 1:N];

In [12]:
# Display name and abbreviated identifier for the single capacity level.
# NOTE(review): neither vector is referenced elsewhere in the visible code —
# presumably consumed downstream; confirm.
capacity_names_full = ["Base Capacity"];
capacity_names_abbrev = ["baselinecap"];

In [13]:
"""
    load_capacity_md(hospitals, bedtype, capacity_levels=[:baseline])

Look up the bed capacity of every entry of `hospitals` in the global `capacity_data`
table.

# Arguments
- `hospitals`: hospital names; each must appear in `capacity_data.hospital`.
- `bedtype`: `:icu`, `:acute` or `:allbeds` (`allbeds` is `capacity_icu + capacity_acute`).
- `capacity_levels`: a `Symbol` to get a vector, or an array of levels to get a matrix
  with one column per level. Every level currently maps to the same capacity values,
  so all columns are identical.

# Returns
A vector of length `length(hospitals)` when `capacity_levels` is a `Symbol`, otherwise a
`length(hospitals) × length(capacity_levels)` matrix (zero columns for an empty array).

# Throws
- `ErrorException` if `capacity_levels` is neither a `Symbol` nor an array.
- `KeyError` if a hospital or bed type is not found.
"""
function load_capacity_md(hospitals, bedtype, capacity_levels=[:baseline])
    beds_dict = Dict(row.hospital => Dict(
        "icu" => row.capacity_icu,
        "acute" => row.capacity_acute,
        "allbeds" => row.capacity_icu + row.capacity_acute,
    ) for row in eachrow(capacity_data))

    # Reject unsupported `capacity_levels` before any per-hospital lookup.
    capacity_levels isa Union{Symbol,AbstractArray} || error("Invalid capacity_levels")

    # The per-hospital column is the same for both return shapes, so build it once.
    bedkey = string(bedtype)
    per_hospital = [beds_dict[h][bedkey] for h in hospitals]

    capacity_levels isa Symbol && return per_hospital

    # One identical column per level. `repeat` avoids splatting into `hcat` and yields
    # a well-formed N×0 matrix when no levels are given.
    return repeat(per_hospital, 1, length(capacity_levels))
end;

In [14]:
"""
    estimate_admitted(active, los_dist)

Reconstruct an admissions series from a series of active patient counts.

The first entry of `active` is treated as an initial cohort that is discharged over time
according to the density of `los_dist`. The rest of each `active[t]` is explained by
admissions on steps `1:t`, each weighted by the probability of still being in hospital
(`1 - cdf`). This gives a lower-triangular linear system that is solved for the
admissions.

# Arguments
- `active`: vector of active counts, one per time step.
- `los_dist`: length-of-stay distribution supporting `pdf` and `cdf`.

# Returns
A vector of estimated admissions with the same length as `active`; an empty `Float64`
vector when `active` is empty.
"""
function estimate_admitted(active, los_dist)
    T = length(active)
    # Nothing to solve for; also avoids indexing `active[1]` on empty input.
    T == 0 && return Float64[]

    initial = active[1]
    # Running total of initial-cohort discharges up to each step (one pass instead of
    # re-summing a growing slice for every t).
    discharged_cum = cumsum(initial .* pdf.(los_dist, 0:T-1))

    # survival[k + 1] = probability that a stay lasts longer than k steps.
    survival = 1.0 .- cdf.(los_dist, 0:T-1)

    # Typed zero keeps the matrix element type equal to that of `survival` instead of
    # widening it by mixing in an integer literal.
    A = [
        (t′ ≤ t) ? survival[t - t′ + 1] : zero(eltype(survival))
        for t in 1:T, t′ in 1:T
    ]
    # Active patients not accounted for by what remains of the initial cohort.
    b = [active[t] - (initial - discharged_cum[t]) for t in 1:T]

    return A \ b
end;

In [15]:
# Raw occupancy table; cells containing "null" are read as missing.
# NOTE(review): the file is dated 2020_12_01 while `forecast_date` is 2020_12_10 —
# confirm the mismatch is intentional.
md_data = DataFrame(CSV.File(
    "../rawdata/md_data_2020_12_01.csv";
    dateformat = "mm/dd/yy",
    missingstring = "null",
))

# Replace the verbose source headers with short column names.
rename!(
    md_data,
    "Facility Name" => :hospital,
    "Modified Date" => :date,
    "COVID-19 Patients in Acute Care Beds" => :active_covid_acute,
    "COVID-19 Patients in ICU" => :active_covid_icu,
)

# NOTE(review): the two-digit years presumably parse as years 0–99, hence the shift
# into the 2000s — confirm against the parsed column.
md_data.date .+= Year(2000)

# (hospital, date) => active counts per bed type, with missing counts treated as 0.
md_data_dict = Dict(
    begin
        icu = coalesce(row.active_covid_icu, 0)
        acute = coalesce(row.active_covid_acute, 0)
        (row.hospital, row.date) => Dict(
            "active_icu" => icu,
            "active_acute" => acute,
            "active_allbeds" => icu + acute,
        )
    end for row in eachrow(md_data)
);

In [16]:
# Load the forecast table for `forecast_date` from `data_dir`.
# NOTE(review): "fake" in the file name suggests placeholder data — confirm before
# relying on the numbers.
forecast = DataFrame(CSV.File(joinpath(data_dir, "forecast_fake_$(forecast_date).csv")));

In [17]:
"""
    load_forecast_maryland(scenario, bedtype)

Assemble per-hospital active and admitted patient series for one bed type over the
global `date_range`.

Dates that also occur in `md_data.date` are treated as history: active counts come from
`md_data_dict` (0 when a hospital has no record for a date) and admissions are
reconstructed with `estimate_admitted`. All remaining dates are filled from the global
`forecast` table.

# Arguments
- `scenario`: one of `:optimistic`, `:moderate`, `:pessimistic`, `:catastrophic`. It is
  validated and stored in the result; it does not select different forecast rows.
- `bedtype`: one of `:icu`, `:acute`, `:allbeds`.

# Returns
A named tuple with fields `scenario`, `bedtype`, `los_dist`, `active` and `admitted`
(`N × T` matrices), `admitted_uncertainty` (`0.1 .* admitted`), `beds` (vector) and
`capacity` (one-column matrix).

# Throws
- `AssertionError` for an unknown `scenario` or `bedtype`.
- `KeyError` if `forecast` has no row for a required `(hospital, date)` pair.
"""
function load_forecast_maryland(scenario, bedtype)
    @assert(bedtype in [:icu, :acute, :allbeds])
    @assert(scenario in [:optimistic, :moderate, :pessimistic, :catastrophic])

    admitted_col = "admitted_$(bedtype)"
    active_col = "active_$(bedtype)"

    forecast_dict = Dict((row.hospital, row.date) => (
        admitted = row[admitted_col],
        active = row[active_col],
    ) for row in eachrow(forecast))

    hist_dict = Dict(k => v[active_col] for (k, v) in pairs(md_data_dict))

    hist_date_range = sort(intersect(date_range, md_data.date))
    forecast_date_range = sort(setdiff(date_range, hist_date_range))

    # Position of every date within `date_range`, built once instead of scanning the
    # whole range for each date. `get!` keeps the first occurrence, which is what
    # `findfirst` would return.
    date_index = Dict{eltype(date_range),Int}()
    for (t, d) in enumerate(date_range)
        get!(date_index, d, t)
    end
    # Typed comprehensions stay valid integer indices even when a range is empty.
    hist_date_range_t = Int[date_index[d] for d in hist_date_range]
    forecast_date_range_t = Int[date_index[d] for d in forecast_date_range]

    forecast_admitted = [forecast_dict[(h, d)].admitted for h in hospitals, d in forecast_date_range]
    forecast_active = [forecast_dict[(h, d)].active for h in hospitals, d in forecast_date_range]

    # Hospitals without a record on a historical date count as 0 active patients.
    hist_active = [get(hist_dict, (h, d), 0) for h in hospitals, d in hist_date_range]
    hist_admitted = Array{Float64,2}(undef, N, length(hist_date_range))
    for i in 1:N
        hist_admitted[i, :] = estimate_admitted(hist_active[i, :], los_dist[bedtype])
    end

    # Merge forecast and history into full-length series; columns follow `date_range`.
    active = zeros(Float64, N, T)
    active[:, forecast_date_range_t] = forecast_active
    active[:, hist_date_range_t] = hist_active

    admitted = zeros(Float64, N, T)
    admitted[:, forecast_date_range_t] = forecast_admitted
    admitted[:, hist_date_range_t] = hist_admitted

    admitted_uncertainty = 0.1 .* admitted

    # Same capacities in two shapes: a vector (`beds`) and a one-column matrix (`capacity`).
    beds = load_capacity_md(hospitals, bedtype, :baseline)
    capacity = load_capacity_md(hospitals, bedtype, [:baseline,])

    data = (
        scenario = scenario,
        bedtype = bedtype,

        los_dist = los_dist[bedtype],

        active = active,
        admitted = admitted,
        admitted_uncertainty = admitted_uncertainty,

        beds = beds,
        capacity = capacity,
    )

    return data
end;

In [18]:
# Build the data tuple for every (scenario, bedtype) combination, keyed by that pair.
# Scenarios form the outer loop and bed types the inner one; each pair is echoed as
# it is processed.
maindata = Dict()
for scenario in SCENARIOS
    for bedtype in BEDTYPES
        @show (scenario, bedtype)
        maindata[(scenario, bedtype)] = load_forecast_maryland(scenario, bedtype)
    end
end

(scenario, bedtype) = (:optimistic, :allbeds)
(scenario, bedtype) = (:optimistic, :icu)
(scenario, bedtype) = (:optimistic, :acute)
(scenario, bedtype) = (:moderate, :allbeds)
(scenario, bedtype) = (:moderate, :icu)
(scenario, bedtype) = (:moderate, :acute)
(scenario, bedtype) = (:pessimistic, :allbeds)
(scenario, bedtype) = (:pessimistic, :icu)
(scenario, bedtype) = (:pessimistic, :acute)


In [19]:
# Hospital coordinates keyed by hospital name. The path is built from `data_dir`, the
# same way the forecast table is located, so both follow a single directory setting.
hospital_positions_raw = JSON.parsefile(joinpath(data_dir, "md_hospital_locs.json"));

# hospital name => (lat, long); the source JSON stores longitude under the key "lng".
# A hospital missing from the JSON raises a KeyError here.
hospital_positions = Dict(
    h => (
        lat  = hospital_positions_raw[h]["lat"],
        long = hospital_positions_raw[h]["lng"],
    )
    for h in hospitals
);

In [20]:
# Bundle of everything that is written to the output file.
completedata = (
    # Sorted hospital names; the short-name field reuses the same vector.
    location_names = hospitals,
    location_names_short = hospitals,
    start_date = start_date,
    end_date = end_date,
    # No county or state information is attached.
    counties = nothing,
    states = nothing,
    dist_matrix = dist_matrix,
    # Dict of hospital name => (lat, long) named tuple.
    locations_latlong = hospital_positions,
    # Dict keyed by (scenario, bedtype) holding the per-case data tuples.
    casesdata = maindata,
);

In [21]:
# Write the assembled bundle to disk in Julia's native serialization format.
open(joinpath(outputdatapath, "data_maryland.jlser"), "w") do io
    serialize(io, completedata)
end;