In [1]:
using CSV
using DataFrames
using Dates

In [2]:
# Set the COLUMNS environment variable to 1000 — presumably so that wide
# DataFrame output is not truncated to the default display width (TODO confirm).
ENV["COLUMNS"] = 1000;

In [3]:
# Parse the weekly facility-level CSV and materialize it as a DataFrame.
rawdata = CSV.File("../rawdata/reported_hospital_capacity_admissions_facility_level_weekly_average_timeseries_20201221_0.csv") |> DataFrame;

In [4]:
# Build the weekly working table: a title-cased hospital name, the collection
# week as the date, and two 7-day-sum columns in which -999999 is treated as
# missing. The result is sorted by hospital, then date.
data_weekly = let
    # Map `missing` and the -999999 placeholder to `missing`; pass anything else through.
    blank_sentinel(v) = (ismissing(v) || v == -999999) ? missing : v

    weekly = select(
        rawdata,
        :hospital_name => ByRow(titlecase) => :hospital,
        :collection_week => :date,
        # NOTE(review): admissions reads the *suspected*-only column while active
        # reads the confirmed_and_suspected column — confirm this is intended.
        :previous_day_admission_adult_covid_suspected_7_day_sum =>
            ByRow(blank_sentinel) => :admissions_weekly,
        :total_adult_patients_hospitalized_confirmed_and_suspected_covid_7_day_sum =>
            ByRow(blank_sentinel) => :active_weekly,
    )
    sort!(weekly, [:hospital, :date])
end;

In [5]:
# Preview the first five rows of the cleaned weekly table.
first(data_weekly,5)

Row,hospital,date,admissions_weekly,active_weekly
,String,Date,Int64?,Int64?
1,Abbeville Area Medical Center,2020-07-31,0,22
2,Abbeville Area Medical Center,2020-08-07,missing,31
3,Abbeville Area Medical Center,2020-08-14,5,missing
4,Abbeville Area Medical Center,2020-08-21,missing,15
5,Abbeville Area Medical Center,2020-08-28,4,15


In [6]:
"""
    interpolate_timeseries_linear(xs, ys)

Linearly interpolate the values `ys`, observed at the sorted dates `xs`, onto
every calendar day from `xs[1]` through `xs[end]` (inclusive).

# Arguments
- `xs`: vector of dates in ascending order (the knots).
- `ys`: values observed at `xs`; entries may be `missing`.

# Returns
A `Vector{Union{Float64,Missing}}` with one entry per day. On a knot date the
result is the observed value itself; between two knots it is the straight-line
interpolation of the surrounding pair. A day strictly between two knots is
`missing` if either surrounding value is `missing`. Results are clamped below
at zero. An empty input yields an empty vector.
"""
function interpolate_timeseries_linear(xs, ys)
    @assert issorted(xs) "xs must be sorted in ascending order"
    @assert length(xs) == length(ys) "xs and ys must have the same length"
    tx = length(xs)
    tx == 0 && return Union{Float64,Missing}[]

    ds = xs[1]:Day(1):xs[end]
    zs = zeros(Union{Float64,Missing}, length(ds))

    for (i, d) in enumerate(ds)
        # Index of the LAST knot at or before `d` (the left end of the bracket).
        # `xs` is sorted and `d >= xs[1]`, so this is always a valid index.
        lo = searchsortedlast(xs, d)
        x1, y1 = xs[lo], ys[lo]

        if d == x1
            # Exactly on a knot: use the observation directly. This also covers
            # the final knot, where there is no right neighbour and the slope
            # would otherwise be 0/0.
            z = y1
        else
            # Strictly inside a segment, so `lo + 1` exists and `x2 > x1`.
            x2, y2 = xs[lo + 1], ys[lo + 1]
            m = (y2 - y1) / (x2 - x1).value
            z = m * (d - x1).value + y1
        end

        # Clamp at zero; `max(0, missing)` stays `missing`.
        zs[i] = max(0, z)
    end

    return zs
end;

In [7]:
# Expand every hospital's weekly series into a daily series and stack the
# per-hospital frames into `data_daily`.
data_daily_list = DataFrame[]
# Group once up front so each hospital's rows are a keyed lookup instead of a
# full-table `filter` scan per hospital.
hospital_groups = groupby(data_weekly, :hospital)
for h in unique(data_weekly.hospital)
    loc_df = sort(hospital_groups[(h,)], :date)

    dates_w = loc_df.date
    # The weekly columns are 7-day sums; divide by 7 for a per-day average.
    admissions_w = loc_df.admissions_weekly ./ 7
    active_w = loc_df.active_weekly ./ 7

    admissions_d = interpolate_timeseries_linear(dates_w, admissions_w)
    active_d = interpolate_timeseries_linear(dates_w, active_w)

    # Same daily date range the interpolation covers: first to last weekly date.
    start_date = dates_w[1]
    end_date   = dates_w[end]
    dates_d = collect(start_date : Day(1) : end_date)

    t = length(dates_d)
    # All-bed totals are split 30% ICU / 70% acute by the fixed factors below.
    loc_df_daily = DataFrame(
        hospital = fill(h, t),
        date = dates_d,
        admissions_icu = admissions_d .* 0.3,
        admissions_acute = admissions_d .* 0.7,
        admissions_allbeds = admissions_d,
        active_icu = active_d .* 0.3,
        active_acute = active_d .* 0.7,
        active_allbeds = active_d,
    )
    push!(data_daily_list, loc_df_daily)
end
# `reduce(vcat, ...)` concatenates the whole vector without splatting thousands
# of frames into a varargs call.
data_daily = reduce(vcat, data_daily_list);

In [8]:
# Write the stacked daily table to the output CSV.
CSV.write("../data/hhs_data_2020_12_24.csv", data_daily);