In [27]:
using CSV, DataFrames
using Dates

In [29]:
# Set the COLUMNS environment variable to 200 for this session — presumably so wide
# tables are displayed without their columns being cut off (TODO confirm intent).
ENV["COLUMNS"] = 200;

## CDC Ground Truth

In [17]:
# Load the raw CDC ground-truth file (2020-05-26.csv) into a DataFrame.
# Built via `CSV.File` instead of the sink-less `CSV.read(path)`: newer CSV.jl releases
# require an explicit sink for `CSV.read`, while this form works on old and new versions.
gt_raw = DataFrame(CSV.File("../../data/forecasts/cdc/gt/2020-05-26.csv"));

In [41]:
# Show the distinct values of the Notes column (the free-text notes shipped in the file).
unique(gt_raw.Notes)

11-element Array{Union{Missing, String},1}:
 "This file contains National and State representative estimates from the CDC National Healthcare Safety Network (NHSN)."
 "These estimates are based on data retrieved on 05-27-2020 at 5:30 am."
 "Statistical methods were used to generate estimates of patient impact and hospital capacity measures that are representative at the national level."
 "The estimates are based on data submitted by acute care hospitals to the NHSN COVID-19 Module."
 "The statistical methods include weighting (to account for non-response), multiple imputation (to account for missing data), and a running 7-day smoothing technique (to account for daily fluctuations and updated responses in reporting to NHSN)."
 "The estimates (number and percentage) are shown along with 95% confidence intervals (denoted by the suffixes LoCI or UpCI) that reflect the statistical error that is primarily due to non-response."
 "If estimates for the United States are required, the estimates 

In [39]:
# The first row of the raw table holds a human-readable description for every column;
# turn it into a column-name => description lookup.
col_descriptions = let description_row = gt_raw[1, :]
    Dict(name => description_row[name] for name in keys(description_row))
end

Dict{Symbol,String} with 24 entries:
  :ICUBedsOccAnyPat__N_ICUBeds_UpCI => "ICU bed occupancy, upper 95% CI (percent of ICU beds)"
  :state                            => "Two-letter state abbreviation"
  :InpatBeds_Occ_AnyPat_UpCI        => "Hospital inpatient bed occupancy, upper 95% CI"
  :InpatBeds_Occ_AnyPat_Est_Avail   => "Hospital inpatient beds available, estimate"
  :ICUBeds_Occ_AnyPat_LoCI          => "ICU bed occupancy, lower 95% CI"
  :InBedsOccCOVID__Numbeds_Est      => "Number of patients in an inpatient care location who have suspected or confirmed COVID-19, percent estimate (percent of inpatient beds)"
  :collectionDate                   => "Day for which estimate is made"
  :InBedsOccCOVID__Numbeds_UpCI     => "Number of patients in an inpatient care location who have suspected or confirmed COVID-19, upper 95% CI (percent of inpatient beds)"
  :ICUBeds_Occ_AnyPat_Est_Avail     => "ICU beds available, estimate"
  :ICUBedsOccAnyPat__N_ICUBeds_Est  => "ICU bed occupancy, 

In [47]:
# Mapping from the raw column names that are kept to the short names used afterwards.
# :state and :date keep their names; every measure contributes an estimate plus its
# lower/upper confidence bound (`_Est`, `_LoCI`, `_UpCI` -> ``, `_lb`, `_ub`).
good_cols = Dict{Symbol,Symbol}(:state => :state, :date => :date)
for (raw_prefix, short_prefix) in (
        ("ICUBeds_Occ_AnyPat", "icu_active"),
        ("InpatBeds_Occ_AnyPat", "all_active"),
        ("InpatBeds_Occ_COVID", "all_covid_active"),
    ),
    (raw_suffix, short_suffix) in (("_Est", ""), ("_LoCI", "_lb"), ("_UpCI", "_ub"))

    good_cols[Symbol(raw_prefix, raw_suffix)] = Symbol(short_prefix, short_suffix)
end

In [63]:
# Row 1 of the raw table holds the column descriptions, so the data starts at row 2.
gt = gt_raw[2:end, :]
# Parse the collectionDate strings with the "dduuuyyyy" pattern into a new `date` column.
gt.date = Date.(gt.collectionDate, "dduuuyyyy")
# Keep only the columns listed in good_cols (reverse-sorted by raw name, which puts
# :state and :date first) and switch them to their short names.
# NOTE(review): the displayed result shows the estimate/bound columns are still
# String-typed; they need parsing before any arithmetic or numeric comparison.
gt = rename!(gt[:, sort!(collect(keys(good_cols)); rev=true)], good_cols)
last(gt, 10)

Unnamed: 0_level_0,state,date,all_covid_active_ub,all_covid_active_lb,all_covid_active,all_active_ub,all_active_lb,all_active,icu_active_ub,icu_active_lb,icu_active
Unnamed: 0_level_1,String,Date,String,String,String,String,String,String,String,String,String
1,WY,2020-05-17,50,1,26,942,176,559,92,10,51
2,WY,2020-05-18,39,0,19,956,180,568,93,7,50
3,WY,2020-05-19,26,0,13,995,202,598,110,2,56
4,WY,2020-05-20,30,4,17,970,165,567,83,3,43
5,WY,2020-05-21,37,7,22,915,307,611,80,14,44
6,WY,2020-05-22,26,6,16,621,204,413,62,7,30
7,WY,2020-05-23,16,0,7,474,80,277,44,2,18
8,WY,2020-05-24,32,0,15,464,69,267,65,2,25
9,WY,2020-05-25,24,0,12,453,69,261,65,2,23
10,WY,2020-05-26,19,0,9,444,69,257,51,2,19


## CDC Compiled Hospitalization Forecasts

In [75]:
# Load the CDC-compiled state hospitalization forecasts (state-hosp-2020-05-25.csv).
# Built via `CSV.File` instead of the sink-less `CSV.read(path)`: newer CSV.jl releases
# require an explicit sink for `CSV.read`, while this form works on old and new versions.
forecast = DataFrame(
    CSV.File("../../data/forecasts/cdc/forecast/state-hosp-2020-05-25.csv")
)
# forecast_date is read as text in month/day/year form; convert it to Date values.
forecast.forecast_date = Date.(forecast.forecast_date, "m/d/yyyy")
# A target such as "1 day ahead inc hosp" starts with its horizon, so the date being
# forecast is forecast_date plus that leading number of days. Splitting on any
# whitespace (rather than a single ' ') tolerates stray or repeated spaces.
# NOTE(review): the leading number is always interpreted as days — confirm that no
# target in this file uses a different unit (e.g. weeks).
forecast.date = [
    issued + Day(parse(Int, first(split(target)))) for
    (issued, target) in zip(forecast.forecast_date, forecast.target)
]
first(forecast, 10)

Unnamed: 0_level_0,model,forecast_date,target,location_name,point,quantile_0.025,quantile_0.975,date
Unnamed: 0_level_1,String,Date,String,String,Int64,Int64,Int64,Date
1,CU-select,2020-05-25,1 day ahead inc hosp,AK,0,0,0,2020-05-26
2,CU-select,2020-05-25,1 day ahead inc hosp,AL,41,27,51,2020-05-26
3,CU-select,2020-05-25,1 day ahead inc hosp,Alabama,41,27,51,2020-05-26
4,CU-select,2020-05-25,1 day ahead inc hosp,Alaska,0,0,0,2020-05-26
5,CU-select,2020-05-25,1 day ahead inc hosp,AR,18,11,27,2020-05-26
6,CU-select,2020-05-25,1 day ahead inc hosp,Arizona,52,38,64,2020-05-26
7,CU-select,2020-05-25,1 day ahead inc hosp,Arkansas,18,11,27,2020-05-26
8,CU-select,2020-05-25,1 day ahead inc hosp,AZ,52,38,64,2020-05-26
9,CU-select,2020-05-25,1 day ahead inc hosp,CA,289,256,318,2020-05-26
10,CU-select,2020-05-25,1 day ahead inc hosp,California,289,256,318,2020-05-26
