In [1]:
using CSV
using DataFrames
using Dates
using Gadfly

In [2]:
# Julia's `displaysize` consults the COLUMNS environment variable; a large
# value keeps wide tables from being truncated when shown.
ENV["COLUMNS"] = "1000";

In [3]:
# Load the raw export; the literal text "null" in a cell is read as `missing`.
rawdata = CSV.File("../rawdata/md_data_2020_12_01.csv"; missingstring="null") |> DataFrame;

In [4]:
"""
    get_hospital_id(h)

Build a short identifier for `h`: the letter `h` followed by the last eight
characters of the decimal string form of `hash(h)`.
"""
function get_hospital_id(h)
    hashtext = string(hash(h))
    suffix = hashtext[end-7:end]
    return "h" * suffix
end;

In [5]:
"""
    convert_date(d)

Parse a `month/day/year` string into a `Date`.

Two-digit years are interpreted as 20xx (e.g. `"12/1/20"` becomes 2020-12-01).
Years that are already written out in full are returned unchanged instead of
being shifted by another 2000 years.
"""
function convert_date(d)
    parsed = Date(d, dateformat"m/d/y")
    # The `y` code takes the digits literally, so "20" parses as year 20;
    # only such abbreviated years need the century added.
    return year(parsed) < 100 ? parsed + Year(2000) : parsed
end;

In [6]:
# Keep only the columns used downstream, under short names, and derive a
# hospital id and a proper `Date` from the raw text columns.
data = select(
    rawdata,
    "Facility Name" => :hospital,
    "Facility Name" => ByRow(get_hospital_id) => :hospital_id,
    "Modified Date" => ByRow(convert_date) => :date,
    # Staffed beds.
    "Staffed Adult Acute Care Beds" => :beds_acute,
    "Staffed Adult ICU Beds" => :beds_icu,
    # Physical beds.
    "Physical Adult Acute Care Beds" => :beds_physical_acute,
    "Physical Adult ICU Beds" => :beds_physical_icu,
)
# Total staffed beds, computed on whole columns. `+` propagates `missing`, so
# the total is `missing` on any row where either component is `missing`.
transform!(data, [:beds_acute, :beds_icu] => (+) => :beds_allbeds)
sort!(data, [:hospital, :date]);

In [7]:
# Window of dates over which mean capacities are averaged: every day from
# start_date through end_date, both inclusive.
start_date, end_date = Date(2020, 6, 1), Date(2020, 11, 1)
date_range = collect(range(start_date; stop=end_date, step=Day(1)));

In [8]:
"""
    mean_capacity(xs, dates; window=date_range)

Return the mean of the non-missing values in `xs` whose matching entry in
`dates` lies in `window`, rounded to an `Int`.

# Arguments
- `xs`: capacity values; entries may be `missing`.
- `dates`: dates paired element-by-element with `xs`.

# Keywords
- `window`: collection of dates to average over. Defaults to the global
  `date_range`, which is what the two-argument call has always used.

# Returns
`0` when no value is both inside the window and non-missing; otherwise the
rounded mean.
"""
function mean_capacity(xs, dates; window=date_range)
    # One pass: keep a value only if its date is in the window and it is present.
    observed = [x for (x, d) in zip(xs, dates) if d in window && !ismissing(x)]
    isempty(observed) && return 0
    return round(Int, sum(observed) / length(observed))
end;

"""
    max_capacity(xs)

Return the largest non-missing value in `xs`, or `0` when `xs` holds no
non-missing value.
"""
function max_capacity(xs)
    observed = skipmissing(xs)
    isempty(observed) && return 0
    return maximum(observed)
end;

"""
    min_capacity(xs)

Return the smallest non-missing value in `xs`, or `0` when `xs` holds no
non-missing value.
"""
function min_capacity(xs)
    observed = skipmissing(xs)
    isempty(observed) && return 0
    return minimum(observed)
end;

In [9]:
# One row per hospital with the minimum, windowed mean and maximum of each
# staffed-bed series. The mean also receives the :date column so it can
# restrict itself to its date window.
capacity_data = let grouped = groupby(data, [:hospital, :hospital_id])
    combine(
        grouped,
        # Minimum over all reported values.
        :beds_icu => min_capacity => :beds_icu_min,
        :beds_acute => min_capacity => :beds_acute_min,
        :beds_allbeds => min_capacity => :beds_allbeds_min,
        # Mean over the date window.
        [:beds_icu, :date] => mean_capacity => :beds_icu_mean,
        [:beds_acute, :date] => mean_capacity => :beds_acute_mean,
        [:beds_allbeds, :date] => mean_capacity => :beds_allbeds_mean,
        # Maximum over all reported values.
        :beds_icu => max_capacity => :beds_icu_max,
        :beds_acute => max_capacity => :beds_acute_max,
        :beds_allbeds => max_capacity => :beds_allbeds_max,
    )
end;

In [10]:
# Names of the min/mean/max summary columns for each bed category.
capacity_names_icu = [Symbol(:beds_icu_, stat) for stat in (:min, :mean, :max)];
capacity_names_acute = [Symbol(:beds_acute_, stat) for stat in (:min, :mean, :max)];
capacity_names_allbeds = [Symbol(:beds_allbeds_, stat) for stat in (:min, :mean, :max)];

In [11]:
# Order the rows by hospital name, then by hospital id, modifying the table in place.
sort!(capacity_data, [:hospital, :hospital_id]);

In [12]:
# Preview the first five rows of the per-hospital capacity table.
first(capacity_data, 5)

Unnamed: 0_level_0,hospital,hospital_id,beds_icu_min,beds_acute_min,beds_allbeds_min,beds_icu_mean,beds_acute_mean,beds_allbeds_mean,beds_icu_max,beds_acute_max,beds_allbeds_max
Unnamed: 0_level_1,String,String,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64
1,Anne Arundel Medical Center,h39740093,30,181,211,40,284,324,45,293,331
2,Atlantic General Hospital,h92616877,6,35,43,9,43,51,15,67,82
3,Baltimore Convention Center Field Hospital,h94720559,0,7,7,0,26,23,0,44,44
4,Baltimore Washington Medical Center,h75227743,19,144,180,30,206,236,45,250,282
5,Bowie Medical Center,h26261837,0,0,0,0,10,10,0,20,20


In [13]:
# Reshape to long form: every column except the two hospital identifiers
# becomes a (capacity_name, capacity_value) row.
capacity_data_long = stack(
    capacity_data,
    Not([:hospital, :hospital_id]);
    variable_name=:capacity_name,
    value_name=:capacity_value,
);

In [14]:
# Split the long table by bed category, keeping the rows whose capacity_name
# (converted to a Symbol) is one of that category's summary columns.
capacity_data_long_icu = filter(
    :capacity_name => (name -> Symbol(name) in capacity_names_icu),
    capacity_data_long,
);
capacity_data_long_acute = filter(
    :capacity_name => (name -> Symbol(name) in capacity_names_acute),
    capacity_data_long,
);
capacity_data_long_allbeds = filter(
    :capacity_name => (name -> Symbol(name) in capacity_names_allbeds),
    capacity_data_long,
);

In [15]:
# Output table: hospital identifiers plus the windowed mean of each bed
# series, exposed under capacity_* names.
capacity_data_output = select(
    capacity_data,
    :hospital,
    :hospital_id,
    :beds_icu_mean => :capacity_icu,
    :beds_acute_mean => :capacity_acute,
    :beds_allbeds_mean => :capacity_allbeds,
);

In [16]:
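# Disabled: if enabled, this would keep only hospitals whose three capacities sum to more than zero.
# filter!(row -> row.capacity_icu + row.capacity_acute + row.capacity_allbeds > 0, capacity_data_output);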

In [17]:
# Write the per-hospital capacity table to disk.
CSV.write("../data/capacity.csv", capacity_data_output);