In [1]:
using CSV
using Dates
using DataFrames

In [2]:
ENV["LINES"] = 60;

In [3]:
"""
    num_nurses_employment(states::Array{String,1})

Estimate the number of nurses in each of `states` from the employment data in
`../../data/nurses/deaggregated_by_hospital_beds.csv`.

The `weighted_emp_distribution` column is summed per state and the sums are scaled by
the fixed ratio `1_713_120 / 2_982_280`.
NOTE(review): the ratio presumably rescales the totals in the file to a target national
head count -- confirm against the data source.

# Arguments
- `states`: state abbreviations in ascending order, without duplicates and without
  `"HI"`.

# Returns
A vector with one estimate per entry of `states`, in the same order.

# Throws
- `ArgumentError` if `states` is not sorted, contains `"HI"`, or contains a state
  (or a duplicate) for which the data does not yield exactly one row.
"""
function num_nurses_employment(states::Array{String,1})
    # Explicit throws rather than `@assert`: assertions may be compiled out, and these
    # are checks on caller input, not internal invariants.
    issorted(states) ||
        throw(ArgumentError("`states` must be sorted in ascending order"))
    "HI" in states && throw(ArgumentError("`states` must not contain \"HI\""))

    nurse_data = CSV.read(
        "../../data/nurses/deaggregated_by_hospital_beds.csv", copycols=true
    )

    # One row per state; `by` names the aggregate `weighted_emp_distribution_sum`.
    nurse_data = by(nurse_data, :state, :weighted_emp_distribution => sum)
    filter!(row -> row.state in states, nurse_data)
    sort!(nurse_data, :state)

    # The result is positional, so the surviving rows must match `states` exactly;
    # otherwise callers would pair estimates with the wrong state.
    if !isequal(nurse_data.state, states)
        absent = setdiff(states, nurse_data.state)
        throw(ArgumentError(
            "`states` must be unique and each state must appear in the nurse data; " *
            "states without data: [" * join(absent, ", ") * "]"
        ))
    end

    return Float32.(nurse_data.weighted_emp_distribution_sum) .* (1_713_120 / 2_982_280)
end;

In [4]:
"""
    num_nurses_bybeds(states; nurse_hrs_per_week=36, nurses_per_icu_bed=0.5,
                      nurses_per_non_icu_bed=0.2)

Estimate the number of nurses in each of `states` from staffed hospital bed counts in
`../../data/hospitals/Definitive_Healthcare__USA_Hospital_Beds.csv`.

Psychiatric and rehabilitation hospitals are excluded, as are rows with a missing
state, staffed-bed count or ICU-bed count, and rows with no staffed beds. Per state,
the estimate is

    (nurses_per_icu_bed * icu_beds + nurses_per_non_icu_bed * non_icu_beds) *
        (24 * 7 / nurse_hrs_per_week)

where `non_icu_beds` is staffed beds minus ICU beds.
NOTE(review): the last factor presumably converts nurses on duty at any moment into
the head count needed to cover a full week -- confirm.

# Arguments
- `states`: state abbreviations in ascending order, without duplicates.

# Keywords
- `nurse_hrs_per_week`: hours each nurse works per week.
- `nurses_per_icu_bed`: coefficient applied to the ICU bed count.
- `nurses_per_non_icu_bed`: coefficient applied to the non-ICU bed count.

# Returns
A vector with one estimate per entry of `states`, in the same order.

# Throws
- `ArgumentError` if `states` is not sorted, or contains a state (or a duplicate)
  for which the filtered bed data does not yield exactly one row.
"""
function num_nurses_bybeds(
    states::Array{String,1};
    nurse_hrs_per_week::Real=36,
    nurses_per_icu_bed::Real=0.5,
    nurses_per_non_icu_bed::Real=0.2
)
    # The result is sorted by state below, so it only lines up with `states` when the
    # caller's list is sorted too (same requirement as `num_nurses_employment`).
    issorted(states) ||
        throw(ArgumentError("`states` must be sorted in ascending order"))

    beds_data_alt = CSV.read(
        "../../data/hospitals/Definitive_Healthcare__USA_Hospital_Beds.csv",
        copycols=true
    )
    filter!(row -> !(row.HOSPITAL_TYPE in ["Psychiatric Hospital", "Rehabilitation Hospital"]), beds_data_alt)
    filter!(row -> !(ismissing(row.NUM_STAFFED_BEDS) || ismissing(row.HQ_STATE) || ismissing(row.NUM_ICU_BEDS)), beds_data_alt)
    filter!(row -> row.NUM_STAFFED_BEDS > 0, beds_data_alt)
    filter!(row -> row.HQ_STATE in states, beds_data_alt)

    # One row per state; `by` names the aggregates `<column>_sum`.
    beds_by_state_alt = by(beds_data_alt, :HQ_STATE, [:NUM_STAFFED_BEDS => sum, :NUM_ICU_BEDS => sum])
    beds_by_state_alt.non_icu_beds_sum = beds_by_state_alt.NUM_STAFFED_BEDS_sum - beds_by_state_alt.NUM_ICU_BEDS_sum
    beds_by_state_alt.est_nurses = (nurses_per_icu_bed * beds_by_state_alt.NUM_ICU_BEDS_sum) + (nurses_per_non_icu_bed * beds_by_state_alt.non_icu_beds_sum)
    sort!(beds_by_state_alt, :HQ_STATE)

    # A state with no qualifying hospital would silently shorten the result and shift
    # every later estimate onto the wrong state, so fail loudly instead.
    if !isequal(beds_by_state_alt.HQ_STATE, states)
        absent = setdiff(states, beds_by_state_alt.HQ_STATE)
        throw(ArgumentError(
            "`states` must be unique and each state must appear in the bed data; " *
            "states without data: [" * join(absent, ", ") * "]"
        ))
    end

    return Float32.(beds_by_state_alt.est_nurses) * (24*7 / nurse_hrs_per_week)
end;

In [5]:
# Load the state-name lookup table, ordered alphabetically by abbreviation.
state_data = sort!(
    CSV.read("../../data/geography/state_names.csv", copycols=true),
    :Abbreviation,
)

# Every abbreviation in the table, and the same list with "HI" removed
# (`num_nurses_employment` rejects "HI").
all_states = collect(state_data.Abbreviation)
states = all_states[all_states .!= "HI"]
@show states;

states = ["AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY"]


In [6]:
# Heading printed above the comparison table that this cell displays.
print("Total nurses by state, estimated two ways")

# Per-state estimates from the two sources, both ordered like `states`.
nurses_from_beds = num_nurses_bybeds(states)
nurses_from_employment = num_nurses_employment(states)

# Side-by-side table: absolute gap (employment minus beds) and the same gap
# relative to the bed-based estimate.
nurses = let gap = nurses_from_employment .- nurses_from_beds
    DataFrame(
        state=states,
        from_beds=nurses_from_beds,
        from_employment=nurses_from_employment,
        diff=gap,
        diff_pct=gap ./ nurses_from_beds,
    )
end

Total nurses by state, estimated two ways

Unnamed: 0_level_0,state,from_beds,from_employment,diff,diff_pct
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64
1,AK,1634.73,2854.93,1220.2,0.746421
2,AL,15163.4,24611.0,9447.61,0.623053
3,AR,9200.33,10690.2,1489.86,0.161936
4,AZ,14626.7,28525.8,13899.0,0.950249
5,CA,79151.3,101102.0,21950.6,0.277324
6,CO,10383.8,26244.1,15860.3,1.52741
7,CT,8221.27,12120.0,3898.7,0.474221
8,DC,2972.67,51.1245,-2921.54,-0.982802
9,DE,2257.27,3234.06,976.791,0.432732
10,FL,56092.4,101334.0,45241.6,0.806555


In [7]:
# Totals across all states in the `nurses` table. `@show` prints the source text of each
# expression together with its value, so the expressions are left exactly as written.

# Sum of the bed-based estimates.
@show total_from_beds = sum(nurses.from_beds);
# Sum of the employment-based estimates.
@show total_from_employment = sum(nurses.from_employment);
# Absolute gap: employment-based total minus bed-based total.
@show total_difference = total_from_employment - total_from_beds;
# The same gap as a fraction of the bed-based total.
@show total_difference_pct = (total_from_employment - total_from_beds) / total_from_beds;

total_from_beds = sum(nurses.from_beds) = 783580.4588623047
total_from_employment = sum(nurses.from_employment) = 1.4025757844333868e6
total_difference = total_from_employment - total_from_beds = 618995.3255710821
total_difference_pct = (total_from_employment - total_from_beds) / total_from_beds = 0.789957583258027
