# Merge Ventilator data and HGHI State-level data

This notebook reads in the ventilator dataset and merges it with the state-level HGHI data.

The ventilator estimates come from the article [Mechanical Ventilators in US Acute Care Hospitals](https://www.cambridge.org/core/journals/disaster-medicine-and-public-health-preparedness/article/mechanical-ventilators-in-us-acute-care-hospitals/F1FDBACA53531F2A150D6AD8E96F144D).





In [None]:
import pandas as pd
import geopandas as gpd

from covidcaremap.data import read_census_data_df, external_data_path, processed_data_path

In [None]:
# Load the per-state ventilator estimates from the external data directory.
vents_path = external_data_path('ventilators_by_state.csv')
vents_df = pd.read_csv(vents_path, encoding='utf-8')

# Rename columns to be explicit that this is older estimate data.
vent_renames = {
    'Estimated No. Full-Featured Mechanical Ventilators': (
        'Estimated No. Full-Featured Mechanical Ventilators (2010 study estimate)'
    ),
    'Estimated No. Full-Featured Mechanical Ventilators per 100,000 Population': (
        'Estimated No. Full-Featured Mechanical Ventilators per 100,000 Population (2010 study estimate)'
    ),
    'Estimated No. Pediatrics-Capable Full-Feature Mechanical Ventilators': (
        'Estimated No. Pediatrics-Capable Full-Feature Mechanical Ventilators (2010 study estimate)'
    ),
    'Estimated No. Full-Feature Mechanical Ventilators, Pediatrics Capable per 100,000 Population <14 y': (
        'Estimated No. Full-Feature Mechanical Ventilators, Pediatrics Capable per 100,000 Population <14 y (2010 study estimate)'
    )
}

# Fail fast if the CSV header has drifted, and report every missing column at
# once: DataFrame.rename silently ignores keys it cannot find, so without this
# check a changed header would go unnoticed.
missing_vent_columns = [
    column for column in vent_renames if column not in vents_df.columns
]
assert not missing_vent_columns, (
    'ventilators_by_state.csv is missing expected columns: {}'.format(
        missing_vent_columns
    )
)

vents_df = vents_df.rename(columns=vent_renames)

In [None]:
# Display the ventilator table (after the column renames) for visual inspection.
vents_df

In [None]:
# Read the processed state-level HGHI GeoJSON file with geopandas.
hghi_state_path = processed_data_path('hghi_state_data.geojson')
hghi_state_gdf = gpd.read_file(hghi_state_path, encoding='utf-8')

In [None]:
# Join the ventilator estimates onto the HGHI rows by state abbreviation.
# `join` defaults to a left join, so every HGHI row is kept and states that
# have no ventilator record get NaN in the ventilator columns.
hghi_by_state = hghi_state_gdf.set_index('State')
vents_by_state = vents_df.set_index('State Abbrv')

merged_df = (
    hghi_by_state
    .join(vents_by_state)
    .reset_index()
    # NOTE(review): presumably 'Location' duplicates the state name already
    # present in the HGHI data -- confirm before relying on that.
    .drop(columns=['Location'])
)


In [None]:
# Wrap the joined table in a GeoDataFrame, tagging it with CRS code 4326.
merged_gdf = gpd.GeoDataFrame(
    merged_df,
    crs=4326,
)

## Add per-capita numbers

In [None]:
# Load the census table and keep only the rows whose YEAR code equals 11.
# NOTE(review): YEAR code 11 is presumably the 2018 population estimate (the
# variable name and the derived column labels say 2018) -- confirm against the
# census file layout.
census_df = read_census_data_df()
is_2018_estimate = census_df['YEAR'] == 11
census2018_df = census_df.loc[is_2018_estimate]

In [None]:
# Sum TOT_POP per state over the rows with AGEGRP == 0, producing one row per
# state with columns 'State Name' and 'TOT_POP'.
# NOTE(review): AGEGRP 0 is presumably the "all ages" total -- confirm against
# the census file layout.
all_ages_rows = census2018_df[census2018_df['AGEGRP'] == 0]

state_pop_all = (
    all_ages_rows
    .groupby(['STNAME'])['TOT_POP']
    .sum()
    .reset_index()
    .rename(columns={'STNAME': 'State Name'})
)

In [None]:
# Attach the state population totals by state name. This is an inner join (the
# `merge` default, spelled out here), so any row whose 'State Name' has no
# match in `state_pop_all` is dropped from the result.
with_pop_gdf = merged_gdf.merge(
    state_pop_all,
    how='inner',
    on='State Name',
)

In [None]:
# Columns for which a "per 100,000 people" rate column is added to `with_pop_gdf`.
columns_to_calc_per_capita = [
    'Total Hospital Beds',
    'Total ICU Beds',
    'Available Hospital Beds',
    'Potentially Available Hospital Beds*',
    'Available ICU Beds',
    'Potentially Available ICU Beds*',
]

# Population expressed in units of 100,000 people; the same divisor applies to
# every column, so compute it once.
population_per_100k = with_pop_gdf['TOT_POP'] / 100000

for source_column in columns_to_calc_per_capita:
    rate_column = '{} per 100,000 people (2018 population estimate)'.format(source_column)
    # Rate = count / (population / 100,000), rounded to three decimals.
    with_pop_gdf[rate_column] = (
        with_pop_gdf[source_column].div(population_per_100k).round(3)
    )
    

In [None]:
# Write the final table to the processed data directory as GeoJSON.
output_path = processed_data_path('hghi_state_data_with_vents.geojson')
with_pop_gdf.to_file(output_path, driver='GeoJSON', encoding='utf-8')