# Generate CareModel Regional Data

This rolls up CareModel data from the facility data based on three regions: county, state, and Hospital Referral Region (HRR).

Most of the work is done by the `sum_per_region` function in the `covidcaremap.geo` package. See that code for specifics.

## Methods

- Take the facility data, spatially join to regional data, and sum count properties for each region.
- Based on population counts, create "per 1000" versions of each column for each of total population, adult population, and elderly population.
- Save the three aggregated files as GeoJSON.

### Notes on aggregation of occupancy rates

Occupancy rates are a weighted average based on the number of beds (or ICU beds for ICU Occupancy) contributing to the total number of beds for that aggregation.

In [None]:
import geopandas as gpd

from covidcaremap.constants import *
from covidcaremap.data import (read_facility_gdf, 
                               read_us_hrr_gdf,
                               read_us_states_gdf,
                               read_us_counties_gdf,
                               processed_data_path)
from covidcaremap.geo import sum_per_region

In [None]:
# Load the processed facility-level CareModel GeoJSON; the regional
# aggregations in this notebook all read from `facility_gdf`.
facility_geojson_path = processed_data_path(
    'CareModel_data-facility-CovidCareMap.geojson'
)
facility_gdf = gpd.read_file(facility_geojson_path, encoding='utf-8')

In [None]:
# By HRR
# Aggregate facility data per Hospital Referral Region (keyed on 'HRRCITY'),
# then write the result as GeoJSON and as a geometry-free CSV.
hrr_fname = 'CareModel_data-hrr-CovidCareMap'
hrr_geojson_path = processed_data_path(f'{hrr_fname}.geojson')
hrr_csv_path = processed_data_path(f'{hrr_fname}.csv')

# 'HRR_BDRY_I' and 'HRRNUM' are removed from the region frame before aggregating.
hrr_gdf = read_us_hrr_gdf().drop(columns=['HRR_BDRY_I', 'HRRNUM'])

hrr_count_columns = CCM_FACILITY_COUNT_COLUMNS + CAREMODEL_CAPACITY_COLUMNS()
hosp_hrr_gdf = sum_per_region(
    facility_gdf,
    hrr_gdf,
    groupby_columns=['HRRCITY'],
    region_id_column='HRRCITY',
    facility_count_columns=hrr_count_columns,
)

hosp_hrr_gdf.to_file(hrr_geojson_path, driver='GeoJSON')

# The CSV is the same table without the geometry column, ordered by 'HRRCITY'.
hrr_table = hosp_hrr_gdf.drop(columns=['geometry'])
hosp_hrr_df = hrr_table.sort_values(by='HRRCITY')
hosp_hrr_df.to_csv(hrr_csv_path, index=False)

In [None]:
# By State
# TODO: Get Puerto Rico population numbers.
# Aggregate facility data per state, attach the state name, then write the
# result as GeoJSON and as a geometry-free CSV.

state_fname = 'CareModel_data-state-CovidCareMap'
state_geojson_path = processed_data_path(f'{state_fname}.geojson')
state_csv_path = processed_data_path(f'{state_fname}.csv')

state_gdf = read_us_states_gdf()

# Region frame passed to the aggregation: id, geometry and population columns only.
state_region_columns = [
    'State',
    'geometry',
    'Population',
    'Population (20+)',
    'Population (65+)',
]
filtered_state_gdf = state_gdf[state_region_columns]

# NOTE(review): the facility 'State' column is dropped, presumably so it does
# not clash with the region frame's 'State' column -- confirm in sum_per_region.
facility_without_state_gdf = facility_gdf.drop(columns=['State'])

state_count_columns = CCM_FACILITY_COUNT_COLUMNS + CAREMODEL_CAPACITY_COLUMNS()
hosp_state_gdf = sum_per_region(
    facility_without_state_gdf,
    filtered_state_gdf,
    groupby_columns=['State'],
    region_id_column='State',
    facility_count_columns=state_count_columns,
)

# Add a 'State Name' column (renamed from 'NAME') by merging on 'State'.
state_gdf = state_gdf.rename(columns={'NAME': 'State Name'})
state_names = state_gdf[['State', 'State Name']]
hosp_state_gdf = gpd.GeoDataFrame(
    state_names.merge(hosp_state_gdf, on='State'),
    crs='EPSG:4326',
)

hosp_state_gdf.to_file(state_geojson_path, driver='GeoJSON')

# NOTE(review): despite its name, `state_hrr_df` holds the state-level table;
# the name is kept unchanged in case other cells refer to it.
state_table = hosp_state_gdf.drop(columns=['geometry'])
state_hrr_df = state_table.sort_values(by='State')
state_hrr_df.to_csv(state_csv_path, index=False)

In [None]:
# By County
# Aggregate facility data per county (keyed on 'GEO_ID'), replace the id with
# the 'State' and 'County Name' columns, then write the result as GeoJSON and
# as a geometry-free CSV.
# Fix: the file-name literal was missing its opening quote, which made this
# cell a SyntaxError.
county_fname = 'CareModel_data-county-CovidCareMap'
county_geojson_path = processed_data_path('{}.geojson'.format(county_fname))
county_csv_path = processed_data_path('{}.csv'.format(county_fname))

county_gdf = read_us_counties_gdf()

# Region frame passed to the aggregation: id, geometry and population columns only.
filtered_county_gdf = county_gdf[['GEO_ID',
                                  'geometry',
                                  'Population',
                                  'Population (20+)',
                                  'Population (65+)']]

hosp_county_gdf = sum_per_region(
    facility_gdf,
    filtered_county_gdf,
    groupby_columns=['GEO_ID'],
    region_id_column='GEO_ID',
    facility_count_columns=CCM_FACILITY_COUNT_COLUMNS + CAREMODEL_CAPACITY_COLUMNS(),
)

# Merge 'State' and 'County Name' onto the aggregated rows via 'GEO_ID';
# the id column is dropped once the merge is done.
merged_county_gdf = (
    county_gdf[['GEO_ID', 'State', 'County Name']]
    .merge(hosp_county_gdf, on='GEO_ID')
    .drop(columns=['GEO_ID'])
)

# Re-wrap the merge result as a GeoDataFrame with an explicit CRS before writing.
hosp_county_gdf = gpd.GeoDataFrame(merged_county_gdf, crs='EPSG:4326')

hosp_county_gdf.to_file(county_geojson_path, driver='GeoJSON')

# The CSV is the same table without geometry, ordered by state then county name.
hosp_county_df = hosp_county_gdf.drop(columns=['geometry']).sort_values(
    by=['State', 'County Name']
)
hosp_county_df.to_csv(county_csv_path, index=False)