In [32]:
import pandas as pd
import numpy as np 
import altair as alt

# Switch Altair to its 'json' data transformer instead of the default one.
# NOTE(review): per the Altair docs this writes chart data to a separate JSON
# file rather than embedding it in the chart spec — presumably chosen to get
# past the embedded-row limit with the large county table; confirm.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [33]:
def retrieve_appregate_data(
    infection_path='relevant_data/us-counties.csv',
    population_path='relevant_data/county-population.csv',
):
    """Load county infection records and attach county population figures.

    The function name is a historical misspelling of "aggregate"; it is kept
    unchanged so existing callers keep working.

    Parameters
    ----------
    infection_path : str or path-like, optional
        CSV of infection records. Must contain ``county`` and ``state``
        columns. Defaults to the notebook's bundled file.
    population_path : str or path-like, optional
        CSV of county population records. Must contain ``county`` and
        ``state`` columns. Defaults to the notebook's bundled file.

    Returns
    -------
    pandas.DataFrame
        The two tables joined on ``(county, state)``. ``DataFrame.merge``
        defaults to an inner join, so rows whose county/state pair is missing
        from either file are dropped.
    """
    county_infection_data = pd.read_csv(infection_path)
    county_population_data = pd.read_csv(population_path)

    # Both files use the same key column names, so ``on=`` is sufficient
    # (equivalent to passing identical ``left_on`` / ``right_on`` lists).
    county_infection_popu_data = county_infection_data.merge(
        county_population_data, on=['county', 'state']
    )

    return county_infection_popu_data

In [34]:
# Load the infection records joined with county population figures.
data = retrieve_appregate_data()

In [35]:
# Sanity check: show any rows that report zero cases
# (the recorded output below contains no rows).
data[data['cases'] == 0]

Unnamed: 0,date,county,state,fips,cases,deaths,population,land_area_km,density_km


In [36]:
# Preview the merged table; pandas abbreviates the display to the first and last rows.
data

Unnamed: 0,date,county,state,fips,cases,deaths,population,land_area_km,density_km
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
...,...,...,...,...,...,...,...,...,...
89495,2020-04-26,Emmet,Iowa,19063.0,1,0,10302,1026,10.041
89496,2020-04-26,Jefferson,Nebraska,31095.0,1,0,7560,1484,5.094
89497,2020-04-26,Ransom,North Dakota,38073.0,1,0,5457,2235,2.442
89498,2020-04-26,Hand,South Dakota,46059.0,1,0,3431,3722,0.922


In [37]:
def count_days(series):
    """Return the number of whole days spanned by the dates in ``series``.

    Parameters
    ----------
    series : pandas.Series
        Date values in any form ``pd.to_datetime`` can parse.

    Returns
    -------
    int
        Days between the earliest and the latest date. ``0`` for a single
        date or an empty series.
    """
    time_series = pd.to_datetime(series)

    # An empty series has no span; avoid indexing/reducing into nothing.
    if time_series.empty:
        return 0

    # Use min/max rather than first/last position so the result does not
    # depend on the rows already being in chronological order.
    return (time_series.max() - time_series.min()).days

In [43]:
def group_data(data, min_days=15):
    """Summarise daily county records into one row per ``(state, county)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``state``, ``county``, ``date``, ``population``,
        ``cases`` and ``deaths`` columns.
        NOTE(review): taking the last ``cases`` / ``deaths`` row as the total
        assumes these are cumulative counts and that rows are in chronological
        order within each county — confirm against the data source.
    min_days : int, optional
        Minimum ``days_counted`` a county needs to be kept. Defaults to 15,
        the threshold this analysis has always used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(state, county)`` with columns ``population``,
        ``days_counted``, ``case_sum``, ``death_sum``,
        ``confirmed_infection_perc``, ``death_perc`` and
        ``confirmed_infection_perc_avg_daily``.
    """
    grouped_data = data.groupby(['state', 'county']).agg(
        population=('population', lambda x: x.iloc[0]),
        days_counted=('date', count_days),
        case_sum=('cases', lambda x: x.iloc[-1]),
        death_sum=('deaths', lambda x: x.iloc[-1])
    )

    # Share of the population with a confirmed infection, in percent.
    grouped_data['confirmed_infection_perc'] = grouped_data['case_sum']/grouped_data['population']*100
    # Deaths as a percentage of confirmed cases (NaN/inf if case_sum is 0).
    grouped_data['death_perc'] = grouped_data['death_sum']/grouped_data['case_sum']*100
    # Average daily growth of the confirmed-infection percentage.
    grouped_data['confirmed_infection_perc_avg_daily'] = grouped_data['confirmed_infection_perc']/grouped_data['days_counted']

    # Drop counties observed for too short a period, plus any whose daily
    # average is infinite (zero-day span or zero population in the divisor).
    observed_long_enough = grouped_data['days_counted'] >= min_days
    finite_daily_average = grouped_data['confirmed_infection_perc_avg_daily'] != float("inf")
    grouped_data = grouped_data[observed_long_enough & finite_daily_average]

    return grouped_data

In [44]:
# Collapse the daily records into one summary row per (state, county).
grouped_data = group_data(data)

In [45]:
# Inspect the per-county summary (indexed by state, county).
grouped_data

Unnamed: 0_level_0,Unnamed: 1_level_0,population,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alabama,Autauga,54571,33,37,2,0.067802,5.405405,0.002055
Alabama,Baldwin,182265,43,161,3,0.088333,1.863354,0.002054
Alabama,Barbour,27457,23,33,0,0.120188,0.000000,0.005226
Alabama,Bibb,22915,27,38,0,0.165830,0.000000,0.006142
Alabama,Blount,57322,32,34,0,0.059314,0.000000,0.001854
...,...,...,...,...,...,...,...,...
Wyoming,Sublette,10368,29,1,0,0.009645,0.000000,0.000333
Wyoming,Sweetwater,45267,33,10,0,0.022091,0.000000,0.000669
Wyoming,Teton,21675,39,64,1,0.295271,1.562500,0.007571
Wyoming,Uinta,21025,24,6,0,0.028537,0.000000,0.001189


In [46]:
# Spot-check 10 random counties. The seed is fixed so the same rows are shown
# on every Restart & Run All instead of a different sample each run.
grouped_data.sample(10, random_state=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,population,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Wisconsin,Polk,44205,16,4,0,0.009049,0.0,0.000566
Pennsylvania,Huntingdon,45913,29,22,0,0.047917,0.0,0.001652
West Virginia,Wayne,42481,17,82,6,0.193027,7.317073,0.011355
Idaho,Minidoka,20069,27,7,0,0.03488,0.0,0.001292
Georgia,Brantley,18587,20,21,2,0.112982,9.52381,0.005649
Oregon,Marion,333950,49,436,15,0.130558,3.440367,0.002664
Missouri,Carter,6265,31,3,1,0.047885,33.333333,0.001545
South Dakota,Sanborn,2355,15,3,0,0.127389,0.0,0.008493
Missouri,Montgomery,12236,32,7,0,0.057208,0.0,0.001788
Pennsylvania,Columbia,67295,35,269,8,0.399733,2.973978,0.011421


In [47]:
# Load the 2020 county health rankings table.
county_health = pd.read_csv('relevant_data/us-county-health-rankings-2020.csv')

In [48]:
# Keep only rows that name a county.
# NOTE(review): the rows dropped here are presumably state-level summary
# rows — confirm against the CSV.
county_health = county_health.dropna(subset=['county'])

In [49]:
# Inspect the county-level health table after dropping rows without a county.
county_health

Unnamed: 0,fips,state,county,num_deaths,years_of_potential_life_lost_rate,95percent_ci_low,95percent_ci_high,quartile,ypll_rate_aian,ypll_rate_aian_95percent_ci_low,...,percent_hispanic,num_non_hispanic_white,percent_non_hispanic_white,num_not_proficient_in_english,percent_not_proficient_in_english,95percent_ci_low_39,95percent_ci_high_39,percent_female,num_rural,percent_rural
1,1001,Alabama,Autauga,791.0,8128.591190,7283.340731,8973.841649,1.0,,,...,2.965774,41316,74.308016,426,0.820225,0.347891,1.292558,51.448715,22921.0,42.002162
2,1003,Alabama,Baldwin,2967.0,7354.122530,6918.554269,7789.690790,1.0,,,...,4.646779,181201,83.111337,1068,0.543517,0.347271,0.739763,51.538377,77060.0,42.279099
3,1005,Alabama,Barbour,472.0,10253.573403,8782.217281,11724.929524,2.0,,,...,4.276355,11356,45.641252,398,1.631683,0.824903,2.438462,47.216752,18613.0,67.789635
4,1007,Alabama,Bibb,471.0,11977.539484,10344.064842,13611.014126,3.0,,,...,2.625000,16708,74.589286,57,0.268210,0.000000,0.807504,46.781250,15663.0,68.352607
5,1009,Alabama,Blount,1085.0,11335.071134,10288.871387,12381.270881,3.0,,,...,9.571231,50255,86.886238,934,1.724520,1.198129,2.250911,50.726141,51562.0,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Wyoming,Sweetwater,532.0,7831.827655,6915.944384,8747.710925,4.0,,,...,16.083250,34145,79.312908,669,1.633221,0.993813,2.272629,48.544749,4782.0,10.916313
3189,56039,Wyoming,Teton,109.0,2730.603992,1991.644111,3469.563872,1.0,,,...,14.878038,18812,81.504268,945,4.314477,2.787340,5.841615,48.403449,9887.0,46.430920
3190,56041,Wyoming,Uinta,256.0,7330.683549,6011.148684,8650.218415,3.0,,,...,9.236908,17741,87.398394,133,0.696080,0.000000,1.429709,49.332479,9101.0,43.095937
3191,56043,Wyoming,Washakie,110.0,6586.049959,4500.924119,8671.175799,2.0,,,...,14.051997,6498,82.409639,25,0.324886,0.000000,1.376766,49.422955,3068.0,35.954529


In [50]:
# List the first 40 column names to see which health metrics are available.
county_health.columns[0:40]

Index(['fips', 'state', 'county', 'num_deaths',
       'years_of_potential_life_lost_rate', '95percent_ci_low',
       '95percent_ci_high', 'quartile', 'ypll_rate_aian',
       'ypll_rate_aian_95percent_ci_low', 'ypll_rate_aian_95percent_ci_high',
       'ypll_rate_asian', 'ypll_rate_asian_95percent_ci_low',
       'ypll_rate_asian_95percent_ci_high', 'ypll_rate_black',
       'ypll_rate_black_95percent_ci_low', 'ypll_rate_black_95percent_ci_high',
       'ypll_rate_hispanic', 'ypll_rate_hispanic_95percent_ci_low',
       'ypll_rate_hispanic_95percent_ci_high', 'ypll_rate_white',
       'ypll_rate_white_95percent_ci_low', 'ypll_rate_white_95percent_ci_high',
       'percent_fair_or_poor_health', '95percent_ci_low_2',
       '95percent_ci_high_2', 'quartile_2',
       'average_number_of_physically_unhealthy_days', '95percent_ci_low_3',
       '95percent_ci_high_3', 'quartile_3',
       'average_number_of_mentally_unhealthy_days', '95percent_ci_low_4',
       '95percent_ci_high_4', 'quar