In [282]:
import pandas as pd
import numpy as np 
import altair as alt

# Switch Altair to its 'json' data transformer for the rest of the session.
# NOTE(review): per the Altair docs this writes chart data to a JSON file next
# to the notebook instead of inlining it in the chart spec — confirm for the
# installed Altair version.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [283]:
def retrieve_appregate_data(
    infection_path='relevant_data/us-counties.csv',
    population_path='relevant_data/county-population.xlsx',
):
    """Load the county infection rows and attach each county's 2019 population.

    The function name keeps its original spelling because other cells call it.

    Parameters
    ----------
    infection_path : str or path-like, optional
        CSV with one row per county per date. It must contain ``county`` and
        ``state`` columns, which are used as the join keys.
    population_path : str or path-like, optional
        Excel workbook with the population estimates. The first three sheet
        rows are skipped; the area label is read from the ``'Unnamed: 0'``
        column and the population from the column labelled ``2019``.

    Returns
    -------
    pandas.DataFrame
        The infection rows that matched a population row (inner join on county
        and state name), with an extra integer ``'2019 Population'`` column.

    Notes
    -----
    Area labels are parsed as ``".<name> County, <state>"``. Labels that do
    not contain ``" County"`` cannot be parsed and are left out, so the
    corresponding infection rows are absent from the result.
    """
    county_infection_data = pd.read_csv(infection_path)
    county_population_data = pd.read_excel(population_path, skiprows=3)

    # Skip the first data row (presumably a national total — confirm against
    # the workbook) and every incomplete row. The explicit copy makes the
    # column assignments below act on an independent frame rather than on a
    # slice of the sheet, which is what triggers SettingWithCopyWarning.
    county_population_data = county_population_data[1:].dropna().copy()

    area_label = county_population_data['Unnamed: 0']
    # expand=False returns a Series, so each assignment is a plain column.
    county_population_data['County'] = area_label.str.extract(r'\.(.*?)\ County', expand=False)
    county_population_data['State'] = area_label.str.extract(r'\, (.*)', expand=False)

    county_population_data = (
        county_population_data[['County', 'State', 2019]]
        # Unparsed labels yield NaN keys; drop them so they can never pair up
        # with missing county/state values on the infection side of the merge.
        .dropna(subset=['County', 'State'])
        .rename(columns={2019: '2019 Population'})
    )
    county_population_data['2019 Population'] = county_population_data['2019 Population'].astype('int')

    # Inner join on the names: infection rows without a population match are dropped.
    county_infection_popu_data = county_infection_data.merge(
        county_population_data, left_on=['county', 'state'], right_on=['County', 'State']
    ).drop(columns=['County', 'State'])

    return county_infection_popu_data

In [284]:
# Build the merged infection + population table once; the cells below reuse it as `data`.
data = retrieve_appregate_data()

In [285]:
# Sanity check: list any rows that report zero cases (none were found in this run).
data[data['cases'] == 0]

Unnamed: 0,date,county,state,fips,cases,deaths,2019 Population


In [286]:
# Preview the merged table; pandas truncates the display to the first and last rows.
data

Unnamed: 0,date,county,state,fips,cases,deaths,2019 Population
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083
...,...,...,...,...,...,...,...
87461,2020-04-26,Emmet,Iowa,19063.0,1,0,9208
87462,2020-04-26,Jefferson,Nebraska,31095.0,1,0,7046
87463,2020-04-26,Ransom,North Dakota,38073.0,1,0,5218
87464,2020-04-26,Hand,South Dakota,46059.0,1,0,3191


In [287]:
def count_days(series):
    """Return the number of whole days between the earliest and latest date.

    Parameters
    ----------
    series : pandas.Series or list-like
        Dates, or anything ``pd.to_datetime`` can parse. They do not need to
        be in chronological order.

    Returns
    -------
    int
        ``(latest - earliest).days``; ``0`` when there is only one distinct date.

    Raises
    ------
    IndexError
        If ``series`` is empty.
    """
    time_series = pd.to_datetime(series)
    if len(time_series) == 0:
        # Keep the exception type callers got from positional indexing on an empty input.
        raise IndexError('count_days() needs at least one date')

    # min/max instead of first/last position: the span stays correct (and
    # non-negative) even when the rows are not sorted by date.
    return (time_series.max() - time_series.min()).days

In [304]:
def group_data(data, min_days=15):
    """Summarise the row-per-day table into one row per (state, county).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'state'``, ``'county'``, ``'date'``, ``'cases'``,
        ``'deaths'`` and ``'2019 Population'`` columns. It is not modified.
    min_days : int, default 15
        Counties whose records span fewer than this many days are dropped.

    Returns
    -------
    pandas.DataFrame
        Indexed by (state, county) with these columns:

        * ``population_2019`` – population value from the county's earliest row
        * ``days_counted`` – days between the county's first and last record
        * ``case_sum`` / ``death_sum`` – values from the county's latest row
          (presumably cumulative counts — confirm against the data source)
        * ``confirmed_infection_perc`` – ``case_sum`` as a % of the population
        * ``death_perc`` – ``death_sum`` as a % of ``case_sum``
        * ``confirmed_infection_perc_avg_daily`` – infection % per counted day
    """
    def _first_value(values):
        return values.iloc[0]

    def _last_value(values):
        return values.iloc[-1]

    # Order the rows chronologically on a copy, so "last value" really is the
    # most recent record whatever order the caller's frame is in. mergesort is
    # stable, so same-day rows keep their relative order.
    ordered = data.assign(date=pd.to_datetime(data['date'])).sort_values('date', kind='mergesort')

    grouped_data = ordered.groupby(['state', 'county']).agg(
        population_2019=('2019 Population', _first_value),
        days_counted=('date', count_days),
        case_sum=('cases', _last_value),
        death_sum=('deaths', _last_value),
    )

    grouped_data['confirmed_infection_perc'] = grouped_data['case_sum'] / grouped_data['population_2019'] * 100
    grouped_data['death_perc'] = grouped_data['death_sum'] / grouped_data['case_sum'] * 100
    grouped_data['confirmed_infection_perc_avg_daily'] = (
        grouped_data['confirmed_infection_perc'] / grouped_data['days_counted']
    )

    # Short series give unstable daily averages. The inf guard covers a
    # zero-day span, which can only get past the first test when min_days <= 0.
    long_enough = grouped_data['days_counted'] >= min_days
    finite_rate = grouped_data['confirmed_infection_perc_avg_daily'] != float("inf")
    grouped_data = grouped_data[long_enough & finite_rate]

    return grouped_data

In [305]:
# Collapse the row-per-day table into one summary row per (state, county).
grouped_data = group_data(data)

In [306]:
# Preview the per-county summary; pandas truncates the display to the first and last rows.
grouped_data

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alabama,Autauga,55869,33,37,2,0.066226,5.405405,0.002007
Alabama,Baldwin,223234,43,161,3,0.072122,1.863354,0.001677
Alabama,Barbour,24686,23,33,0,0.133679,0.000000,0.005812
Alabama,Bibb,22394,27,38,0,0.169688,0.000000,0.006285
Alabama,Blount,57826,32,34,0,0.058797,0.000000,0.001837
...,...,...,...,...,...,...,...,...
Wyoming,Sublette,9831,29,1,0,0.010172,0.000000,0.000351
Wyoming,Sweetwater,42343,33,10,0,0.023617,0.000000,0.000716
Wyoming,Teton,23464,39,64,1,0.272758,1.562500,0.006994
Wyoming,Uinta,20226,24,6,0,0.029665,0.000000,0.001236


In [307]:
# Spot-check ten randomly chosen counties. No random_state is passed, so the
# selection differs on every run.
grouped_data.sample(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Iowa,Guthrie,10689,27,6,0,0.056132,0.0,0.002079
Missouri,St. Clair,9397,17,2,0,0.021283,0.0,0.001252
South Dakota,Beadle,18453,47,21,2,0.113803,9.52381,0.002421
Indiana,Madison,129569,39,394,45,0.304085,11.42132,0.007797
Oklahoma,Stephens,43143,32,20,1,0.046357,5.0,0.001449
Kentucky,Knox,31145,17,5,0,0.016054,0.0,0.000944
New York,Orange,384940,45,8121,203,2.109679,2.499692,0.046882
South Dakota,Brown,38839,35,30,0,0.077242,0.0,0.002207
Iowa,Mahaska,22095,31,9,1,0.040733,11.111111,0.001314
Minnesota,Carver,105089,49,17,0,0.016177,0.0,0.00033


In [308]:
# Ten counties with the largest share of the 2019 population confirmed infected.
grouped_data.sort_values(by=['confirmed_infection_perc'], ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arkansas,Lincoln,13024,41,727,0,5.582002,0.0,0.136146
Tennessee,Bledsoe,15064,29,588,0,3.903346,0.0,0.134598
New York,Rockland,325789,51,11256,349,3.454997,3.100569,0.067745
Ohio,Marion,65093,37,2178,4,3.345982,0.183655,0.090432
New York,Westchester,967506,53,27664,1054,2.85931,3.810006,0.053949
Ohio,Pickaway,58457,32,1658,5,2.836273,0.301568,0.088634
New York,Nassau,1356924,52,34522,1962,2.544137,5.683332,0.048926
Iowa,Louisa,11035,23,267,2,2.419574,0.749064,0.105199
Georgia,Randolph,6778,37,153,19,2.257303,12.418301,0.061008
New Jersey,Passaic,501826,49,11137,426,2.219295,3.825088,0.045292


In [309]:
# Ten counties with the smallest confirmed-infection share. This is the tail
# of the descending sort, so the lowest value is the last row.
grouped_data.sort_values(by=['confirmed_infection_perc'], ascending=False).tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Missouri,Phelps,44573,20,2,0,0.004487,0.0,0.000224
Minnesota,Itasca,45130,27,2,0,0.004432,0.0,0.000164
Oklahoma,Carter,48111,32,2,0,0.004157,0.0,0.00013
Oregon,Crook,24404,23,1,0,0.004098,0.0,0.000178
South Dakota,Meade,28332,32,1,0,0.00353,0.0,0.00011
Wyoming,Park,29194,40,1,0,0.003425,0.0,8.6e-05
Missouri,Polk,32149,19,1,0,0.003111,0.0,0.000164
Wisconsin,Wood,72999,41,2,0,0.00274,0.0,6.7e-05
Pennsylvania,Warren,39191,32,1,1,0.002552,100.0,8e-05
California,Tehama,65084,23,1,0,0.001536,0.0,6.7e-05


In [310]:
# Ten counties with the highest deaths as a percentage of confirmed cases.
# In this run the top rows all have only 1-3 confirmed cases, so read the
# percentages with care.
grouped_data.sort_values(by=['death_perc'], ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Iowa,Appanoose,12426,31,2,2,0.016095,100.0,0.000519
North Dakota,McHenry,5745,31,1,1,0.017406,100.0,0.000561
New Mexico,Catron,3527,25,1,1,0.028353,100.0,0.001134
Pennsylvania,Warren,39191,32,1,1,0.002552,100.0,8e-05
North Dakota,Emmons,3241,21,1,1,0.030855,100.0,0.001469
Michigan,Dickinson,25239,30,3,2,0.011886,66.666667,0.000396
North Carolina,Macon,35858,24,2,1,0.005578,50.0,0.000232
Oklahoma,Major,7629,23,2,1,0.026216,50.0,0.00114
Wisconsin,Iron,5687,30,2,1,0.035168,50.0,0.001172
Colorado,Crowley,6061,38,2,1,0.032998,50.0,0.000868


In [311]:
# Tail of the same descending sort: counties with the lowest death percentage.
# The rows shown all tie at 0, so which ten appear depends on how the sort
# orders ties.
grouped_data.sort_values(by=['death_perc'], ascending=False).tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Minnesota,Traverse,3259,26,2,0,0.061369,0.0,0.00236
Minnesota,Swift,9266,19,1,0,0.010792,0.0,0.000568
Minnesota,Steele,36649,36,12,0,0.032743,0.0,0.00091
Minnesota,Stearns,161075,45,38,0,0.023591,0.0,0.000524
Minnesota,Sibley,14865,31,1,0,0.006727,0.0,0.000217
Minnesota,Sherburne,97238,34,16,0,0.016454,0.0,0.000484
Minnesota,Roseau,15165,19,1,0,0.006594,0.0,0.000347
Minnesota,Renville,14548,43,3,0,0.020621,0.0,0.00048
Minnesota,Redwood,15170,20,1,0,0.006592,0.0,0.00033
Wyoming,Washakie,7805,30,4,0,0.051249,0.0,0.001708


In [312]:
# Ten counties with the highest confirmed-infection percentage per counted day.
grouped_data.sort_values(by=['confirmed_infection_perc_avg_daily'], ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Arkansas,Lincoln,13024,41,727,0,5.582002,0.0,0.136146
Tennessee,Bledsoe,15064,29,588,0,3.903346,0.0,0.134598
Iowa,Louisa,11035,23,267,2,2.419574,0.749064,0.105199
Minnesota,Nobles,21629,16,352,1,1.627445,0.284091,0.101715
Ohio,Marion,65093,37,2178,4,3.345982,0.183655,0.090432
Ohio,Pickaway,58457,32,1658,5,2.836273,0.301568,0.088634
Kansas,Seward,21428,19,336,0,1.568042,0.0,0.082529
New York,Rockland,325789,51,11256,349,3.454997,3.100569,0.067745
Georgia,Randolph,6778,37,153,19,2.257303,12.418301,0.061008
Indiana,Cass,37689,26,586,1,1.55483,0.170648,0.059801


In [313]:
# Ten counties with the lowest confirmed-infection percentage per counted day
# (tail of the descending sort, lowest value last).
grouped_data.sort_values(by=['confirmed_infection_perc_avg_daily'], ascending=False).tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,population_2019,days_counted,case_sum,death_sum,confirmed_infection_perc,death_perc,confirmed_infection_perc_avg_daily
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Missouri,Polk,32149,19,1,0,0.003111,0.0,0.000164
Michigan,Chippewa,37349,34,2,0,0.005355,0.0,0.000157
Ohio,Athens,65327,31,3,1,0.004592,33.333333,0.000148
Oklahoma,Carter,48111,32,2,0,0.004157,0.0,0.00013
South Dakota,Meade,28332,32,1,0,0.00353,0.0,0.00011
Iowa,Carroll,20165,45,1,0,0.004959,0.0,0.00011
Wyoming,Park,29194,40,1,0,0.003425,0.0,8.6e-05
Pennsylvania,Warren,39191,32,1,1,0.002552,100.0,8e-05
Wisconsin,Wood,72999,41,2,0,0.00274,0.0,6.7e-05
California,Tehama,65084,23,1,0,0.001536,0.0,6.7e-05


In [328]:
# Load the county health rankings CSV (a second source, separate from the infection data).
county_health = pd.read_csv('relevant_data/us-county-health-rankings-2020.csv')

In [329]:
# Keep only rows that carry a county name.
# NOTE(review): the dropped rows are presumably state-level summaries — confirm against the CSV.
county_health = county_health.dropna(subset=['county'])

In [330]:
# Preview the health table; pandas truncates both rows and columns in the display.
county_health

Unnamed: 0,fips,state,county,num_deaths,years_of_potential_life_lost_rate,95percent_ci_low,95percent_ci_high,quartile,ypll_rate_aian,ypll_rate_aian_95percent_ci_low,...,percent_hispanic,num_non_hispanic_white,percent_non_hispanic_white,num_not_proficient_in_english,percent_not_proficient_in_english,95percent_ci_low_39,95percent_ci_high_39,percent_female,num_rural,percent_rural
1,1001,Alabama,Autauga,791.0,8128.591190,7283.340731,8973.841649,1.0,,,...,2.965774,41316,74.308016,426,0.820225,0.347891,1.292558,51.448715,22921.0,42.002162
2,1003,Alabama,Baldwin,2967.0,7354.122530,6918.554269,7789.690790,1.0,,,...,4.646779,181201,83.111337,1068,0.543517,0.347271,0.739763,51.538377,77060.0,42.279099
3,1005,Alabama,Barbour,472.0,10253.573403,8782.217281,11724.929524,2.0,,,...,4.276355,11356,45.641252,398,1.631683,0.824903,2.438462,47.216752,18613.0,67.789635
4,1007,Alabama,Bibb,471.0,11977.539484,10344.064842,13611.014126,3.0,,,...,2.625000,16708,74.589286,57,0.268210,0.000000,0.807504,46.781250,15663.0,68.352607
5,1009,Alabama,Blount,1085.0,11335.071134,10288.871387,12381.270881,3.0,,,...,9.571231,50255,86.886238,934,1.724520,1.198129,2.250911,50.726141,51562.0,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Wyoming,Sweetwater,532.0,7831.827655,6915.944384,8747.710925,4.0,,,...,16.083250,34145,79.312908,669,1.633221,0.993813,2.272629,48.544749,4782.0,10.916313
3189,56039,Wyoming,Teton,109.0,2730.603992,1991.644111,3469.563872,1.0,,,...,14.878038,18812,81.504268,945,4.314477,2.787340,5.841615,48.403449,9887.0,46.430920
3190,56041,Wyoming,Uinta,256.0,7330.683549,6011.148684,8650.218415,3.0,,,...,9.236908,17741,87.398394,133,0.696080,0.000000,1.429709,49.332479,9101.0,43.095937
3191,56043,Wyoming,Washakie,110.0,6586.049959,4500.924119,8671.175799,2.0,,,...,14.051997,6498,82.409639,25,0.324886,0.000000,1.376766,49.422955,3068.0,35.954529


In [331]:
# List the first 40 column names to see which measures the table provides.
county_health.columns[0:40]

Index(['fips', 'state', 'county', 'num_deaths',
       'years_of_potential_life_lost_rate', '95percent_ci_low',
       '95percent_ci_high', 'quartile', 'ypll_rate_aian',
       'ypll_rate_aian_95percent_ci_low', 'ypll_rate_aian_95percent_ci_high',
       'ypll_rate_asian', 'ypll_rate_asian_95percent_ci_low',
       'ypll_rate_asian_95percent_ci_high', 'ypll_rate_black',
       'ypll_rate_black_95percent_ci_low', 'ypll_rate_black_95percent_ci_high',
       'ypll_rate_hispanic', 'ypll_rate_hispanic_95percent_ci_low',
       'ypll_rate_hispanic_95percent_ci_high', 'ypll_rate_white',
       'ypll_rate_white_95percent_ci_low', 'ypll_rate_white_95percent_ci_high',
       'percent_fair_or_poor_health', '95percent_ci_low_2',
       '95percent_ci_high_2', 'quartile_2',
       'average_number_of_physically_unhealthy_days', '95percent_ci_low_3',
       '95percent_ci_high_3', 'quartile_3',
       'average_number_of_mentally_unhealthy_days', '95percent_ci_low_4',
       '95percent_ci_high_4', 'quar