In [67]:
import pandas as pd
import numpy as np 
import altair as alt

# Select Altair's 'json' data transformer for any charts built in this notebook.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [68]:
def retrieve_appregate_data(data_dir='relevant_data'):
    """Load the infection, population and party-line tables and join them.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Parameters
    ----------
    data_dir : str or path-like, default 'relevant_data'
        Directory that contains ``us-counties.csv``,
        ``county-population.csv`` and ``state_party_line.csv``.

    Returns
    -------
    pandas.DataFrame
        The infection records with the population columns (matched on
        ``county`` and ``state``) and the party-line columns (matched on
        ``state``) appended. Both joins are inner joins, so an infection
        record without a match in either lookup table is dropped.
    """
    county_infection_data = pd.read_csv(f'{data_dir}/us-counties.csv')
    county_population_data = pd.read_csv(f'{data_dir}/county-population.csv')
    state_party_line_data = pd.read_csv(f'{data_dir}/state_party_line.csv')

    # The key columns carry the same names on both sides, so `on=` is enough.
    county_infection_popu_data = county_infection_data.merge(
        county_population_data, on=['county', 'state']
    ).merge(
        state_party_line_data, on=['state']
    )

    return county_infection_popu_data

In [69]:
# Load the three source tables and join them into a single frame.
data = retrieve_appregate_data()

In [70]:
# Preview the joined frame.
data

Unnamed: 0,date,county,state,fips,cases,deaths,population,land_area_km,density_km,state_house_blue_perc
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083,5413,151.872,59.18
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083,5413,151.872,59.18
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083,5413,151.872,59.18
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083,5413,151.872,59.18
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083,5413,151.872,59.18
...,...,...,...,...,...,...,...,...,...,...
120588,2020-05-04,Washington,Rhode Island,44009.0,363,25,126979,852,149.036,86.84
120589,2020-05-05,Washington,Rhode Island,44009.0,363,25,126979,852,149.036,86.84
120590,2020-05-06,Washington,Rhode Island,44009.0,363,25,126979,852,149.036,86.84
120591,2020-05-07,Washington,Rhode Island,44009.0,363,25,126979,852,149.036,86.84


In [71]:
# Check the dtypes and non-null counts of the joined frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120593 entries, 0 to 120592
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   date                   120593 non-null  object 
 1   county                 120593 non-null  object 
 2   state                  120593 non-null  object 
 3   fips                   120593 non-null  float64
 4   cases                  120593 non-null  int64  
 5   deaths                 120593 non-null  int64  
 6   population             120593 non-null  int64  
 7   land_area_km           120593 non-null  int64  
 8   density_km             120593 non-null  float64
 9   state_house_blue_perc  120593 non-null  float64
dtypes: float64(3), int64(4), object(3)
memory usage: 10.1+ MB


In [72]:
def count_days(series):
    """Return the number of whole days between the earliest and latest date.

    Parameters
    ----------
    series : pandas.Series
        Dates, as strings or datetimes; anything ``pd.to_datetime`` accepts.
        The values do not need to be sorted.

    Returns
    -------
    int
        ``(latest - earliest).days``; ``0`` for an empty input or a single
        date.
    """
    time_series = pd.to_datetime(series)
    if time_series.empty:
        return 0

    # min/max instead of first/last position so the result does not depend
    # on the row order of the input.
    return (time_series.max() - time_series.min()).days

def case_count_county_days(series, days=45):
    """Return the value recorded at the ``days``-th row of ``series``.

    The lookup is positional (row number ``days``, counted from 1), not
    calendar based. When the series holds fewer than ``days`` rows, the
    last available value is returned instead.
    """
    # Clamp the 1-based row number to the series length, then go 0-based.
    position = min(len(series), days) - 1
    return series.iloc[position]

In [73]:
def group_data(data, min_days=45):
    """Summarise the daily county records into one row per (state, county).

    Parameters
    ----------
    data : pandas.DataFrame
        Daily records with the columns ``state``, ``county``, ``date``,
        ``cases``, ``deaths``, ``population``, ``density_km`` and
        ``state_house_blue_perc``. The last row of each county is used as
        its total, so ``cases``/``deaths`` are treated as running totals
        and rows are expected in chronological order within a county.
    min_days : int, default 45
        Counties whose first-to-last date span (``days_counted``) is
        shorter than this are dropped. The same value is the row position
        used for the early-snapshot columns.

    Returns
    -------
    pandas.DataFrame
        One row per remaining county with ``state`` and ``county`` as
        regular columns, the aggregated values, and the derived percentage
        columns.

    Notes
    -----
    The ``*_28_days`` columns hold the value at the ``min_days``-th record
    (45 by default), not at day 28. The column names are kept unchanged
    because later cells and the exported CSV rely on them.
    """
    def first_value(values):
        return values.iloc[0]

    def last_value(values):
        return values.iloc[-1]

    def value_at_min_days(values):
        return case_count_county_days(values, days=min_days)

    grouped_data = data.groupby(['state', 'county']).agg(
        population=('population', first_value),
        state_house_blue_perc=('state_house_blue_perc', last_value),
        density_km=('density_km', first_value),
        days_counted=('date', count_days),
        case_sum=('cases', last_value),
        death_sum=('deaths', last_value),
        case_count_28_days=('cases', value_at_min_days),
        death_count_28_days=('deaths', value_at_min_days)
    )

    # .copy() gives an independent frame, so the column assignments below do
    # not write into a filtered slice (avoids SettingWithCopyWarning).
    grouped_data = grouped_data[grouped_data['days_counted'] >= min_days].copy()
    grouped_data['confirmed_infection(%_of_pop)'] = grouped_data['case_sum']/grouped_data['population']*100
    grouped_data['death_perc(%_of_case)'] = grouped_data['death_sum']/grouped_data['case_sum']*100
    grouped_data['avg_daily_case_increase(%_of_pop)'] = grouped_data['confirmed_infection(%_of_pop)']/grouped_data['days_counted']

    # An infinite infection percentage means cases were divided by a
    # population of zero; such rows are discarded.
    grouped_data = grouped_data[grouped_data['confirmed_infection(%_of_pop)'] != float("inf")].copy()
    grouped_data['case_count_28_days(%_of_pop)'] = grouped_data['case_count_28_days']/grouped_data['population']*100
    grouped_data['death_count_28_days(%_of_case)'] = grouped_data['death_count_28_days']/grouped_data['case_count_28_days']*100

    return grouped_data.reset_index()

In [74]:
# Build the per-county summary table from the daily records.
grouped_data = group_data(data)

In [75]:
# Preview the per-county summary.
grouped_data

Unnamed: 0,state,county,population,state_house_blue_perc,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),case_count_28_days(%_of_pop),death_count_28_days(%_of_case)
0,Alabama,Autauga,54571,22.86,35.436,45,67,4,61,3,0.122776,5.970149,0.002728,0.111781,4.918033
1,Alabama,Baldwin,182265,22.86,44.261,55,208,5,168,3,0.114120,2.403846,0.002075,0.092173,1.785714
2,Alabama,Calhoun,118572,22.86,75.572,51,123,3,93,3,0.103734,2.439024,0.002034,0.078433,3.225806
3,Alabama,Chambers,34215,22.86,22.146,50,311,21,294,21,0.908958,6.752412,0.018179,0.859272,7.142857
4,Alabama,Cullman,80406,22.86,42.252,49,62,0,58,0,0.077109,0.000000,0.001574,0.072134,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1284,Wyoming,Natrona,78621,10.00,5.684,49,48,0,48,0,0.061052,0.000000,0.001246,0.061052,0.000000
1285,Wyoming,Park,28702,10.00,1.596,52,1,0,1,0,0.003484,0.000000,0.000067,0.003484,0.000000
1286,Wyoming,Sheridan,29596,10.00,4.529,58,16,0,16,0,0.054061,0.000000,0.000932,0.054061,0.000000
1287,Wyoming,Sweetwater,45267,10.00,1.676,45,19,0,19,0,0.041973,0.000000,0.000933,0.041973,0.000000


In [76]:
# Spot-check ten random counties (no random_state is passed, so the rows differ on every run).
grouped_data.sample(10)

Unnamed: 0,state,county,population,state_house_blue_perc,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),case_count_28_days(%_of_pop),death_count_28_days(%_of_case)
1262,Wisconsin,Jefferson,83686,42.42,58.075,49,50,0,46,0,0.059747,0.0,0.001219,0.054967,0.0
585,Michigan,Montcalm,63342,42.11,33.927,57,46,1,29,1,0.072622,2.173913,0.001274,0.045783,3.448276
927,Oklahoma,Canadian,115541,18.75,49.567,53,107,3,97,3,0.092608,2.803738,0.001747,0.083953,3.092784
695,Missouri,Clinton,20743,29.41,19.118,46,14,0,13,0,0.067493,0.0,0.001467,0.062672,0.0
481,Louisiana,Calcasieu,192768,30.77,68.044,50,481,38,449,31,0.249523,7.900208,0.00499,0.232922,6.904232
1142,Texas,Oldham,2114,38.71,0.544,48,3,1,3,1,0.141911,33.333333,0.002956,0.141911,33.333333
781,New York,Cortland,49336,63.49,37.951,47,29,0,28,0,0.058781,0.0,0.001251,0.056754,0.0
904,Ohio,Knox,60921,27.27,44.631,46,20,1,20,1,0.032829,5.0,0.000714,0.032829,5.0
575,Michigan,Kalamazoo,250331,42.11,166.665,46,573,25,539,23,0.228897,4.363002,0.004976,0.215315,4.267161
355,Indiana,Hamilton,274569,20.0,269.185,54,913,82,735,60,0.332521,8.98138,0.006158,0.267692,8.163265


In [77]:
# Load the 2020 county health rankings table.
county_health = pd.read_csv('relevant_data/us-county-health-rankings-2020.csv')

In [78]:
# Keep only rows that have a county value
# (presumably this removes state-level summary rows — TODO confirm against the CSV).
county_health = county_health.dropna(subset=['county'])

In [79]:
# Preview the county-level health table.
county_health

Unnamed: 0,fips,state,county,num_deaths,years_of_potential_life_lost_rate,95percent_ci_low,95percent_ci_high,quartile,ypll_rate_aian,ypll_rate_aian_95percent_ci_low,...,percent_hispanic,num_non_hispanic_white,percent_non_hispanic_white,num_not_proficient_in_english,percent_not_proficient_in_english,95percent_ci_low_39,95percent_ci_high_39,percent_female,num_rural,percent_rural
1,1001,Alabama,Autauga,791.0,8128.591190,7283.340731,8973.841649,1.0,,,...,2.965774,41316,74.308016,426,0.820225,0.347891,1.292558,51.448715,22921.0,42.002162
2,1003,Alabama,Baldwin,2967.0,7354.122530,6918.554269,7789.690790,1.0,,,...,4.646779,181201,83.111337,1068,0.543517,0.347271,0.739763,51.538377,77060.0,42.279099
3,1005,Alabama,Barbour,472.0,10253.573403,8782.217281,11724.929524,2.0,,,...,4.276355,11356,45.641252,398,1.631683,0.824903,2.438462,47.216752,18613.0,67.789635
4,1007,Alabama,Bibb,471.0,11977.539484,10344.064842,13611.014126,3.0,,,...,2.625000,16708,74.589286,57,0.268210,0.000000,0.807504,46.781250,15663.0,68.352607
5,1009,Alabama,Blount,1085.0,11335.071134,10288.871387,12381.270881,3.0,,,...,9.571231,50255,86.886238,934,1.724520,1.198129,2.250911,50.726141,51562.0,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Wyoming,Sweetwater,532.0,7831.827655,6915.944384,8747.710925,4.0,,,...,16.083250,34145,79.312908,669,1.633221,0.993813,2.272629,48.544749,4782.0,10.916313
3189,56039,Wyoming,Teton,109.0,2730.603992,1991.644111,3469.563872,1.0,,,...,14.878038,18812,81.504268,945,4.314477,2.787340,5.841615,48.403449,9887.0,46.430920
3190,56041,Wyoming,Uinta,256.0,7330.683549,6011.148684,8650.218415,3.0,,,...,9.236908,17741,87.398394,133,0.696080,0.000000,1.429709,49.332479,9101.0,43.095937
3191,56043,Wyoming,Washakie,110.0,6586.049959,4500.924119,8671.175799,2.0,,,...,14.051997,6498,82.409639,25,0.324886,0.000000,1.376766,49.422955,3068.0,35.954529


In [80]:
# List the first 100 column names of the health table.
county_health.columns[:100]

Index(['fips', 'state', 'county', 'num_deaths',
       'years_of_potential_life_lost_rate', '95percent_ci_low',
       '95percent_ci_high', 'quartile', 'ypll_rate_aian',
       'ypll_rate_aian_95percent_ci_low', 'ypll_rate_aian_95percent_ci_high',
       'ypll_rate_asian', 'ypll_rate_asian_95percent_ci_low',
       'ypll_rate_asian_95percent_ci_high', 'ypll_rate_black',
       'ypll_rate_black_95percent_ci_low', 'ypll_rate_black_95percent_ci_high',
       'ypll_rate_hispanic', 'ypll_rate_hispanic_95percent_ci_low',
       'ypll_rate_hispanic_95percent_ci_high', 'ypll_rate_white',
       'ypll_rate_white_95percent_ci_low', 'ypll_rate_white_95percent_ci_high',
       'percent_fair_or_poor_health', '95percent_ci_low_2',
       '95percent_ci_high_2', 'quartile_2',
       'average_number_of_physically_unhealthy_days', '95percent_ci_low_3',
       '95percent_ci_high_3', 'quartile_3',
       'average_number_of_mentally_unhealthy_days', '95percent_ci_low_4',
       '95percent_ci_high_4', 'quar

In [81]:
# Substrings that mark a health-rankings column as not wanted for the analysis.
# A column is dropped as soon as its name contains ANY of these fragments.
excluded_column_words = [
    'quartile',
    'ci_high',
    'ci_low',
    'unreliabe',       # NOTE(review): looks like a misspelling of 'unreliable', which is listed below
    'petitioned',
    'fips',
    'num',             # NOTE(review): also matches 'average_number_of_...' columns — confirm that is intended
    'unreliable',
    'denominator',
    'ratio',
    'population',
    'hour_blue_perc'   # NOTE(review): possibly meant 'house_blue_perc' — confirm
]

# Join the fragments into one alternation pattern and keep every column whose
# name matches none of them.
health_columns = county_health.columns
exclusion_pattern = '|'.join(excluded_column_words)
filtered_columns = health_columns[~health_columns.str.contains(exclusion_pattern)]

In [82]:
# Number of columns that survive the exclusion filter.
len(filtered_columns)

187

In [83]:
# First 100 retained column names.
filtered_columns[:100]

Index(['state', 'county', 'years_of_potential_life_lost_rate',
       'ypll_rate_aian', 'ypll_rate_asian', 'ypll_rate_black',
       'ypll_rate_hispanic', 'ypll_rate_white', 'percent_fair_or_poor_health',
       'percent_low_birthweight', 'percent_lbw_aian', 'percent_lbw_asian',
       'percent_lbw_black', 'percent_lbw_hispanic', 'percent_lbw_white',
       'percent_smokers', 'percent_adults_with_obesity',
       'food_environment_index', 'percent_physically_inactive',
       'percent_with_access_to_exercise_opportunities',
       'percent_excessive_drinking',
       'percent_driving_deaths_with_alcohol_involvement', 'chlamydia_rate',
       'teen_birth_rate', 'teen_birth_rate_aian', 'teen_birth_rate_asian',
       'teen_birth_rate_black', 'teen_birth_rate_hispanic',
       'teen_birth_rate_white', 'percent_uninsured',
       'primary_care_physicians_rate', 'dentist_rate',
       'mental_health_provider_rate', 'preventable_hospitalization_rate',
       'preventable_hosp_rate_aian', 'pr

In [84]:
# Remaining retained column names (position 100 onward).
filtered_columns[100:]

Index(['child_mortality_rate_aian', 'child_mortality_rate_asian',
       'child_mortality_rate_black', 'child_mortality_rate_hispanic',
       'child_mortality_rate_white', 'infant_mortality_rate',
       'infant_mortality_rate_aian', 'infant_mortality_rate_asian',
       'infant_mortality_rate_black', 'infant_mortality_rate_hispanic',
       'infant_mortality_rate_white', 'percent_frequent_physical_distress',
       'percent_frequent_mental_distress', 'percent_adults_with_diabetes',
       'hiv_prevalence_rate', 'percent_food_insecure',
       'percent_limited_access_to_healthy_foods',
       'drug_overdose_mortality_rate', 'drug_overdose_mortality_rate_aian',
       'drug_overdose_mortality_rate_asian',
       'drug_overdose_mortality_rate_black',
       'drug_overdose_mortality_rate_hispanic',
       'drug_overdose_mortality_rate_white', 'motor_vehicle_mortality_rate',
       'mv_mortality_rate_aian', 'mv_mortality_rate_asian',
       'mv_mortality_rate_black', 'mv_mortality_rate_hi

In [85]:
# Restrict the health table to the retained columns.
filtered_county_health = county_health[filtered_columns]

In [86]:
# Preview the column-filtered health table.
filtered_county_health

Unnamed: 0,state,county,years_of_potential_life_lost_rate,ypll_rate_aian,ypll_rate_asian,ypll_rate_black,ypll_rate_hispanic,ypll_rate_white,percent_fair_or_poor_health,percent_low_birthweight,...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
1,Alabama,Autauga,8128.591190,,,10201.389930,,7885.665369,20.882987,8.619529,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
2,Alabama,Baldwin,7354.122530,,,9890.505276,3570.475265,7435.761186,17.509134,8.345003,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
3,Alabama,Barbour,10253.573403,,,12422.166370,,8140.174177,29.591802,11.474559,...,19.420441,48.032635,0.659137,0.454162,0.184880,4.276355,45.641252,1.631683,47.216752,67.789635
4,Alabama,Bibb,11977.539484,,,13085.045008,,12241.384199,19.439724,10.308710,...,16.473214,21.120536,0.437500,0.236607,0.116071,2.625000,74.589286,0.268210,46.781250,68.352607
5,Alabama,Blount,11335.071134,,,,,,21.745293,7.604563,...,18.236515,1.462656,0.653527,0.319848,0.121024,9.571231,86.886238,1.724520,50.726141,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,Wyoming,Sweetwater,7831.827655,,,,5475.015989,8246.109906,14.813082,9.705248,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313
3189,Wyoming,Teton,2730.603992,,,,,,11.914358,7.736721,...,15.415277,0.632555,0.914172,1.412417,0.151640,14.878038,81.504268,4.314477,48.403449,46.430920
3190,Wyoming,Uinta,7330.683549,,,,,,15.537464,10.110206,...,14.079511,0.684763,1.418789,0.472930,0.147791,9.236908,87.398394,0.696080,49.332479,43.095937
3191,Wyoming,Washakie,6586.049959,,,,,,15.955971,7.028754,...,21.686747,0.469245,1.813570,0.824350,0.101458,14.051997,82.409639,0.324886,49.422955,35.954529


In [87]:
# Non-null count per retained column; max_cols=200 keeps the per-column listing for this wide frame.
filtered_county_health.info(max_cols=200)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3142 entries, 1 to 3192
Data columns (total 187 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               3142 non-null   object 
 1   county                                              3142 non-null   object 
 2   years_of_potential_life_lost_rate                   2849 non-null   float64
 3   ypll_rate_aian                                      175 non-null    float64
 4   ypll_rate_asian                                     224 non-null    float64
 5   ypll_rate_black                                     1030 non-null   float64
 6   ypll_rate_hispanic                                  646 non-null    float64
 7   ypll_rate_white                                     1283 non-null   float64
 8   percent_fair_or_poor_health                         3142 non-null   float64
 

In [88]:
# Attach the health indicators to each county summary row.
# Inner join on county + state: counties missing from either table are dropped.
complete_data = pd.merge(
    grouped_data,
    filtered_county_health,
    on=['county', 'state'],
)

In [89]:
# Non-null count per column of the joined table; max_cols=200 keeps the per-column listing.
complete_data.info(max_cols=200)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1288 entries, 0 to 1287
Data columns (total 200 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               1288 non-null   object 
 1   county                                              1288 non-null   object 
 2   population                                          1288 non-null   int64  
 3   state_house_blue_perc                               1288 non-null   float64
 4   density_km                                          1288 non-null   float64
 5   days_counted                                        1288 non-null   int64  
 6   case_sum                                            1288 non-null   int64  
 7   death_sum                                           1288 non-null   int64  
 8   case_count_28_days                                  1288 non-null   int64  
 

In [90]:
# Inspect which columns remain when only columns with at least 1260 non-null values are kept.
complete_data.dropna(thresh=1260, axis=1).info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1288 entries, 0 to 1287
Data columns (total 80 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               1288 non-null   object 
 1   county                                              1288 non-null   object 
 2   population                                          1288 non-null   int64  
 3   state_house_blue_perc                               1288 non-null   float64
 4   density_km                                          1288 non-null   float64
 5   days_counted                                        1288 non-null   int64  
 6   case_sum                                            1288 non-null   int64  
 7   death_sum                                           1288 non-null   int64  
 8   case_count_28_days                                  1288 non-null   int64  
 9

In [95]:
# Keep only the columns that have at least 1260 non-null values; sparser columns are dropped.
final_data = complete_data.dropna(thresh=1260, axis=1)

In [96]:
# Rows and columns left after dropping the sparse columns.
final_data.shape

(1288, 80)

In [97]:
# Preview the rows that have no missing value in any retained column.
final_data.dropna()

Unnamed: 0,state,county,population,state_house_blue_perc,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
0,Alabama,Autauga,54571,22.86,35.436,45,67,4,61,3,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
1,Alabama,Baldwin,182265,22.86,44.261,55,208,5,168,3,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
2,Alabama,Calhoun,118572,22.86,75.572,51,123,3,93,3,...,17.717476,20.850215,0.539916,0.964324,0.110258,3.910673,72.024992,0.991376,51.946586,33.696826
3,Alabama,Chambers,34215,22.86,22.146,50,311,21,294,21,...,19.521047,39.565670,0.306411,1.326789,0.044623,2.561357,55.231296,0.112938,52.125539,49.148034
4,Alabama,Cullman,80406,22.86,42.252,49,62,0,58,0,...,18.511062,1.205628,0.645958,0.672323,0.086287,4.433019,91.855421,0.578012,50.570456,73.239559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1283,Wyoming,Natrona,78621,10.00,5.684,49,48,0,48,0,...,15.395311,1.161600,1.466220,0.827909,0.098591,8.581179,86.599254,0.731123,49.617645,14.449304
1284,Wyoming,Park,28702,10.00,1.596,52,1,0,1,0,...,23.206247,0.682035,0.944619,0.821852,0.057973,5.623380,90.915291,0.627835,50.194380,44.240383
1285,Wyoming,Sheridan,29596,10.00,4.529,58,16,0,16,0,...,20.993616,0.797142,1.366057,0.823603,0.112460,4.342937,91.482817,0.348727,49.793272,35.478775
1286,Wyoming,Sweetwater,45267,10.00,1.676,45,19,0,19,0,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313


In [98]:
# Drop every row that still has a missing value in any retained column.
wrangled_data = final_data.dropna()

In [99]:
# List the columns of the cleaned frame.
print(wrangled_data.columns)

Index(['state', 'county', 'population', 'state_house_blue_perc', 'density_km',
       'days_counted', 'case_sum', 'death_sum', 'case_count_28_days',
       'death_count_28_days', 'confirmed_infection(%_of_pop)',
       'death_perc(%_of_case)', 'avg_daily_case_increase(%_of_pop)',
       'case_count_28_days(%_of_pop)', 'death_count_28_days(%_of_case)',
       'years_of_potential_life_lost_rate', 'percent_fair_or_poor_health',
       'percent_low_birthweight', 'percent_smokers',
       'percent_adults_with_obesity', 'food_environment_index',
       'percent_physically_inactive',
       'percent_with_access_to_exercise_opportunities',
       'percent_excessive_drinking',
       'percent_driving_deaths_with_alcohol_involvement', 'chlamydia_rate',
       'teen_birth_rate', 'percent_uninsured', 'primary_care_physicians_rate',
       'dentist_rate', 'mental_health_provider_rate',
       'preventable_hospitalization_rate', 'percent_with_annual_mammogram',
       'percent_vaccinated', 'high_sch

In [100]:
# Remove the raw count columns; the derived percentage columns are kept.
# Rebuilt from final_data (instead of from wrangled_data itself) so this cell
# can be re-run without a KeyError for columns that are already gone.
wrangled_data = final_data.dropna().drop(columns=['population', 'case_sum', 'death_sum'])

In [101]:
# Preview the cleaned table that is about to be exported.
wrangled_data

Unnamed: 0,state,county,state_house_blue_perc,density_km,days_counted,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
0,Alabama,Autauga,22.86,35.436,45,61,3,0.122776,5.970149,0.002728,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
1,Alabama,Baldwin,22.86,44.261,55,168,3,0.114120,2.403846,0.002075,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
2,Alabama,Calhoun,22.86,75.572,51,93,3,0.103734,2.439024,0.002034,...,17.717476,20.850215,0.539916,0.964324,0.110258,3.910673,72.024992,0.991376,51.946586,33.696826
3,Alabama,Chambers,22.86,22.146,50,294,21,0.908958,6.752412,0.018179,...,19.521047,39.565670,0.306411,1.326789,0.044623,2.561357,55.231296,0.112938,52.125539,49.148034
4,Alabama,Cullman,22.86,42.252,49,58,0,0.077109,0.000000,0.001574,...,18.511062,1.205628,0.645958,0.672323,0.086287,4.433019,91.855421,0.578012,50.570456,73.239559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1283,Wyoming,Natrona,10.00,5.684,49,48,0,0.061052,0.000000,0.001246,...,15.395311,1.161600,1.466220,0.827909,0.098591,8.581179,86.599254,0.731123,49.617645,14.449304
1284,Wyoming,Park,10.00,1.596,52,1,0,0.003484,0.000000,0.000067,...,23.206247,0.682035,0.944619,0.821852,0.057973,5.623380,90.915291,0.627835,50.194380,44.240383
1285,Wyoming,Sheridan,10.00,4.529,58,16,0,0.054061,0.000000,0.000932,...,20.993616,0.797142,1.366057,0.823603,0.112460,4.342937,91.482817,0.348727,49.793272,35.478775
1286,Wyoming,Sweetwater,10.00,1.676,45,19,0,0.041973,0.000000,0.000933,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313


In [102]:
# Save the cleaned table; index=False leaves the row index out of the CSV.
wrangled_data.to_csv('./relevant_data/wrangled_data.csv', index=False)

Maybe more data is better.