In [118]:
import pandas as pd
import numpy as np 
import altair as alt

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [119]:
def retrieve_appregate_data():
    county_infection_data = pd.read_csv('relevant_data/us-counties.csv')
    county_population_data = pd.read_csv('relevant_data/county-population.csv')
    
    county_infection_popu_data = county_infection_data.merge(
        county_population_data, left_on=['county', 'state'], right_on=['county', 'state']
    )
    
    return county_infection_popu_data

In [120]:
data = retrieve_appregate_data()

In [121]:
data

Unnamed: 0,date,county,state,fips,cases,deaths,population,land_area_km,density_km
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083,5413,151.872
...,...,...,...,...,...,...,...,...,...
108846,2020-05-03,Lawrence,Missouri,29109.0,1,0,38634,1588,24.329
108847,2020-05-03,Sullivan,Missouri,29211.0,1,0,6714,1686,3.982
108848,2020-05-03,Holt,Nebraska,31089.0,1,0,10449,6250,1.672
108849,2020-05-03,Yancey,North Carolina,37199.0,1,0,17605,811,21.708


In [122]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 108851 entries, 0 to 108850
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   date          108851 non-null  object 
 1   county        108851 non-null  object 
 2   state         108851 non-null  object 
 3   fips          108851 non-null  float64
 4   cases         108851 non-null  int64  
 5   deaths        108851 non-null  int64  
 6   population    108851 non-null  int64  
 7   land_area_km  108851 non-null  int64  
 8   density_km    108851 non-null  float64
dtypes: float64(2), int64(4), object(3)
memory usage: 8.3+ MB


In [123]:
def count_days(series):
    time_series = pd.to_datetime(series)
    first_date = time_series.iloc[0]
    last_date = time_series.iloc[-1]
    
    return (last_date - first_date).days

def case_count_county_days(series, days = 28):
    if len(series) < days:
        return series.iloc[-1]
    else:
        return series.iloc[days - 1]

In [124]:
def group_data(data):
    grouped_data = data.groupby(['state', 'county']).agg(
        population=('population', lambda x: x.iloc[0]),
        density_km=('density_km', lambda x: x.iloc[0]),
        days_counted=('date', count_days),
        case_sum=('cases', lambda x: x.iloc[-1]),
        death_sum=('deaths', lambda x: x.iloc[-1]),
        case_count_28_days=('cases', case_count_county_days),
        death_count_28_days=('deaths', case_count_county_days)
    )
    
    grouped_data = grouped_data[grouped_data['days_counted'] >= 28]
    grouped_data['confirmed_infection(%_of_pop)'] = grouped_data['case_sum']/grouped_data['population']*100
    grouped_data['death_perc(%_of_case)'] = grouped_data['death_sum']/grouped_data['case_sum']*100
    grouped_data['avg_daily_case_increase(%_of_pop)'] = grouped_data['confirmed_infection(%_of_pop)']/grouped_data['days_counted']
    grouped_data = grouped_data[grouped_data['confirmed_infection(%_of_pop)'] != float("inf")]
    grouped_data['case_count_28_days(%_of_pop)'] = grouped_data['case_count_28_days']/grouped_data['population']*100
    grouped_data['death_count_28_days(%_of_case)'] = grouped_data['death_count_28_days']/grouped_data['case_count_28_days']*100
    
    return grouped_data.reset_index()

In [125]:
grouped_data = group_data(data)

In [126]:
grouped_data

Unnamed: 0,state,county,population,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),case_count_28_days(%_of_pop),death_count_28_days(%_of_case)
0,Alabama,Autauga,54571,35.436,40,48,3,28,1,0.087959,6.250000,0.002199,0.051309,3.571429
1,Alabama,Baldwin,182265,44.261,50,187,4,59,1,0.102598,2.139037,0.002052,0.032370,1.694915
2,Alabama,Barbour,27457,11.979,30,45,1,39,1,0.163893,2.222222,0.005463,0.142040,2.564103
3,Alabama,Bibb,22915,14.215,34,43,0,38,0,0.187650,0.000000,0.005519,0.165830,0.000000
4,Alabama,Blount,57322,34.325,39,40,0,26,0,0.069781,0.000000,0.001789,0.045358,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2365,Wyoming,Sublette,10368,0.820,36,1,0,1,0,0.009645,0.000000,0.000268,0.009645,0.000000
2366,Wyoming,Sweetwater,45267,1.676,40,11,0,10,0,0.024300,0.000000,0.000608,0.022091,0.000000
2367,Wyoming,Teton,21675,2.088,46,67,1,57,0,0.309112,1.492537,0.006720,0.262976,0.000000
2368,Wyoming,Uinta,21025,3.899,31,6,0,6,0,0.028537,0.000000,0.000921,0.028537,0.000000


In [127]:
grouped_data.sample(10)

Unnamed: 0,state,county,population,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),case_count_28_days(%_of_pop),death_count_28_days(%_of_case)
2353,Wyoming,Carbon,15666,0.766,43,5,0,4,0,0.031916,0.0,0.000742,0.025533,0.0
360,Georgia,Crisp,23606,33.248,40,168,3,125,2,0.711683,1.785714,0.017792,0.529526,1.6
1383,New Mexico,Sandoval,134259,13.972,50,416,19,177,3,0.309849,4.567308,0.006197,0.131835,1.694915
764,Kansas,Linn,9441,6.087,45,6,0,6,0,0.063553,0.0,0.001412,0.063553,0.0
1775,Pennsylvania,Potter,17457,6.235,44,4,0,4,0,0.022913,0.0,0.000521,0.022913,0.0
696,Iowa,Henry,20145,17.923,43,33,1,26,1,0.163812,3.030303,0.00381,0.129064,3.846154
88,Arkansas,Carroll,27446,16.584,32,6,0,5,0,0.021861,0.0,0.000683,0.018218,0.0
918,Louisiana,St. Bernard,35897,7.726,49,475,20,358,10,1.32323,4.210526,0.027005,0.997298,2.793296
209,Colorado,Delta,30952,10.397,40,50,1,25,1,0.16154,2.0,0.004039,0.08077,4.0
1678,Oklahoma,Oklahoma,718633,391.412,51,838,33,385,15,0.11661,3.937947,0.002286,0.053574,3.896104


In [128]:
county_health = pd.read_csv('relevant_data/us-county-health-rankings-2020.csv')

In [129]:
county_health = county_health.dropna(subset=['county'])

In [130]:
county_health

Unnamed: 0,fips,state,county,num_deaths,years_of_potential_life_lost_rate,95percent_ci_low,95percent_ci_high,quartile,ypll_rate_aian,ypll_rate_aian_95percent_ci_low,...,percent_hispanic,num_non_hispanic_white,percent_non_hispanic_white,num_not_proficient_in_english,percent_not_proficient_in_english,95percent_ci_low_39,95percent_ci_high_39,percent_female,num_rural,percent_rural
1,1001,Alabama,Autauga,791.0,8128.591190,7283.340731,8973.841649,1.0,,,...,2.965774,41316,74.308016,426,0.820225,0.347891,1.292558,51.448715,22921.0,42.002162
2,1003,Alabama,Baldwin,2967.0,7354.122530,6918.554269,7789.690790,1.0,,,...,4.646779,181201,83.111337,1068,0.543517,0.347271,0.739763,51.538377,77060.0,42.279099
3,1005,Alabama,Barbour,472.0,10253.573403,8782.217281,11724.929524,2.0,,,...,4.276355,11356,45.641252,398,1.631683,0.824903,2.438462,47.216752,18613.0,67.789635
4,1007,Alabama,Bibb,471.0,11977.539484,10344.064842,13611.014126,3.0,,,...,2.625000,16708,74.589286,57,0.268210,0.000000,0.807504,46.781250,15663.0,68.352607
5,1009,Alabama,Blount,1085.0,11335.071134,10288.871387,12381.270881,3.0,,,...,9.571231,50255,86.886238,934,1.724520,1.198129,2.250911,50.726141,51562.0,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,56037,Wyoming,Sweetwater,532.0,7831.827655,6915.944384,8747.710925,4.0,,,...,16.083250,34145,79.312908,669,1.633221,0.993813,2.272629,48.544749,4782.0,10.916313
3189,56039,Wyoming,Teton,109.0,2730.603992,1991.644111,3469.563872,1.0,,,...,14.878038,18812,81.504268,945,4.314477,2.787340,5.841615,48.403449,9887.0,46.430920
3190,56041,Wyoming,Uinta,256.0,7330.683549,6011.148684,8650.218415,3.0,,,...,9.236908,17741,87.398394,133,0.696080,0.000000,1.429709,49.332479,9101.0,43.095937
3191,56043,Wyoming,Washakie,110.0,6586.049959,4500.924119,8671.175799,2.0,,,...,14.051997,6498,82.409639,25,0.324886,0.000000,1.376766,49.422955,3068.0,35.954529


In [131]:
county_health.columns[:100]

Index(['fips', 'state', 'county', 'num_deaths',
       'years_of_potential_life_lost_rate', '95percent_ci_low',
       '95percent_ci_high', 'quartile', 'ypll_rate_aian',
       'ypll_rate_aian_95percent_ci_low', 'ypll_rate_aian_95percent_ci_high',
       'ypll_rate_asian', 'ypll_rate_asian_95percent_ci_low',
       'ypll_rate_asian_95percent_ci_high', 'ypll_rate_black',
       'ypll_rate_black_95percent_ci_low', 'ypll_rate_black_95percent_ci_high',
       'ypll_rate_hispanic', 'ypll_rate_hispanic_95percent_ci_low',
       'ypll_rate_hispanic_95percent_ci_high', 'ypll_rate_white',
       'ypll_rate_white_95percent_ci_low', 'ypll_rate_white_95percent_ci_high',
       'percent_fair_or_poor_health', '95percent_ci_low_2',
       '95percent_ci_high_2', 'quartile_2',
       'average_number_of_physically_unhealthy_days', '95percent_ci_low_3',
       '95percent_ci_high_3', 'quartile_3',
       'average_number_of_mentally_unhealthy_days', '95percent_ci_low_4',
       '95percent_ci_high_4', 'quar

In [132]:
excluded_column_words = [
    'quartile',
    'ci_high',
    'ci_low',
    'unreliabe',
    'petitioned',
    'fips',
    'num',
    'unreliable',
    'denominator',
    'ratio',
    'population'
]


filtered_columns = county_health.columns[~county_health.columns.str.contains('|'.join(excluded_column_words))]

In [133]:
len(filtered_columns)

187

In [134]:
filtered_columns[:100]

Index(['state', 'county', 'years_of_potential_life_lost_rate',
       'ypll_rate_aian', 'ypll_rate_asian', 'ypll_rate_black',
       'ypll_rate_hispanic', 'ypll_rate_white', 'percent_fair_or_poor_health',
       'percent_low_birthweight', 'percent_lbw_aian', 'percent_lbw_asian',
       'percent_lbw_black', 'percent_lbw_hispanic', 'percent_lbw_white',
       'percent_smokers', 'percent_adults_with_obesity',
       'food_environment_index', 'percent_physically_inactive',
       'percent_with_access_to_exercise_opportunities',
       'percent_excessive_drinking',
       'percent_driving_deaths_with_alcohol_involvement', 'chlamydia_rate',
       'teen_birth_rate', 'teen_birth_rate_aian', 'teen_birth_rate_asian',
       'teen_birth_rate_black', 'teen_birth_rate_hispanic',
       'teen_birth_rate_white', 'percent_uninsured',
       'primary_care_physicians_rate', 'dentist_rate',
       'mental_health_provider_rate', 'preventable_hospitalization_rate',
       'preventable_hosp_rate_aian', 'pr

In [135]:
filtered_columns[100:]

Index(['child_mortality_rate_aian', 'child_mortality_rate_asian',
       'child_mortality_rate_black', 'child_mortality_rate_hispanic',
       'child_mortality_rate_white', 'infant_mortality_rate',
       'infant_mortality_rate_aian', 'infant_mortality_rate_asian',
       'infant_mortality_rate_black', 'infant_mortality_rate_hispanic',
       'infant_mortality_rate_white', 'percent_frequent_physical_distress',
       'percent_frequent_mental_distress', 'percent_adults_with_diabetes',
       'hiv_prevalence_rate', 'percent_food_insecure',
       'percent_limited_access_to_healthy_foods',
       'drug_overdose_mortality_rate', 'drug_overdose_mortality_rate_aian',
       'drug_overdose_mortality_rate_asian',
       'drug_overdose_mortality_rate_black',
       'drug_overdose_mortality_rate_hispanic',
       'drug_overdose_mortality_rate_white', 'motor_vehicle_mortality_rate',
       'mv_mortality_rate_aian', 'mv_mortality_rate_asian',
       'mv_mortality_rate_black', 'mv_mortality_rate_hi

In [136]:
filtered_county_health = county_health[filtered_columns]

In [137]:
filtered_county_health

Unnamed: 0,state,county,years_of_potential_life_lost_rate,ypll_rate_aian,ypll_rate_asian,ypll_rate_black,ypll_rate_hispanic,ypll_rate_white,percent_fair_or_poor_health,percent_low_birthweight,...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
1,Alabama,Autauga,8128.591190,,,10201.389930,,7885.665369,20.882987,8.619529,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
2,Alabama,Baldwin,7354.122530,,,9890.505276,3570.475265,7435.761186,17.509134,8.345003,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
3,Alabama,Barbour,10253.573403,,,12422.166370,,8140.174177,29.591802,11.474559,...,19.420441,48.032635,0.659137,0.454162,0.184880,4.276355,45.641252,1.631683,47.216752,67.789635
4,Alabama,Bibb,11977.539484,,,13085.045008,,12241.384199,19.439724,10.308710,...,16.473214,21.120536,0.437500,0.236607,0.116071,2.625000,74.589286,0.268210,46.781250,68.352607
5,Alabama,Blount,11335.071134,,,,,,21.745293,7.604563,...,18.236515,1.462656,0.653527,0.319848,0.121024,9.571231,86.886238,1.724520,50.726141,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,Wyoming,Sweetwater,7831.827655,,,,5475.015989,8246.109906,14.813082,9.705248,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313
3189,Wyoming,Teton,2730.603992,,,,,,11.914358,7.736721,...,15.415277,0.632555,0.914172,1.412417,0.151640,14.878038,81.504268,4.314477,48.403449,46.430920
3190,Wyoming,Uinta,7330.683549,,,,,,15.537464,10.110206,...,14.079511,0.684763,1.418789,0.472930,0.147791,9.236908,87.398394,0.696080,49.332479,43.095937
3191,Wyoming,Washakie,6586.049959,,,,,,15.955971,7.028754,...,21.686747,0.469245,1.813570,0.824350,0.101458,14.051997,82.409639,0.324886,49.422955,35.954529


In [138]:
filtered_county_health.info(max_cols=200)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3142 entries, 1 to 3192
Data columns (total 187 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               3142 non-null   object 
 1   county                                              3142 non-null   object 
 2   years_of_potential_life_lost_rate                   2849 non-null   float64
 3   ypll_rate_aian                                      175 non-null    float64
 4   ypll_rate_asian                                     224 non-null    float64
 5   ypll_rate_black                                     1030 non-null   float64
 6   ypll_rate_hispanic                                  646 non-null    float64
 7   ypll_rate_white                                     1283 non-null   float64
 8   percent_fair_or_poor_health                         3142 non-null   float64
 

In [139]:
complete_data = grouped_data.merge(
    filtered_county_health, left_on=['county', 'state'], right_on=['county', 'state']
)

In [140]:
complete_data.info(max_cols=200)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2368 entries, 0 to 2367
Data columns (total 199 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               2368 non-null   object 
 1   county                                              2368 non-null   object 
 2   population                                          2368 non-null   int64  
 3   density_km                                          2368 non-null   float64
 4   days_counted                                        2368 non-null   int64  
 5   case_sum                                            2368 non-null   int64  
 6   death_sum                                           2368 non-null   int64  
 7   case_count_28_days                                  2368 non-null   int64  
 8   death_count_28_days                                 2368 non-null   int64  
 

In [141]:
complete_data.dropna(thresh=2250, axis=1).info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2368 entries, 0 to 2367
Data columns (total 82 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   state                                               2368 non-null   object 
 1   county                                              2368 non-null   object 
 2   population                                          2368 non-null   int64  
 3   density_km                                          2368 non-null   float64
 4   days_counted                                        2368 non-null   int64  
 5   case_sum                                            2368 non-null   int64  
 6   death_sum                                           2368 non-null   int64  
 7   case_count_28_days                                  2368 non-null   int64  
 8   death_count_28_days                                 2368 non-null   int64  
 9

In [142]:
final_data = complete_data.dropna(thresh=2250, axis=1)

In [143]:
final_data.shape

(2368, 82)

In [144]:
final_data.dropna()

Unnamed: 0,state,county,population,density_km,days_counted,case_sum,death_sum,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
0,Alabama,Autauga,54571,35.436,40,48,3,28,1,0.087959,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
1,Alabama,Baldwin,182265,44.261,50,187,4,59,1,0.102598,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
2,Alabama,Barbour,27457,11.979,30,45,1,39,1,0.163893,...,19.420441,48.032635,0.659137,0.454162,0.184880,4.276355,45.641252,1.631683,47.216752,67.789635
3,Alabama,Bibb,22915,14.215,34,43,0,38,0,0.187650,...,16.473214,21.120536,0.437500,0.236607,0.116071,2.625000,74.589286,0.268210,46.781250,68.352607
4,Alabama,Blount,57322,34.325,39,40,0,26,0,0.069781,...,18.236515,1.462656,0.653527,0.319848,0.121024,9.571231,86.886238,1.724520,50.726141,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2361,Wyoming,Park,28702,1.596,47,1,0,1,0,0.003484,...,23.206247,0.682035,0.944619,0.821852,0.057973,5.623380,90.915291,0.627835,50.194380,44.240383
2362,Wyoming,Sheridan,29596,4.529,53,12,0,12,0,0.040546,...,20.993616,0.797142,1.366057,0.823603,0.112460,4.342937,91.482817,0.348727,49.793272,35.478775
2364,Wyoming,Sweetwater,45267,1.676,40,11,0,10,0,0.024300,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313
2366,Wyoming,Uinta,21025,3.899,31,6,0,6,0,0.028537,...,14.079511,0.684763,1.418789,0.472930,0.147791,9.236908,87.398394,0.696080,49.332479,43.095937


In [145]:
wrangled_data = final_data.dropna()

In [146]:
print(wrangled_data.columns)

Index(['state', 'county', 'population', 'density_km', 'days_counted',
       'case_sum', 'death_sum', 'case_count_28_days', 'death_count_28_days',
       'confirmed_infection(%_of_pop)', 'death_perc(%_of_case)',
       'avg_daily_case_increase(%_of_pop)', 'case_count_28_days(%_of_pop)',
       'death_count_28_days(%_of_case)', 'years_of_potential_life_lost_rate',
       'percent_fair_or_poor_health', 'percent_low_birthweight',
       'percent_smokers', 'percent_adults_with_obesity',
       'food_environment_index', 'percent_physically_inactive',
       'percent_with_access_to_exercise_opportunities',
       'percent_excessive_drinking',
       'percent_driving_deaths_with_alcohol_involvement', 'chlamydia_rate',
       'teen_birth_rate', 'percent_uninsured', 'primary_care_physicians_rate',
       'dentist_rate', 'mental_health_provider_rate',
       'preventable_hospitalization_rate', 'percent_with_annual_mammogram',
       'percent_vaccinated', 'high_school_graduation_rate',
       'pe

In [147]:
wrangled_data = wrangled_data.drop(['population', 'case_sum', 'death_sum'], axis=1)

In [148]:
wrangled_data

Unnamed: 0,state,county,density_km,days_counted,case_count_28_days,death_count_28_days,confirmed_infection(%_of_pop),death_perc(%_of_case),avg_daily_case_increase(%_of_pop),case_count_28_days(%_of_pop),...,percent_65_and_over,percent_black,percent_american_indian_alaska_native,percent_asian,percent_native_hawaiian_other_pacific_islander,percent_hispanic,percent_non_hispanic_white,percent_not_proficient_in_english,percent_female,percent_rural
0,Alabama,Autauga,35.436,40,28,1,0.087959,6.250000,0.002199,0.051309,...,15.562670,19.343177,0.480207,1.224798,0.111509,2.965774,74.308016,0.820225,51.448715,42.002162
1,Alabama,Baldwin,44.261,50,59,1,0.102598,2.139037,0.002052,0.032370,...,20.443350,8.783976,0.772399,1.150343,0.066966,4.646779,83.111337,0.543517,51.538377,42.279099
2,Alabama,Barbour,11.979,30,39,1,0.163893,2.222222,0.005463,0.142040,...,19.420441,48.032635,0.659137,0.454162,0.184880,4.276355,45.641252,1.631683,47.216752,67.789635
3,Alabama,Bibb,14.215,34,38,0,0.187650,0.000000,0.005519,0.165830,...,16.473214,21.120536,0.437500,0.236607,0.116071,2.625000,74.589286,0.268210,46.781250,68.352607
4,Alabama,Blount,34.325,39,26,0,0.069781,0.000000,0.001789,0.045358,...,18.236515,1.462656,0.653527,0.319848,0.121024,9.571231,86.886238,1.724520,50.726141,89.951502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2361,Wyoming,Park,1.596,47,1,0,0.003484,0.000000,0.000074,0.003484,...,23.206247,0.682035,0.944619,0.821852,0.057973,5.623380,90.915291,0.627835,50.194380,44.240383
2362,Wyoming,Sheridan,4.529,53,12,0,0.040546,0.000000,0.000765,0.040546,...,20.993616,0.797142,1.366057,0.823603,0.112460,4.342937,91.482817,0.348727,49.793272,35.478775
2364,Wyoming,Sweetwater,1.676,40,10,0,0.024300,0.000000,0.000608,0.022091,...,12.111217,1.145153,1.544680,1.010429,0.157952,16.083250,79.312908,1.633221,48.544749,10.916313
2366,Wyoming,Uinta,3.899,31,6,0,0.028537,0.000000,0.000921,0.028537,...,14.079511,0.684763,1.418789,0.472930,0.147791,9.236908,87.398394,0.696080,49.332479,43.095937


In [149]:
wrangled_data.to_csv('./relevant_data/wrangled_data.csv', index=False)

Maybe more data is better.