# Jimmy Wrangler Data Explorer
a project for EECS 731 by Benjamin Wyss

Tackling the health industry to explore the relation between covid-19 cases per unit population and reported face mask usage by US county

###### Python imports

In [134]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.close('all')

### Reading Data Sets From CSV Files
###### Population data by US county:

taken from https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/ on 9/3/20

In [118]:
populations = pd.read_csv('../data/raw/us_county_populations.csv')


In [119]:
populations

Unnamed: 0,countyFIPS,County Name,State,population
0,0,Statewide Unallocated,AL,0
1,1001,Autauga County,AL,55869
2,1003,Baldwin County,AL,223234
3,1005,Barbour County,AL,24686
4,1007,Bibb County,AL,22394
...,...,...,...,...
3190,56037,Sweetwater County,WY,42343
3191,56039,Teton County,WY,23464
3192,56041,Uinta County,WY,20226
3193,56043,Washakie County,WY,7805


###### Covid-19 cases by US county:

taken from https://github.com/nytimes/covid-19-data/tree/master/live on 9/3/20

In [120]:
covidCases = pd.read_csv('../data/raw/us_county_covid19_cases.csv')

In [121]:
covidCases

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2020-09-03,Autauga,Alabama,1001.0,1466,24.0,1349.0,23.0,117.0,1.0
1,2020-09-03,Baldwin,Alabama,1003.0,4628,44.0,4495.0,40.0,133.0,4.0
2,2020-09-03,Barbour,Alabama,1005.0,771,7.0,614.0,7.0,157.0,0.0
3,2020-09-03,Bibb,Alabama,1007.0,564,8.0,542.0,6.0,22.0,2.0
4,2020-09-03,Blount,Alabama,1009.0,1336,11.0,1037.0,11.0,299.0,0.0
...,...,...,...,...,...,...,...,...,...,...
3228,2020-09-03,Sweetwater,Wyoming,56037.0,304,2.0,285.0,,19.0,
3229,2020-09-03,Teton,Wyoming,56039.0,433,1.0,398.0,,35.0,
3230,2020-09-03,Uinta,Wyoming,56041.0,303,2.0,251.0,,52.0,
3231,2020-09-03,Washakie,Wyoming,56043.0,108,6.0,100.0,,8.0,


###### Reported mask use by US county:

taken from https://github.com/nytimes/covid-19-data/tree/master/mask-use on 9/3/20

results collected via a national NYTimes survey where participants were asked 

"How often do you wear a mask in public when you expect to be within six feet of another person?"

In [122]:
maskUse = pd.read_csv('../data/raw/us_county_mask_use.csv')

In [123]:
maskUse

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


# The Big Ideas

These data sets can be combined to examine both:

(1): By US county, the relationship between covid-19 cases per unit population and the rate at which individuals respond that they ***always*** wear a face mask in a public setting in which they expect to be within 6 feet of another person

(2): By US county, The relationship between covid-19 cases per unit population and the rate at which individuals respond that they ***never*** wear a face mask in a public setting in which they expect to be within 6 feet of another person

These two relationships can be examined together to determine the impact of public initiative to wear face masks on the total percentage of the US county population that is infected with covid-19

### Cleaning Data

Removing columns that will not be used in analysis and rows corresponding to unknown data. By removing rows of covid-19 cases where the county is unknown, a lower bound for covid-19 cases will be obtained.

In [124]:
populations = populations[populations.population != 0]
populations = populations[['countyFIPS', 'population']]
populations = populations.rename(columns={'countyFIPS': 'fips_code'})

In [125]:
populations

Unnamed: 0,fips_code,population
1,1001,55869
2,1003,223234
3,1005,24686
4,1007,22394
5,1009,57826
...,...,...
3190,56037,42343
3191,56039,23464
3192,56041,20226
3193,56043,7805


In [126]:
covidCases = covidCases[covidCases.county != 'Unknown']
covidCases = covidCases[['fips', 'county', 'state', 'cases']]
covidCases = covidCases.rename(columns={'fips': 'fips_code'})

In [127]:
covidCases

Unnamed: 0,fips_code,county,state,cases
0,1001.0,Autauga,Alabama,1466
1,1003.0,Baldwin,Alabama,4628
2,1005.0,Barbour,Alabama,771
3,1007.0,Bibb,Alabama,564
4,1009.0,Blount,Alabama,1336
...,...,...,...,...
3228,56037.0,Sweetwater,Wyoming,304
3229,56039.0,Teton,Wyoming,433
3230,56041.0,Uinta,Wyoming,303
3231,56043.0,Washakie,Wyoming,108


In [128]:
maskUse = maskUse[['COUNTYFP', 'NEVER', 'ALWAYS']]
maskUse = maskUse.rename(columns={'COUNTYFP': 'fips_code', 'NEVER': 'never', 'ALWAYS': 'always'})

In [129]:
maskUse

Unnamed: 0,fips_code,never,always
0,1001,0.053,0.444
1,1003,0.083,0.436
2,1005,0.067,0.491
3,1007,0.020,0.572
4,1009,0.053,0.459
...,...,...,...
3137,56037,0.061,0.268
3138,56039,0.095,0.340
3139,56041,0.098,0.264
3140,56043,0.204,0.287


### Merging the data sets together

merge based on county fips code

In [130]:
df = pd.merge(populations, covidCases, on='fips_code')
df = pd.merge(df, maskUse, on='fips_code')

In [131]:
df

Unnamed: 0,fips_code,population,county,state,cases,never,always
0,1001,55869,Autauga,Alabama,1466,0.053,0.444
1,1003,223234,Baldwin,Alabama,4628,0.083,0.436
2,1005,24686,Barbour,Alabama,771,0.067,0.491
3,1007,22394,Bibb,Alabama,564,0.020,0.572
4,1009,57826,Blount,Alabama,1336,0.053,0.459
...,...,...,...,...,...,...,...
3115,56037,42343,Sweetwater,Wyoming,304,0.061,0.268
3116,56039,23464,Teton,Wyoming,433,0.095,0.340
3117,56041,20226,Uinta,Wyoming,303,0.098,0.264
3118,56043,7805,Washakie,Wyoming,108,0.204,0.287


### Transforming the data

calculating covid-19 cases per unit population

In [132]:
df['cases_per_population'] = df['cases']/df['population']

In [133]:
df

Unnamed: 0,fips_code,population,county,state,cases,never,always,cases_per_population
0,1001,55869,Autauga,Alabama,1466,0.053,0.444,0.026240
1,1003,223234,Baldwin,Alabama,4628,0.083,0.436,0.020732
2,1005,24686,Barbour,Alabama,771,0.067,0.491,0.031232
3,1007,22394,Bibb,Alabama,564,0.020,0.572,0.025185
4,1009,57826,Blount,Alabama,1336,0.053,0.459,0.023104
...,...,...,...,...,...,...,...,...
3115,56037,42343,Sweetwater,Wyoming,304,0.061,0.268,0.007179
3116,56039,23464,Teton,Wyoming,433,0.095,0.340,0.018454
3117,56041,20226,Uinta,Wyoming,303,0.098,0.264,0.014981
3118,56043,7805,Washakie,Wyoming,108,0.204,0.287,0.013837
