In [17]:
import pandas as pd
import numpy as np


## FIPS arrives in the 'COUNTY' column. It is deliberately NOT forced to str
## (i.e. no `dtype={'COUNTY': str}`): the other data set used in this notebook
## keeps FIPS numeric, so pandas' default inference is kept here to match.

## Source (presumably a local checkout -- confirm): the `vacc_data` folder of
## https://github.com/bansallab/vaccinetracking
df = (
    pd.read_csv(
        "vaccinetracking/vacc_data/data_county_current.csv",
        parse_dates=['DATE'],
    )
    ## keep only the county-level rows
    .loc[lambda frame: frame['GEOFLAG'] == 'County']
    ## rename to make usage of column clear
    .rename(columns={'COUNTY': 'FIPS'})
    ## STATE / WEEK / YEAR are irrelevant because we're using the DATE field
    .drop(columns=['STATE', 'WEEK', 'YEAR'])
)

df

Unnamed: 0,STATE_NAME,COUNTY_NAME,FIPS,GEOFLAG,DATE,CASE_TYPE,CASES,POPN
0,AL,Autauga,01001,County,2022-02-25,Booster,7646.000000,55869.0
1,AL,Autauga,01001,County,2022-02-25,Booster Coverage,13.685586,55869.0
2,AL,Autauga,01001,County,2022-02-25,Complete,24207.000000,55869.0
3,AL,Autauga,01001,County,2022-02-25,Complete Coverage,43.328143,55869.0
4,AL,Autauga,01001,County,2022-02-25,Partial,30642.000000,55869.0
...,...,...,...,...,...,...,...,...
18707,WY,Weston,56045,County,2022-02-25,Booster Coverage,17.063664,6927.0
18708,WY,Weston,56045,County,2022-02-25,Complete,2589.000000,6927.0
18709,WY,Weston,56045,County,2022-02-25,Complete Coverage,37.375487,6927.0
18710,WY,Weston,56045,County,2022-02-25,Partial,2823.000000,6927.0


According to the [codebook](https://github.com/bansallab/vaccinetracking/tree/main/vacc_data#codebook), the columns with "Coverage" are pre-computed percentages.

"Partial/ Partial Coverage" is anybody with at least 1 shot, not people with *only* one shot.

So essentially the unvaccinated percentage is `100 - df.loc[df.CASE_TYPE == 'Partial Coverage', 'CASES']` (the "Coverage" values are percentages on a 0–100 scale, not fractions).

I should use raw counts when joining...

## Question 1a (MVP): Is there a correlation between vax rate and covid fatality rate?

## Question 1b (MVP): Is there a correlation between income and vaccination rate?


In [29]:
## Rows holding the pre-computed "at least one shot" percentage per county.
is_partial_coverage = df['CASE_TYPE'] == 'Partial Coverage'
partial_coverage = df[is_partial_coverage]

## For these rows CASES is a percentage, so 100 minus it is the share of the
## county with no shot at all. CASES itself is dropped once that is derived.
unvaxed_by_fips = (
    df.loc[is_partial_coverage, ['FIPS', 'CASES', 'POPN']]
    .assign(UNVAX_PERCENT=lambda frame: 100 - frame['CASES'])
    .drop(columns='CASES')
)

In [33]:
## Time to get the covid fatality rate by FIPS.

## Read from a local file; presumably a downloaded copy of the upstream CSV at
## https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv
deaths_url = 'time_series_covid19_deaths_US.csv'

## FIPS is intentionally not forced to str here (no `dtype={'FIPS': str}`):
## this source treats FIPS as a number, so pandas' default inference is kept.
deaths_df = pd.read_csv(filepath_or_buffer=deaths_url)

deaths_df


Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,2/19/22,2/20/22,2/21/22,2/22/22,2/23/22,2/24/22,2/25/22,2/26/22,2/27/22,2/28/22
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,184,184,184,184,184,192,194,194,194,194
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.727750,-87.722071,...,635,635,635,635,636,640,640,640,640,640
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,92,92,92,92,92,92,93,93,93,93
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,99,99,99,99,99,99,99,99,99,99
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,216,216,216,216,216,216,217,218,218,218
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,84056039,US,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.589080,...,15,15,15,16,16,16,16,16,16,16
3338,84056041,US,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,...,36,36,36,36,36,36,36,36,36,36
3339,84090056,US,USA,840,90056.0,Unassigned,Wyoming,US,0.000000,0.000000,...,0,0,0,0,0,0,0,0,0,18
3340,84056043,US,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,...,42,42,42,43,43,43,43,43,43,43


In [32]:
## NOTE(review): when the notebook is run top to bottom, `df` is the county
## vaccination frame built in the first cell, yet the saved output of this
## cell shows the deaths table. This looks like stale state from out-of-order
## execution (this cell is In [32] but sits after In [33]) -- confirm whether
## `deaths_df` was intended here, or whether the cell can be removed.
df

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,2/19/22,2/20/22,2/21/22,2/22/22,2/23/22,2/24/22,2/25/22,2/26/22,2/27/22,2/28/22
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,184,184,184,184,184,192,194,194,194,194
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.727750,-87.722071,...,635,635,635,635,636,640,640,640,640,640
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,92,92,92,92,92,92,93,93,93,93
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,99,99,99,99,99,99,99,99,99,99
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,216,216,216,216,216,216,217,218,218,218
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,84056039,US,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.589080,...,15,15,15,16,16,16,16,16,16,16
3338,84056041,US,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,...,36,36,36,36,36,36,36,36,36,36
3339,84090056,US,USA,840,90056.0,Unassigned,Wyoming,US,0.000000,0.000000,...,0,0,0,0,0,0,0,0,0,18
3340,84056043,US,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,...,42,42,42,43,43,43,43,43,43,43
