# Process New York Times Data for USA by Counties
https://github.com/nytimes/covid-19-data

In [32]:
import pandas as pd

## Configurations

In [33]:
datafile = './data/us-counties.csv'
cases_save_file = 'COVID-19-Cases-USA-By-County.csv'
deaths_save_file = 'COVID-19-Deaths-USA-By-County.csv'
df = pd.read_csv(datafile, encoding='utf-8', index_col=False)

## Process Case File

In [34]:
df_cases = df.groupby(['date', 'fips', 'state', 'county']).agg({'cases':'sum'})
df_cases

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cases
date,fips,state,county,Unnamed: 4_level_1
2020-01-21,53061.0,Washington,Snohomish,1
2020-01-22,53061.0,Washington,Snohomish,1
2020-01-23,53061.0,Washington,Snohomish,1
2020-01-24,17031.0,Illinois,Cook,1
2020-01-24,53061.0,Washington,Snohomish,1
2020-01-25,6059.0,California,Orange,1
2020-01-25,17031.0,Illinois,Cook,1
2020-01-25,53061.0,Washington,Snohomish,1
2020-01-26,4013.0,Arizona,Maricopa,1
2020-01-26,6037.0,California,Los Angeles,1


In [35]:
df_cases = df_cases.reset_index().pivot_table(index=['fips','state', 'county'], columns='date', values='cases',  aggfunc=sum,  margins=True, fill_value=0)
df_cases

Unnamed: 0_level_0,Unnamed: 1_level_0,date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02,All
fips,state,county,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1001.0,Alabama,Autauga,0,0,0,0,0,0,0,0,0,0,...,4,6,6,6,6,7,7,10,10,63
1003.0,Alabama,Baldwin,0,0,0,0,0,0,0,0,0,0,...,4,5,5,10,15,18,19,23,25,143
1007.0,Alabama,Bibb,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,3,3,4,12
1009.0,Alabama,Blount,0,0,0,0,0,0,0,0,0,0,...,1,3,5,5,5,5,5,5,6,40
1011.0,Alabama,Bullock,0,0,0,0,0,0,0,0,0,0,...,0,2,2,3,3,3,3,3,2,21
1013.0,Alabama,Butler,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,9
1015.0,Alabama,Calhoun,0,0,0,0,0,0,0,0,0,0,...,2,2,3,3,3,9,9,11,12,63
1017.0,Alabama,Chambers,0,0,0,0,0,0,0,0,0,0,...,10,13,15,17,27,36,36,45,67,278
1019.0,Alabama,Cherokee,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,2,2,2,2,4,16
1021.0,Alabama,Chilton,0,0,0,0,0,0,0,0,0,0,...,1,4,7,7,8,10,11,13,14,75


## Process Deaths File

In [36]:
df_deaths = df.groupby(['date', 'fips', 'state', 'county']).agg({'deaths':'sum'})
df_deaths

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,deaths
date,fips,state,county,Unnamed: 4_level_1
2020-01-21,53061.0,Washington,Snohomish,0
2020-01-22,53061.0,Washington,Snohomish,0
2020-01-23,53061.0,Washington,Snohomish,0
2020-01-24,17031.0,Illinois,Cook,0
2020-01-24,53061.0,Washington,Snohomish,0
2020-01-25,6059.0,California,Orange,0
2020-01-25,17031.0,Illinois,Cook,0
2020-01-25,53061.0,Washington,Snohomish,0
2020-01-26,4013.0,Arizona,Maricopa,0
2020-01-26,6037.0,California,Los Angeles,0


In [37]:
df_deaths = df_deaths.reset_index().pivot_table(index=['fips','state', 'county'], columns='date', values='deaths',  aggfunc=sum,  margins=True, fill_value=0)
df_cases

Unnamed: 0_level_0,Unnamed: 1_level_0,date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02,All
fips,state,county,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1001.0,Alabama,Autauga,0,0,0,0,0,0,0,0,0,0,...,4,6,6,6,6,7,7,10,10,63
1003.0,Alabama,Baldwin,0,0,0,0,0,0,0,0,0,0,...,4,5,5,10,15,18,19,23,25,143
1007.0,Alabama,Bibb,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,3,3,4,12
1009.0,Alabama,Blount,0,0,0,0,0,0,0,0,0,0,...,1,3,5,5,5,5,5,5,6,40
1011.0,Alabama,Bullock,0,0,0,0,0,0,0,0,0,0,...,0,2,2,3,3,3,3,3,2,21
1013.0,Alabama,Butler,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,9
1015.0,Alabama,Calhoun,0,0,0,0,0,0,0,0,0,0,...,2,2,3,3,3,9,9,11,12,63
1017.0,Alabama,Chambers,0,0,0,0,0,0,0,0,0,0,...,10,13,15,17,27,36,36,45,67,278
1019.0,Alabama,Cherokee,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,2,2,2,2,4,16
1021.0,Alabama,Chilton,0,0,0,0,0,0,0,0,0,0,...,1,4,7,7,8,10,11,13,14,75


### Save Files

In [38]:
df_cases.to_csv(cases_save_file, encoding='utf-8')
df_deaths.to_csv(deaths_save_file, encoding='utf-8')