# Process Daily Confirmed Cases from Johns Hopkins Data
https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports

In [316]:
import pandas as pd

#### Configurations

In [317]:
import datetime

# Single source of truth for the day being processed. The report file name
# (MM-DD-YYYY.csv) and the new column label (M/D/YY, no zero padding) are both
# derived from it so the two can never disagree.
report_date = datetime.date(2020, 3, 28)

# Daily report to read for that day.
daily_datafile = f'./jh-daily-data/{report_date:%m-%d-%Y}.csv'
# Label of the column that will be appended to the confirmed-cases table.
daily_date = f'{report_date.month}/{report_date.day}/{report_date:%y}'
# Running per-state confirmed-cases table (read, extended, then written back).
confirmed_datafile = 'COVID-19-Confirmed-Cases-USA-By-State.csv'

#### Load Johns Hopkins Daily COVID-19 File

In [318]:
# Read the configured daily report into a DataFrame and display it.
df = pd.read_csv(filepath_or_buffer=daily_datafile, encoding='utf-8')
df

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
0,45001.0,Abbeville,South Carolina,US,2020-03-28 23:05:37,34.223334,-82.461707,3,0,0,0,"Abbeville, South Carolina, US"
1,22001.0,Acadia,Louisiana,US,2020-03-28 23:05:37,30.295065,-92.414197,9,1,0,0,"Acadia, Louisiana, US"
2,51001.0,Accomack,Virginia,US,2020-03-28 23:05:37,37.767072,-75.632346,2,0,0,0,"Accomack, Virginia, US"
3,16001.0,Ada,Idaho,US,2020-03-28 23:05:37,43.452658,-116.241552,76,0,0,0,"Ada, Idaho, US"
4,19001.0,Adair,Iowa,US,2020-03-28 23:05:37,41.330756,-94.471059,1,0,0,0,"Adair, Iowa, US"
5,21001.0,Adair,Kentucky,US,2020-03-28 23:05:37,37.104598,-85.281297,0,0,0,0,"Adair, Kentucky, US"
6,29001.0,Adair,Missouri,US,2020-03-28 23:05:37,40.190586,-92.600782,1,0,0,0,"Adair, Missouri, US"
7,40001.0,Adair,Oklahoma,US,2020-03-28 23:05:37,35.884942,-94.658593,3,0,0,0,"Adair, Oklahoma, US"
8,8001.0,Adams,Colorado,US,2020-03-28 23:05:37,39.874321,-104.336258,71,0,0,0,"Adams, Colorado, US"
9,16003.0,Adams,Idaho,US,2020-03-28 23:05:37,44.893336,-116.454525,0,0,0,0,"Adams, Idaho, US"


#### Select only US rows

In [319]:
# Keep only the rows whose Country_Region is exactly 'US'.
is_us_row = df['Country_Region'] == 'US'
df = df[is_us_row]

#### Group by State and Sum Confirmed Cases, Deaths, Recovered, Active

In [320]:
# Collapse all rows sharing a Province_State into one row, totalling each metric.
summed_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
df = df.groupby('Province_State').agg(dict.fromkeys(summed_columns, 'sum'))

# Remove the 'Wuhan Evacuee' and 'Recovered' index labels when they are present.
for label in ('Wuhan Evacuee', 'Recovered'):
    if label in df.index:
        df = df.drop([label])

df

Unnamed: 0_level_0,Confirmed,Deaths,Recovered,Active
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,694,4,0,0
Alaska,85,2,0,0
American Samoa,0,0,0,0
Arizona,773,15,0,0
Arkansas,409,5,0,0
California,5095,110,0,0
Colorado,1740,31,0,0
Connecticut,1524,33,0,0
Delaware,214,5,0,0
Diamond Princess,49,0,0,0


#### Get daily confirmed cases by State

In [321]:
# Select the confirmed-case totals by column name rather than by position, so
# the right metric is picked even if the aggregation's column order changes.
# Double brackets keep the result a one-column DataFrame indexed by state.
df_daily = df[['Confirmed']]
df_daily

Unnamed: 0_level_0,Confirmed
Province_State,Unnamed: 1_level_1
Alabama,694
Alaska,85
American Samoa,0
Arizona,773
Arkansas,409
California,5095
Colorado,1740
Connecticut,1524
Delaware,214
Diamond Princess,49


#### Load Confirmed Cases by State File

In [322]:
# Read the running confirmed-cases table, using its 'State' column as the index,
# and display it.
df_confirmed = pd.read_csv(
    confirmed_datafile,
    index_col='State',
    encoding='utf-8',
)
df_confirmed

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,0,0,0,0,0,0,0,0,0,...,46,78,83,131,138,196,242,381,517,587
Alaska,0,0,0,0,0,0,0,0,0,0,...,6,9,12,15,21,30,34,41,56,58
American Samoa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,0,0,0,0,1,1,1,1,1,1,...,27,45,78,118,152,235,326,401,508,665
Arkansas,0,0,0,0,0,0,0,0,0,0,...,33,62,96,122,165,192,219,280,335,381
California,0,0,0,0,2,2,2,2,2,3,...,751,952,1177,1364,1642,2108,2538,2998,3899,4657
Colorado,0,0,0,0,0,0,0,0,0,0,...,184,277,363,390,476,704,723,1021,1430,1433
Connecticut,0,0,0,0,0,0,0,0,0,0,...,68,159,194,194,223,415,618,875,1012,1291
Delaware,0,0,0,0,0,0,0,0,0,0,...,19,30,38,45,47,68,104,119,130,163
Diamond Princess,0,0,0,0,0,0,0,0,0,0,...,47,47,49,49,49,49,49,49,49,49


#### Make sure the number of rows matches before inserting

In [323]:
# Validate with explicit raises: `assert` statements are removed when Python
# runs with -O, and an equal row count alone does not prove that row i of the
# daily table is the same state as row i of the confirmed-cases table.
if df_daily.shape[0] != df_confirmed.shape[0]:
    raise ValueError(
        'Row count mismatch: daily data has {} rows, confirmed file has {}'.format(
            df_daily.shape[0], df_confirmed.shape[0]
        )
    )
if list(df_daily.index) != list(df_confirmed.index):
    raise ValueError(
        'State labels of the daily data and the confirmed file differ '
        'in content or order'
    )

58
58


#### Insert Daily Confirmed Cases to Main File

In [325]:
# Align the new column on the state index instead of raw row position: passing
# `.values` would silently attach counts to the wrong states whenever the two
# tables list their rows in a different order.
daily_counts = df_daily.iloc[:, 0].reindex(df_confirmed.index)

# A state present in the confirmed file but absent from the daily data would
# come back as NaN; fail loudly rather than write a hole into the table.
missing_mask = daily_counts.isna()
if missing_mask.any():
    raise ValueError(
        'No daily value for state(s): '
        + ', '.join(str(state) for state in daily_counts.index[missing_mask])
    )

# Append as the last column; insert() raises if `daily_date` already exists,
# which guards against adding the same day twice.
df_confirmed.insert(df_confirmed.shape[1], daily_date, daily_counts)
df_confirmed

Unnamed: 0_level_0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,0,0,0,0,0,0,0,0,0,...,78,83,131,138,196,242,381,517,587,694
Alaska,0,0,0,0,0,0,0,0,0,0,...,9,12,15,21,30,34,41,56,58,85
American Samoa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,0,0,0,0,1,1,1,1,1,1,...,45,78,118,152,235,326,401,508,665,773
Arkansas,0,0,0,0,0,0,0,0,0,0,...,62,96,122,165,192,219,280,335,381,409
California,0,0,0,0,2,2,2,2,2,3,...,952,1177,1364,1642,2108,2538,2998,3899,4657,5095
Colorado,0,0,0,0,0,0,0,0,0,0,...,277,363,390,476,704,723,1021,1430,1433,1740
Connecticut,0,0,0,0,0,0,0,0,0,0,...,159,194,194,223,415,618,875,1012,1291,1524
Delaware,0,0,0,0,0,0,0,0,0,0,...,30,38,45,47,68,104,119,130,163,214
Diamond Princess,0,0,0,0,0,0,0,0,0,0,...,47,49,49,49,49,49,49,49,49,49


#### Write the Updated Confirmed Cases Table Back to the CSV File

In [326]:
# Overwrite the confirmed-cases CSV with the table that now includes the new day.
df_confirmed.to_csv(path_or_buf=confirmed_datafile, encoding='utf-8')