### COVID-19 Case count: Global and Canada

The following is the ETL (extract, transform, load) process for the confirmed COVID-19 case counts published by Johns Hopkins University.

In [1]:
# Execute the COVID-19_Cases notebook inside this kernel so that the names it
# defines become available to the cells below.
# NOTE(review): the later cells use `os`, `pd` and `create_engine` without any
# visible import -- presumably they are provided by this %run; confirm.
%run COVID-19_Cases.ipynb

In [3]:
# EXTRACT

# Location of the Johns Hopkins COVID-19 repository file holding the global
# time series of confirmed cases (kept locally under Resources/).
csvpath = os.path.join(
    "Resources",
    "time_series_covid19_confirmed_global.csv",
)

# Load the raw CSV; every later transform cell starts from this frame.
cases_df = pd.read_csv(csvpath, encoding="utf-8")


In [4]:
# TRANSFORM - Global Cases

# Keep only the country label and the per-date count columns, then flip the
# frame so that each date is a row and each source row becomes a column.
global_cases = (
    cases_df
    .drop(columns=["Lat", "Long", "Province/State"])
    .rename(columns={"Country/Region": "Country"})
    .T
)

# The 'Country' row carries the country names: promote it to the column header
# and remove it from the data rows.
global_cases.columns = global_cases.loc["Country"].to_numpy()

# Turn the date index into a regular 'Date' column.
global_cases = (
    global_cases
    .drop(index="Country")
    .reset_index()
    .rename(columns={"index": "Date"})
)
global_cases.head()

Unnamed: 0,Date,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Kingdom,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,Yemen,Zambia,Zimbabwe
0,1/22/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0


In [5]:
# TRANSFORM - Global Cases

# Reshape the wide table (one column per country) into a long table with one
# row per (Date, Country Name) pair.
global_cases = global_cases.set_index('Date').stack().reset_index()
global_cases = global_cases.rename(columns={'level_1': 'Country Name', 0: 'No_Cases'})
global_cases['Date_dt'] = pd.to_datetime(global_cases["Date"], format='%m/%d/%y')

# The wide table was built by transposing a frame that also held the country
# strings, so the counts may arrive as generic Python objects; make them numeric
# before aggregating.
global_cases['No_Cases'] = pd.to_numeric(global_cases['No_Cases'])

# A country that is listed on several source rows (e.g. Canada, one row per
# province) shows up as several same-named columns, hence several rows per
# (country, date) here. Add them up. Only No_Cases is summed: summing the whole
# frame would also try to aggregate the string 'Date' column.
global_cases = (
    global_cases
    .groupby(['Country Name', 'Date_dt'])['No_Cases']
    .sum()
    .reset_index()
)
global_cases = global_cases[["Date_dt", "Country Name", "No_Cases"]]

global_cases

Unnamed: 0,Date_dt,Country Name,No_Cases
0,2020-01-22,Afghanistan,0
1,2020-01-23,Afghanistan,0
2,2020-01-24,Afghanistan,0
3,2020-01-25,Afghanistan,0
4,2020-01-26,Afghanistan,0
...,...,...,...
62070,2020-12-07,Zimbabwe,10839
62071,2020-12-08,Zimbabwe,10912
62072,2020-12-09,Zimbabwe,11007
62073,2020-12-10,Zimbabwe,11081


In [6]:
# TRANSFORM - Canadian Cases by Province

# Provinces and territories to keep. Rows are selected by name instead of by
# hard-coded row labels, so the filter keeps working when the upstream CSV gains
# or reorders rows; any other entry reported under Canada is left out.
CANADIAN_PROVINCES = [
    "Alberta",
    "British Columbia",
    "Manitoba",
    "New Brunswick",
    "Newfoundland and Labrador",
    "Northwest Territories",
    "Nova Scotia",
    "Nunavut",
    "Ontario",
    "Prince Edward Island",
    "Quebec",
    "Saskatchewan",
    "Yukon",
]

is_canadian_province = (
    (cases_df["Country/Region"] == "Canada")
    & cases_df["Province/State"].isin(CANADIAN_PROVINCES)
)
can_cases = (
    cases_df.loc[is_canadian_province]
    .rename(columns={"Province/State": "Province"})
    .drop(columns=['Lat', 'Long', 'Country/Region'])
)

# Transpose so that each date is a row and each province is a column.
can_cases = can_cases.transpose()

# The 'Province' row carries the province names: promote it to the column
# header, remove it from the data rows, and expose the dates as a 'Date' column.
can_cases.columns = can_cases.loc['Province'].to_numpy()
can_cases = (
    can_cases
    .drop(index='Province')
    .reset_index()
    .rename(columns={"index": "Date"})
)
can_cases

Unnamed: 0,Date,Alberta,British Columbia,Manitoba,New Brunswick,Newfoundland and Labrador,Northwest Territories,Nova Scotia,Nunavut,Ontario,Prince Edward Island,Quebec,Saskatchewan,Yukon
0,1/22/20,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1/23/20,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1/24/20,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1/25/20,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1/26/20,0,0,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320,12/7/20,70301,38152,19131,536,351,15,1376,219,132736,84,153176,10412,57
321,12/8/20,72028,38718,19376,541,352,15,1383,220,134605,84,154740,10597,58
322,12/9/20,73488,39337,19655,542,353,15,1389,229,136599,84,156468,10899,58
323,12/10/20,75054,40060,19947,546,354,15,1393,229,138529,84,158310,11223,58


In [7]:
# TRANSFORM - Canadian Cases by Province

# Reshape the wide table (one column per province) into a long table with one
# row per (Date, Province) pair, parse the m/d/yy date strings into real
# datetimes, and keep only the columns that are loaded into the database.
can_cases = (
    can_cases
    .set_index('Date')
    .stack()
    .reset_index()
    .rename(columns={'level_1': 'Province', 0: 'No_Cases'})
    .assign(Date_dt=lambda frame: pd.to_datetime(frame["Date"], format='%m/%d/%y'))
    .loc[:, ["Date_dt", "Province", "No_Cases"]]
)
can_cases


Unnamed: 0,Date_dt,Province,No_Cases
0,2020-01-22,Alberta,0
1,2020-01-22,British Columbia,0
2,2020-01-22,Manitoba,0
3,2020-01-22,New Brunswick,0
4,2020-01-22,Newfoundland and Labrador,0
...,...,...,...
4220,2020-12-11,Ontario,140320
4221,2020-12-11,Prince Edward Island,84
4222,2020-12-11,Quebec,160023
4223,2020-12-11,Saskatchewan,11475


In [None]:
# LOAD - Global Cases

# Create engine
# The connection URL is read from the DATABASE_URL environment variable
# (format: postgresql+psycopg2://user:password@host:5432/dbname) so that no
# credentials are stored in the notebook.
# NOTE(review): the password that used to be hard-coded here should be treated
# as compromised and rotated.
db_url = os.environ.get("DATABASE_URL")
if not db_url:
    raise RuntimeError(
        "DATABASE_URL is not set; export the Postgres connection URL before running this cell."
    )
engine = create_engine(db_url)

# Load table into Postgres database, replacing public.global_cases if it exists
global_cases.to_sql(name='global_cases', schema='public', con=engine, if_exists='replace', method='multi', index=False)


In [None]:
# LOAD - Canadian Cases

# Create engine
# The connection URL is read from the DATABASE_URL environment variable
# (format: postgresql+psycopg2://user:password@host:5432/dbname) so that no
# credentials are stored in the notebook.
# NOTE(review): the password that used to be hard-coded here should be treated
# as compromised and rotated.
db_url = os.environ.get("DATABASE_URL")
if not db_url:
    raise RuntimeError(
        "DATABASE_URL is not set; export the Postgres connection URL before running this cell."
    )
engine = create_engine(db_url)

# Load table into Postgres database, replacing public.can_cases if it exists
can_cases.to_sql(name='can_cases', schema='public',con=engine, if_exists='replace', method='multi',index=False)

### NBC News
This is the ETL process for the NBC News timeline of key events related to COVID-19, found here:
    
https://dataviz.nbcnews.com/projects/20200302-covid-timeline/index.html?initialWidth=1160&childId=embed-20200302-covid-timeline&parentTitle=Coronavirus%20timeline%3A%20Tracking%20the%20critical%20moments%20of%20Covid-19&parentUrl=https%3A%2F%2Fwww.nbcnews.com%2Fhealth%2Fhealth-news%2Fcoronavirus-timeline-tracking-critical-moments-covid-19-n1154341