Data from https://github.com/CSSEGISandData/COVID-19

In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Load the confirmed-cases time series from the local CSV file.
COVID19_confirmed = "Resources/time_series_19-covid-Confirmed.csv"
confirmed_df = pd.read_csv(COVID19_confirmed)
# Replace every missing value with the literal string "Null".
# NOTE(review): this stores the text "Null", not a real NULL, and a numeric
# column that had gaps will then contain text -- confirm this is intended.
confirmed_df = confirmed_df.fillna(value="Null")
# Give the identifying columns lowercase, underscore-separated names;
# all other columns keep the names they have in the CSV.
confirmed_df = confirmed_df.rename(
    columns={
        'Province/State': 'province_state',
        'Country/Region': 'country_region',
        'Lat': 'lat',
        'Long': 'long',
    }
)
# Preview the first five rows as the cell output.
confirmed_df.head()
# print(len(confirmed_df.index))

Unnamed: 0,province_state,country_region,lat,long,Date,confirmed_cum,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48
0,Null,Afghanistan,33,65,1/22/2020,0,Null,Null,Null,Null,...,Null,Null,Null,Null,Null,Null,Null,Null,Null,Null
1,Null,Null,Null,Null,1/23/2020,0,Null,Null,Null,Null,...,Null,Null,Null,Null,Null,Null,Null,Null,Null,Null
2,Null,Null,Null,Null,1/24/2020,0,Null,Null,Null,Null,...,Null,Null,Null,Null,Null,Null,Null,Null,Null,Null
3,Null,Null,Null,Null,1/25/2020,0,Null,Null,Null,Null,...,Null,Null,Null,Null,Null,Null,Null,Null,Null,Null
4,Null,Null,Null,Null,1/26/2020,0,Null,Null,Null,Null,...,Null,Null,Null,Null,Null,Null,Null,Null,Null,Null


In [26]:
# Load the deaths time series from the local CSV file, fill gaps, and
# normalise the identifying column names in a single chained expression.
COVID19_deaths = "Resources/time_series_19-covid-Deaths.csv"
deaths_df = (
    pd.read_csv(COVID19_deaths)
    # Missing values become the literal string "Null".
    .fillna(value="Null")
    # Only these four columns are renamed; the rest keep their CSV names.
    .rename(
        columns={
            'Province/State': 'province_state',
            'Country/Region': 'country_region',
            'Lat': 'lat',
            'Long': 'long',
        }
    )
)
# Preview the first five rows as the cell output.
deaths_df.head()
# print(len(deaths_df.index))

Unnamed: 0,province_state,country_region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20
0,Anhui,Mainland China,31.8257,117.2264,0,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
1,Beijing,Mainland China,40.1824,116.4142,0,0,0,0,0,1,...,4,4,4,5,7,8,8,8,8,8
2,Chongqing,Mainland China,30.0572,107.874,0,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
3,Fujian,Mainland China,26.0789,117.9874,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
4,Gansu,Mainland China,36.0611,103.8343,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [27]:
# Load the recovered time series from the local CSV file.
COVID19_recovered = "Resources/time_series_19-covid-Recovered.csv"

# CSV header -> column name used in the DataFrame (other columns are unchanged).
recovered_column_names = {
    'Province/State': 'province_state',
    'Country/Region': 'country_region',
    'Lat': 'lat',
    'Long': 'long',
}

recovered_df = pd.read_csv(COVID19_recovered)
# Missing values become the literal string "Null".
recovered_df = recovered_df.fillna(value="Null")
recovered_df = recovered_df.rename(columns=recovered_column_names)
# Preview the first five rows as the cell output.
recovered_df.head()
# print(len(recovered_df.index))

Unnamed: 0,province_state,country_region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20
0,Anhui,Mainland China,31.8257,117.2264,0,0,0,0,0,0,...,663,712,744,792,821,868,873,917,936,956
1,Beijing,Mainland China,40.1824,116.4142,0,0,1,2,2,2,...,198,215,235,248,257,271,276,282,288,297
2,Chongqing,Mainland China,30.0572,107.874,0,0,0,0,0,0,...,349,372,384,401,422,438,450,469,490,502
3,Fujian,Mainland China,26.0789,117.9874,0,0,0,0,0,0,...,183,199,218,228,235,243,247,255,260,270
4,Gansu,Mainland China,36.0611,103.8343,0,0,0,0,0,0,...,80,80,81,81,82,82,84,85,86,87


### Connect to local database

In [28]:
# Connection target in the form user:password@host:port/database.
# NOTE(review): the credentials are hard-coded here; consider reading them
# from the environment before sharing this notebook.
rds_connection_string = "postgres:postgres@localhost:5432/COVID-19"
# Prefix the PostgreSQL scheme to form the full database URL, then create
# the SQLAlchemy engine that the later cells use.
database_url = f'postgresql://{rds_connection_string}'
engine = create_engine(database_url)

In [29]:
# List the table names visible through the engine.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector API below is the supported equivalent and also works on
# older releases.
from sqlalchemy import inspect

inspect(engine).get_table_names()

['confirmed', 'deaths', 'recovered']

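### Use pandas to load the CSV-derived DataFrames into the database
Skip these cells if the data is already in the database: they append to the existing tables, so re-running them would insert the same rows again.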

In [30]:
# Write confirmed_df into the `confirmed` table.
# if_exists='append' adds the rows to the existing table, so running this
# cell again would try to insert the same rows again; index=False leaves
# the DataFrame index out of the written columns.
confirmed_df.to_sql(
    name='confirmed',
    con=engine,
    if_exists='append',
    index=False,
)

In [32]:
# Write deaths_df into the `deaths` table.
# if_exists='append' adds the rows to the existing table, so running this
# cell again would try to insert the same rows again; index=False leaves
# the DataFrame index out of the written columns.
deaths_df.to_sql(
    name='deaths',
    con=engine,
    if_exists='append',
    index=False,
)

In [36]:
# Write recovered_df into the `recovered` table.
# if_exists='append' adds the rows to the existing table, so running this
# cell again would try to insert the same rows again; index=False leaves
# the DataFrame index out of the written columns.
recovered_df.to_sql(
    name='recovered',
    con=engine,
    if_exists='append',
    index=False,
)

### Confirm data has been added by querying the tables

In [37]:
## CONFIRMED
# Read the whole `confirmed` table back from the database, then show its
# first five rows as the cell output.
confirmed_from_db = pd.read_sql_query('select * from confirmed', con=engine)
confirmed_from_db.head()

Unnamed: 0,id,province_state,country_region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,...,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20
0,1,Anhui,Mainland China,31.8257,117.2264,1,9,15,39,60,...,989,989,989,989,990,990,990,990,990,990
1,2,Beijing,Mainland China,40.1824,116.4142,14,22,36,41,68,...,399,400,400,410,410,411,413,414,414,418
2,3,Chongqing,Mainland China,30.0572,107.874,6,9,27,57,75,...,576,576,576,576,576,576,576,576,576,576
3,4,Fujian,Mainland China,26.0789,117.9874,1,5,10,18,35,...,293,294,294,296,296,296,296,296,296,296
4,5,Gansu,Mainland China,36.0611,103.8343,0,2,2,4,7,...,91,91,91,91,91,91,91,91,91,91


In [38]:
## DEATHS
# Read the whole `deaths` table back from the database, then show its
# first five rows as the cell output.
deaths_from_db = pd.read_sql_query('select * from deaths', con=engine)
deaths_from_db.head()

Unnamed: 0,id,province_state,country_region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,...,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20
0,1,Anhui,Mainland China,31.8257,117.2264,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
1,2,Beijing,Mainland China,40.1824,116.4142,0,0,0,0,0,...,4,4,4,5,7,8,8,8,8,8
2,3,Chongqing,Mainland China,30.0572,107.874,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
3,4,Fujian,Mainland China,26.0789,117.9874,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
4,5,Gansu,Mainland China,36.0611,103.8343,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


In [39]:
## RECOVERED
# Read the whole `recovered` table back from the database, then show its
# first five rows as the cell output.
recovered_from_db = pd.read_sql_query('select * from recovered', con=engine)
recovered_from_db.head()

Unnamed: 0,id,province_state,country_region,lat,long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,...,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20
0,1,Anhui,Mainland China,31.8257,117.2264,0,0,0,0,0,...,663,712,744,792,821,868,873,917,936,956
1,2,Beijing,Mainland China,40.1824,116.4142,0,0,1,2,2,...,198,215,235,248,257,271,276,282,288,297
2,3,Chongqing,Mainland China,30.0572,107.874,0,0,0,0,0,...,349,372,384,401,422,438,450,469,490,502
3,4,Fujian,Mainland China,26.0789,117.9874,0,0,0,0,0,...,183,199,218,228,235,243,247,255,260,270
4,5,Gansu,Mainland China,36.0611,103.8343,0,0,0,0,0,...,80,80,81,81,82,82,84,85,86,87
