In [2]:
# Fetch the COVID-19 dataset and perform analysis
### source: https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
### raw data: https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
### raw data (deaths): https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv


In [4]:
import pandas as pd
import geopandas as gpd
import descartes

# Data Visualization Pkg
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
# The three global time-series files sit in the same directory of the
# CSSEGISandData/COVID-19 repository; only the file name differs.
_TIME_SERIES_DIR = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)
confirmed_cases_url = _TIME_SERIES_DIR + "time_series_covid19_confirmed_global.csv"
death_cases_url = _TIME_SERIES_DIR + "time_series_covid19_deaths_global.csv"
recovered_cases_url = _TIME_SERIES_DIR + "time_series_covid19_recovered_global.csv"

In [8]:
# Load the confirmed-cases time series straight from the raw GitHub URL.
df_confirmed = pd.read_csv(confirmed_cases_url)
# Only the last expression of a cell is rendered, so a bare `.head()` placed
# before it would be silently discarded; show the (rows, columns) shape here.
df_confirmed.shape

(264, 95)

In [9]:
# (rows, columns) of the wide frame; same check as the last line of the loading cell.
df_confirmed.shape

(264, 95)

In [20]:
# Column labels of the wide frame: the identifier columns first, then one column per date.
df_confirmed.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20',
       '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20',
       '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
       '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
       '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20',
       '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20',
       '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20',
       '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20',
       '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20',
       '3/27/20', '3/28/20', '3/29/20', '3/30/20', '3/31/20', '4/1/20',
       '4/2/20', '4/3/20', '4/4/20', '4/5/20', '4/6/20', '4/7/20', '4/8/20',
       '4/9/20', '4/10/20'

In [21]:
# Reshape from wide to long format: keep the four location columns as
# identifiers and melt every remaining (date) column into rows.
# melt() names the two new columns "variable" and "value" by default.
confirmed_df = df_confirmed.melt(
    id_vars=[
        'Province/State',
        'Country/Region',
        'Lat',
        'Long',
    ],
)
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,variable,value
0,,Afghanistan,33.0,65.0,1/22/20,0
1,,Albania,41.1533,20.1683,1/22/20,0
2,,Algeria,28.0339,1.6596,1/22/20,0
3,,Andorra,42.5063,1.5218,1/22/20,0
4,,Angola,-11.2027,17.8739,1/22/20,0


In [22]:
# Give the melted columns meaningful names. The result is rebound rather than
# renamed with inplace=True, so the frame an earlier cell displayed is not
# mutated behind the reader's back.
confirmed_df = confirmed_df.rename(columns={"variable": "Date", "value": "Confirmed"})
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed
0,,Afghanistan,33.0,65.0,1/22/20,0
1,,Albania,41.1533,20.1683,1/22/20,0
2,,Algeria,28.0339,1.6596,1/22/20,0
3,,Andorra,42.5063,1.5218,1/22/20,0
4,,Angola,-11.2027,17.8739,1/22/20,0


In [24]:
# (rows, columns) of the long-format frame produced by the melt.
confirmed_df.shape

(24024, 6)

In [25]:
# Peek at the last rows of the long-format frame.
confirmed_df.tail()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed
24019,Saint Pierre and Miquelon,France,46.8852,-56.3159,4/21/20,1
24020,,South Sudan,6.877,31.307,4/21/20,4
24021,,Western Sahara,24.2155,-12.8858,4/21/20,6
24022,,Sao Tome and Principe,0.18636,6.613081,4/21/20,4
24023,,Yemen,15.552727,48.516388,4/21/20,1


In [26]:
# Let's create a function that first fetches the data and then melts it

In [27]:
def get_n_melt_data(data_url, case_type, id_vars=None):
    """Read a wide time-series CSV and reshape it to long format.

    Parameters
    ----------
    data_url : str or file-like
        Passed straight to ``pandas.read_csv`` (URL, local path or open buffer).
    case_type : str
        Name given to the melted value column, e.g. ``"Confirmed"``.
    id_vars : list of str, optional
        Columns kept as identifiers. Defaults to
        ``['Province/State', 'Country/Region', 'Lat', 'Long']``.

    Returns
    -------
    pandas.DataFrame
        The identifier columns followed by ``"Date"`` (the former column
        labels, not parsed as dates) and ``case_type`` (the cell values).
    """
    if id_vars is None:
        id_vars = ['Province/State', 'Country/Region', 'Lat', 'Long']
    # Naming the melted columns directly avoids a follow-up in-place rename.
    return pd.read_csv(data_url).melt(
        id_vars=id_vars,
        var_name="Date",
        value_name=case_type,
    )

In [None]:
# Let's fetch the other datasets (recovered and deaths)
