# Covid-19 Report

* sorce data : https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import ipywidgets as widgets
from ipywidgets import interact, interactive, interact_manual, fixed

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

%matplotlib inline
%config InlineBackend.figure_format='svg'

In [2]:
pd.set_option("display.max_rows", 10)

## Load data

In [3]:
url_confirmed = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
url_deaths = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
url_recovered = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

urls = {
    "Confirmed" : url_confirmed,
    "Deaths": url_deaths,
    "Recovered" : url_recovered
}

dfs = []

for k, v in urls.items() :
    df = pd.read_csv(v)
    dfs.append(df)
    
df = pd.concat(dfs, keys=list(urls.keys()))

## Transform data

In [4]:
id_vars = ['Province/State', 'Country/Region', 'Lat', 'Long']

#value_vars = df.columns[~df.columns.str.contains('|'.join(id_vars))]
#pd.melt(df, id_vars=id_vars,value_vars=value_vars,var_name="Date",value_name="Confirmed")

dtemp = [pd.melt(df, id_vars=id_vars,var_name="Date",value_name=k) for k in list(urls.keys())]
df = pd.concat(dtemp, axis='columns')
df = df.loc[:, ~df.columns.duplicated()].copy()
df

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.939110,67.709953,1/22/20,0,0,0
1,,Albania,41.153300,20.168300,1/22/20,0,0,0
2,,Algeria,28.033900,1.659600,1/22/20,0,0,0
3,,Andorra,42.506300,1.521800,1/22/20,0,0,0
4,,Angola,-11.202700,17.873900,1/22/20,0,0,0
...,...,...,...,...,...,...,...,...
475111,,Vietnam,14.058324,108.277199,8/21/21,0,0,0
475112,,West Bank and Gaza,31.952200,35.233200,8/21/21,0,0,0
475113,,Yemen,15.552727,48.516388,8/21/21,0,0,0
475114,,Zambia,-13.133897,27.849332,8/21/21,0,0,0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 475116 entries, 0 to 475115
Data columns (total 8 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Province/State  141610 non-null  object 
 1   Country/Region  475116 non-null  object 
 2   Lat             472226 non-null  float64
 3   Long            472226 non-null  float64
 4   Date            475116 non-null  object 
 5   Confirmed       475116 non-null  int64  
 6   Deaths          475116 non-null  int64  
 7   Recovered       475116 non-null  int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 29.0+ MB


In [6]:
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
df = df.sort_values(['Country/Region','Date']).reset_index(drop=True)
df.drop(columns=['Province/State', 'Lat', 'Long'], inplace=True)
df

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered
0,Afghanistan,2020-01-22,0,0,0
1,Afghanistan,2020-01-22,0,0,0
2,Afghanistan,2020-01-22,0,0,0
3,Afghanistan,2020-01-23,0,0,0
4,Afghanistan,2020-01-23,0,0,0
...,...,...,...,...,...
475111,Zimbabwe,2021-08-20,4198,4198,4198
475112,Zimbabwe,2021-08-20,0,0,0
475113,Zimbabwe,2021-08-21,122487,122487,122487
475114,Zimbabwe,2021-08-21,4236,4236,4236


In [7]:
# There are some rows that contain data of confirmed/Deaths/Recoverd cases in the same country and the same day 
# just being separated by [Province/State] column previously, so it has to be recalculated to find 
# the number of infected people separated by country and day.

dCountry = df.groupby(['Country/Region','Date']).sum().reset_index()
dCountry['dialy_new_patient'] = dCountry.groupby(['Country/Region'])['Confirmed'].diff()
dCountry['dialy_new_patient_pct'] = dCountry.groupby(['Country/Region'])['Confirmed'].pct_change()
dCountry

Unnamed: 0,Country/Region,Date,Confirmed,Deaths,Recovered,dialy_new_patient,dialy_new_patient_pct
0,Afghanistan,2020-01-22,0,0,0,,
1,Afghanistan,2020-01-23,0,0,0,0.0,
2,Afghanistan,2020-01-24,0,0,0,0.0,
3,Afghanistan,2020-01-25,0,0,0,0.0,
4,Afghanistan,2020-01-26,0,0,0,0.0,
...,...,...,...,...,...,...,...
112705,Zimbabwe,2021-08-17,125202,125202,125202,1005.0,0.008092
112706,Zimbabwe,2021-08-18,125679,125679,125679,477.0,0.003810
112707,Zimbabwe,2021-08-19,126100,126100,126100,421.0,0.003350
112708,Zimbabwe,2021-08-20,126100,126100,126100,0.0,0.000000


## Compare the number of confirmed cases with interactive chart

In [8]:
countryList = dCountry['Country/Region'].unique()

@interact(country1=countryList, country2=countryList)
def plot_chart(country1,country2):
    df = dCountry[dCountry['Country/Region'].isin([country1, country2])]
    fig, ax = plt.subplots(figsize=(8,5))
    plt.yscale("symlog")
    plt.grid(True)
    sns.lineplot(data=df, x='Date', y='Confirmed', hue='Country/Region', ax=ax)

interactive(children=(Dropdown(description='country1', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra'…