In [1]:
%config IPCompleter.greedy=True
%matplotlib inline

In [2]:
import pandas as pd, numpy as np, os, sys
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from IPython.display import display, HTML

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

font = {'size'   : 18}
matplotlib.rc('font', **font)

def plotHorizontalBars(series, xlabel, title):
    xpos = np.arange(len(series.index), 0, -1)
    plt.barh(xpos, series, align='center', alpha=0.5, color='g')
    plt.grid(axis='x')
    plt.yticks(xpos, series.index)
    plt.xlabel(xlabel)
    plt.title(title)

def plotVerticalBars(series, ylabel, title):
    xpos = np.arange(len(series.index), 0, -1)
    plt.bar(xpos, series, align='center', alpha=0.5, color='g')
    plt.grid(axis='y')
    plt.xticks(xpos, series.index)
    plt.ylabel(ylabel)
    plt.title(title)

In [3]:
import ssl, re
ssl._create_default_https_context = ssl._create_unverified_context
pd.__version__

'0.25.3'

In [4]:
# on Github, open CSV file and click on 'Raw'. Copy URL

filename = '03-10-2020.csv'
data_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'+filename
orig_ts_df = pd.read_csv(data_url)
display(orig_ts_df.head(4))
print(orig_ts_df.shape)

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
0,Hubei,Mainland China,2020-03-10T15:13:05,67760,3024,47743,30.9756,112.2707
1,,Italy,2020-03-10T17:53:02,10149,631,724,43.0,12.0
2,,Iran (Islamic Republic of),2020-03-10T19:13:20,8042,291,2731,32.0,53.0
3,,Republic of Korea,2020-03-10T19:13:20,7513,54,247,36.0,128.0


(206, 8)


In [5]:
orig_us_df = orig_ts_df[orig_ts_df['Country/Region'] == 'US'].copy()
orig_us_df = orig_us_df.sort_values(['Country/Region', 'Province/State'])
display(orig_us_df)
print("total confirmed:", orig_us_df.Confirmed.sum())

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
193,Alaska,US,2020-03-10T02:33:04,0,0,0,61.3707,-152.4044
127,Arizona,US,2020-03-10T02:53:04,6,0,1,33.7298,-111.4312
194,Arkansas,US,2020-03-10T02:33:04,0,0,0,34.9697,-92.3731
40,California,US,2020-03-10T19:13:28,144,2,2,36.1162,-119.6816
94,Colorado,US,2020-03-10T18:33:03,15,0,0,39.0598,-105.3111
162,Connecticut,US,2020-03-10T03:13:17,2,0,0,41.5978,-72.7554
195,Delaware,US,2020-03-10T02:33:04,0,0,0,39.3185,-75.5071
65,Diamond Princess,US,2020-03-10T02:33:04,46,0,0,35.4437,139.638
139,District of Columbia,US,2020-03-10T03:53:03,5,0,0,38.8974,-77.0268
95,Florida,US,2020-03-10T13:13:14,15,2,0,27.7663,-81.6868


total confirmed: 959


In [6]:
ts_df = orig_ts_df[~orig_ts_df['Province/State'].astype(str).map(lambda x: re.search('Diamond Princess', x) is not None)]
ts_df = ts_df[~ts_df['Province/State'].astype(str).map(lambda x: re.search('Grand Princess', x) is not None)]
ts_df = ts_df.sort_values(['Country/Region', 'Province/State'])
print(ts_df.shape)

(202, 8)


In [7]:
us_df = ts_df[ts_df['Country/Region'] == 'US'].copy()
us_df = us_df.sort_values(['Country/Region', 'Province/State'])
display(us_df)
print('total confirmed:', us_df.Confirmed.sum())

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Latitude,Longitude
193,Alaska,US,2020-03-10T02:33:04,0,0,0,61.3707,-152.4044
127,Arizona,US,2020-03-10T02:53:04,6,0,1,33.7298,-111.4312
194,Arkansas,US,2020-03-10T02:33:04,0,0,0,34.9697,-92.3731
40,California,US,2020-03-10T19:13:28,144,2,2,36.1162,-119.6816
94,Colorado,US,2020-03-10T18:33:03,15,0,0,39.0598,-105.3111
162,Connecticut,US,2020-03-10T03:13:17,2,0,0,41.5978,-72.7554
195,Delaware,US,2020-03-10T02:33:04,0,0,0,39.3185,-75.5071
139,District of Columbia,US,2020-03-10T03:53:03,5,0,0,38.8974,-77.0268
95,Florida,US,2020-03-10T13:13:14,15,2,0,27.7663,-81.6868
90,Georgia,US,2020-03-10T03:53:03,17,0,0,33.0406,-83.6431


total confirmed: 892


In [8]:
countries = ['Mainland China', 'US', 'Italy', 'South Korea', 'Germany', 'France', 'Iran']

lin_df = ts_df.drop(['Latitude', 'Longitude', 'Province/State', 'Last Update'], axis=1)
lin_df = lin_df.groupby('Country/Region', as_index=False).sum()
lin_df = lin_df[lin_df['Country/Region'].isin(countries)]

lin_df['Current Mortality Rate'] = lin_df['Deaths']/(lin_df['Deaths'] + lin_df['Recovered'])
lin_df['Current Mortality Rate'] = lin_df['Current Mortality Rate'].map(lambda x: '{:.1f}%'.format(x*100))

lin_df['WHO Mortality Rate'] = lin_df['Deaths']/lin_df['Confirmed']
lin_df['WHO Mortality Rate'] = lin_df['WHO Mortality Rate'].map(lambda x: '{:.1f}%'.format(x*100))
display(lin_df)

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Current Mortality Rate,WHO Mortality Rate
36,France,1784,33,12,73.3%,1.8%
39,Germany,1457,2,18,10.0%,0.1%
52,Italy,10149,631,724,46.6%,6.2%
62,Mainland China,80757,3136,60106,5.0%,3.9%
109,US,892,28,8,77.8%,3.1%
