# Imports

In [0]:
# Testing Kuwait Graph Publication
> Something I will post later on

- toc: true
- branch: master
- badges: true
- comments: true
- author: Nabeel Khan
- categories: [Kuwait, COVID-19, jupyter]

In [1]:
#hide_input
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot, plot_mpl
import plotly.offline as py

# Initialise plotly's offline notebook mode.
plotly.offline.init_notebook_mode(connected=True)
# Set matplotlib's default font size to 14.
plt.rcParams.update({'font.size': 14})

import plotly.io as pio
# Make 'colab' the default plotly renderer for the fig.show() calls below.
# NOTE(review): presumably this assumes the notebook runs on Google Colab - confirm.
pio.renderers.default = 'colab'

  import pandas.util.testing as tm


# Data

Import & Clean Data

In [0]:
#hide_input
# Load the three JHU CSSE global time series. Each file is read in wide form
# and reshaped further down so that every non-identifier column becomes a
# 'date' row.
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recoveries_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

# Kuwait-specific series, read from a separate repository.
kuwait_df = pd.read_csv('https://raw.githubusercontent.com/nabeelkhan/COVID19/master/data/Time_Series_COVID19_Kuwait.csv')


# Drop date columns if they are mostly NaN (more than 99% missing).
# The mask is computed from confirmed_df alone, so a flagged column is not
# guaranteed to exist in the deaths/recoveries files; errors='ignore' stops
# drop() from raising KeyError for a label that is absent there.
na_columns = (confirmed_df.isna().sum() / confirmed_df.shape[0]) > 0.99
na_columns = na_columns[na_columns]

confirmed_df = confirmed_df.drop(na_columns.index, axis=1)
deaths_df = deaths_df.drop(na_columns.index, axis=1, errors='ignore')
recoveries_df = recoveries_df.drop(na_columns.index, axis=1, errors='ignore')

## Tidy up the data
# Wide -> long: the identifier columns are kept, every other column name moves
# into 'date' and its cell value into the named value column.
id_columns = ['Country/Region', 'Province/State', 'Lat', 'Long']
confirmed_df = confirmed_df.melt(id_vars=id_columns, var_name='date', value_name='confirmed')
deaths_df = deaths_df.melt(id_vars=id_columns, var_name='date', value_name='deaths')
recoveries_df = recoveries_df.melt(id_vars=id_columns, var_name='date', value_name='recoveries')

# Date Conversion
confirmed_df['date'] = pd.to_datetime(confirmed_df['date'])
deaths_df['date'] = pd.to_datetime(deaths_df['date'])
recoveries_df['date'] = pd.to_datetime(recoveries_df['date'])
kuwait_df['Date'] = pd.to_datetime(kuwait_df['Date'])

In [0]:
# recoveries_df[recoveries_df["Country/Region"]=="Kuwait"]

In [0]:
# confirmed_df[confirmed_df['Country/Region']== 'Kuwait']
# confirmed_df[confirmed_df['confirmed'] > 0 & (confirmed_df['Country/Region']=='Canada')]

In [0]:
# deaths_df[deaths_df["Country/Region"]=="Kuwait"]

In [0]:
# kuwait_df

Functions

In [0]:
def add_rates(df):
    """Add change and rate columns derived from 'Confirmed', 'Deaths' and 'Recoveries'.

    The frame is modified in place and the same object is returned.

    Each "change" column compares a row with the row that follows it, so the
    result is only meaningful when the rows are already in date order; this
    function does not sort them.

    Columns added:
        Confirmed Change   -- next row's 'Confirmed' minus this row's.
        Mortality Rate     -- 'Deaths' / 'Confirmed'.
        Recovery Rate      -- 'Recoveries' / 'Confirmed'.
        Growth Rate        -- 'Confirmed Change' / 'Confirmed'.
        Growth Rate Change -- next row's 'Growth Rate' minus this row's.
        Growth Rate Accel  -- 'Growth Rate Change' / 'Growth Rate'.

    A ratio whose denominator is zero or missing is NaN. (The previous
    version filled missing denominators with 0, which produced +/-inf.)
    """
    # Zero denominators become NaN so the divisions below never yield inf.
    confirmed = df['Confirmed'].where(df['Confirmed'] != 0)

    # diff() is row minus previous row; shift(-1) moves it up one row, so
    # each row holds the change that happens between it and the next row.
    df['Confirmed Change'] = df['Confirmed'].diff().shift(-1)

    df['Mortality Rate'] = df['Deaths'] / confirmed
    df['Recovery Rate'] = df['Recoveries'] / confirmed
    df['Growth Rate'] = df['Confirmed Change'] / confirmed
    df['Growth Rate Change'] = df['Growth Rate'].diff().shift(-1)

    growth_rate = df['Growth Rate'].where(df['Growth Rate'] != 0)
    df['Growth Rate Accel'] = df['Growth Rate Change'] / growth_rate
    return df

# Dataframes

In [8]:
#hide_input
# Full DataFrame -> Missing Canadian Details
# Join the three long-format series on the columns they share, then switch to
# the capitalised column names used by the rest of the notebook.
full_df = (
    confirmed_df
    .merge(recoveries_df)
    .merge(deaths_df)
    .rename(columns={
        'Country/Region': 'Country',
        'date': 'Date',
        'confirmed': "Confirmed",
        "recoveries": "Recoveries",
        "deaths": "Deaths",
    })
)
# Check nulls
full_df.isnull().sum()

Country               0
Province/State    13706
Lat                   0
Long                  0
Date                  0
Confirmed             0
Recoveries            0
Deaths                0
dtype: int64

In [0]:
#hide_input
# Canada df
# Restrict each long-format series to its Canadian rows.
confirmed_ca = confirmed_df.loc[confirmed_df["Country/Region"] == "Canada"]
recoveries_ca = recoveries_df.loc[recoveries_df["Country/Region"] == "Canada"]
deaths_ca = deaths_df.loc[deaths_df["Country/Region"] == "Canada"]

# Join the three series on their shared columns and adopt the notebook-wide
# column names.
ca_df = (
    confirmed_ca
    .merge(recoveries_ca)
    .merge(deaths_ca)
    .rename(columns={
        'Country/Region': 'Country',
        'date': 'Date',
        'confirmed': 'Confirmed',
        'recoveries': 'Recoveries',
        'deaths': 'Deaths',
    })
)

# One row per date: sum the three counts over all Canadian rows.
canada_df = (
    ca_df
    .groupby(['Date'])[['Confirmed', 'Recoveries', 'Deaths']]
    .sum()
    .reset_index()
)

canada_df = add_rates(canada_df)

In [0]:
#hide_input
# World df
# One row per date: sum the three counts over every row of full_df.
world_df = (
    full_df
    .groupby(['Date'])[['Confirmed', 'Recoveries', 'Deaths']]
    .sum()
    .reset_index()
)

world_df = add_rates(world_df)

In [11]:
#hide_input
# Drop 0 Values
# (rows, columns) of the Kuwait frame before any rows are removed.

kuwait_df.shape

(43, 5)

In [12]:
#hide_input
# Show the rows whose 'Case' value is zero.
kuwait_df[kuwait_df['Case'] == 0]

Unnamed: 0,Date,Case,Critical,Deceased,Recovered
31,2020-03-06,0,0,0,0
33,2020-03-04,0,2,0,0
34,2020-03-03,0,0,0,0
37,2020-02-29,0,0,0,0


In [13]:
#hide_input
# Summary statistics of the 'Case' column.
kuwait_df['Case'].describe()

count     43.000000
mean      15.465116
std       22.995185
min        0.000000
25%        3.000000
50%        8.000000
75%       17.000000
max      109.000000
Name: Case, dtype: float64

In [0]:
#hide_input
# Remove the rows whose 'Case' value is zero. Selecting them by value rather
# than by hard-coded row labels keeps this cell correct when the source CSV
# gains or reorders rows. reset_index() (without drop=True) still stores the
# old row labels in an 'index' column, exactly as before.
kuwait_df = kuwait_df[kuwait_df['Case'] != 0].reset_index()

In [0]:
#hide_input
# Kuwait df
# Sort by date first so the running totals below accumulate in date order.
# (This statement previously carried a stray leading space, which Python
# rejects as an unexpected indent at top level.)
kuwait_df = kuwait_df.sort_values(by=['Date'], ignore_index=True)

# Running totals of the per-row Case / Recovered / Deceased values.
kuwait_df['Confirmed'] = kuwait_df['Case'].cumsum()
kuwait_df['Recoveries'] = kuwait_df['Recovered'].cumsum()
kuwait_df['Deaths'] = kuwait_df['Deceased'].cumsum()

In [0]:
#hide_input
# kuwait_df['Conf_cs Diff n shift'] = kuwait_df['Confirmed_cs'].diff().shift(-1)
# kuwait_df['Conf_cs Diff'] = kuwait_df['Confirmed_cs'].diff()
# kuwait_df['Conf_cs shift'] = kuwait_df['Confirmed_cs'].shift(-1)

# kuwait_df = kuwait_df.rename(columns={'Case': 'Confirmed', 'Recovered': 'Recoveries', 'Deceased':'Deaths'})

# Append the change/rate columns computed by add_rates to the Kuwait frame.
kuwait_df = add_rates(kuwait_df)

In [17]:
#hide_input
# Re-check for zero-'Case' rows after the drop; the result should be empty.
kuwait_df[kuwait_df['Case'] == 0]

Unnamed: 0,index,Date,Case,Critical,Deceased,Recovered,Confirmed,Recoveries,Deaths,Confirmed Change,Mortality Rate,Recovery Rate,Growth Rate,Growth Rate Change,Growth Rate Accel


In [0]:
#hide_input
# One row per date: sum the three running-total columns of kuwait_df.
kuwait = (
    kuwait_df
    .groupby(['Date'])[['Confirmed', 'Recoveries', 'Deaths']]
    .sum()
    .reset_index()
)

kuwait = add_rates(kuwait)

# Kuwaiti Cases

In [19]:
#hide_input
# Summary statistics of the Kuwait frame, including the derived columns.
print('Kuwait Data Describe')
kuwait_df.describe()

Kuwait Data Describe


Unnamed: 0,index,Case,Critical,Deceased,Recovered,Confirmed,Recoveries,Deaths,Confirmed Change,Mortality Rate,Recovery Rate,Growth Rate,Growth Rate Change,Growth Rate Accel
count,39.0,39.0,39.0,39.0,39.0,39.0,39.0,39.0,38.0,39.0,39.0,38.0,37.0,37.0
mean,19.692308,17.051282,0.538462,0.025641,2.641026,177.230769,30.923077,0.076923,17.368421,0.000138,0.11925,0.16113,-0.027134,0.493324
std,12.445997,23.594099,0.883955,0.160128,2.942315,151.007574,33.886331,0.269953,23.826423,0.00049,0.095923,0.289927,0.172092,1.669147
min,0.0,1.0,0.0,0.0,0.0,5.0,0.0,0.0,1.0,0.0,0.0,0.005319,-0.70979,-0.928865
25%,9.5,4.0,0.0,0.0,0.0,64.5,1.5,0.0,4.0,0.0,0.023113,0.04372,-0.038736,-0.456209
50%,19.0,10.0,0.0,0.0,2.0,142.0,15.0,0.0,10.5,0.0,0.105634,0.075624,0.01042,0.136364
75%,28.5,17.0,1.0,0.0,4.0,230.0,60.5,0.0,17.0,0.0,0.200416,0.110063,0.036923,0.923077
max,42.0,109.0,4.0,1.0,11.0,665.0,103.0,1.0,109.0,0.002088,0.27234,1.363636,0.195169,8.782609


In [20]:
#hide_input
# Display the Kuwait frame together with its derived columns.
print('Kuwait Data Frame')
kuwait_df

Kuwait Data Frame


Unnamed: 0,index,Date,Case,Critical,Deceased,Recovered,Confirmed,Recoveries,Deaths,Confirmed Change,Mortality Rate,Recovery Rate,Growth Rate,Growth Rate Change,Growth Rate Accel
0,42,2020-02-24,5,0,0,0,5,0,0,6.0,0.0,0.0,1.2,0.163636,0.136364
1,41,2020-02-25,6,0,0,0,11,0,0,15.0,0.0,0.0,1.363636,-0.70979,-0.520513
2,40,2020-02-26,15,0,0,0,26,0,0,17.0,0.0,0.0,0.653846,-0.607335,-0.928865
3,39,2020-02-27,17,0,0,0,43,0,0,2.0,0.0,0.0,0.046512,-0.024289,-0.522222
4,38,2020-02-28,2,0,0,0,45,0,0,1.0,0.0,0.0,0.022222,0.195169,8.782609
5,36,2020-03-01,1,0,0,0,46,0,0,10.0,0.0,0.0,0.217391,-0.181677,-0.835714
6,35,2020-03-02,10,0,0,0,56,0,0,2.0,0.0,0.0,0.035714,0.01601,0.448276
7,32,2020-03-05,2,0,0,1,58,1,0,3.0,0.0,0.017241,0.051724,-0.002544,-0.04918
8,30,2020-03-07,3,0,0,0,61,1,0,3.0,0.0,0.016393,0.04918,-0.033555,-0.682292
9,29,2020-03-08,3,1,0,0,64,1,0,1.0,0.0,0.015625,0.015625,0.045913,2.938462


In [0]:
#hide_input
def plot_aggregate_metrics(df, fig=None, title='Kuwaiti Cases as of 06/04/2020'):
    """Plot the 'Confirmed', 'Deaths' and 'Recoveries' columns of ``df`` against 'Date'.

    Parameters
    ----------
    df
        Table indexable by the column names 'Date', 'Confirmed', 'Deaths'
        and 'Recoveries'.
    fig : optional
        Figure to draw on. When omitted, a new ``go.Figure()`` is created.
    title : str, optional
        Text of the chart title. Defaults to the title this function always
        used before the parameter existed, so existing calls are unaffected.

    Returns
    -------
    The figure, after its layout has been updated and three 'lines+markers'
    traces have been added in the order Confirmed, Deaths, Recoveries.
    """
    if fig is None:
        fig = go.Figure()

    fig.update_layout(
        template='ggplot2',
        yaxis_zeroline=False,
        xaxis_zeroline=False,
        xaxis_title='Timeline <br> <br> Data Source: MOH Kuwait <br> Copyright: Nabeel Khan (nabeelkhan.com/ @TheNabeelKhan)',
        yaxis_title='No. Of Cases',
        title={'text': title, 'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    )

    # One trace per metric; the column name doubles as the legend label.
    for column in ('Confirmed', 'Deaths', 'Recoveries'):
        fig.add_trace(go.Scatter(x=df['Date'],
                                 y=df[column],
                                 mode='lines+markers',
                                 name=column))
    return fig

## Total Kuwaiti Cases as of 06/04/2020

In [22]:
#hide_input
# Build the Confirmed / Deaths / Recoveries chart for Kuwait and display it.
plot_aggregate_metrics(kuwait_df).show()

## Daily Percent Change in Growth Rate

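Useful for tracking whether the growth rate itself is speeding up or slowing down. Each point is the change in the growth rate relative to the current growth rate: a positive percentage means the growth rate is rising (cases are growing faster than a constant-rate exponential), and a negative percentage means it is falling.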

In [23]:
#hide_input
# Start from an empty figure with the notebook's usual theme, centred title
# and the attribution carried in the x-axis label.
fig = go.Figure()
fig.update_layout(
    template='ggplot2',
    yaxis_zeroline=False,
    xaxis_zeroline=False,
    xaxis_title='Timeline <br> <br> Data Source: MOH Kuwait <br> Copyright: Nabeel Khan (nabeelkhan.com/ @TheNabeelKhan)',
    yaxis_title='Growth Rate',
    title=dict(
        text='Kuwaiti Cases as of 06/04/2020',
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
)

# Keep only the rows whose acceleration is below 10; rows where the value is
# NaN fail the comparison and are left out as well.
tmp_df = kuwait_df.loc[kuwait_df['Growth Rate Accel'] < 10].copy()

fig.add_trace(
    go.Scatter(
        x=tmp_df['Date'],
        y=tmp_df['Growth Rate Accel'],
        mode='lines+markers',
        name='Growth Acceleration',
    )
)
# Format the y-axis ticks as percentages with two decimals.
fig.update_layout(yaxis=dict(tickformat=".2%"))

fig.show()

## Confirmed Cases by Country