In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk /kaggle/input and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # join the directory currently being visited with the bare file name
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# COVID-19 Modelling

Coronavirus disease (COVID-19) is an infectious disease caused by a newly discovered coronavirus.

Most people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment.

COVID-19 is a new infectious disease. There is much still unknown about how the disease
works, and how it will progress in the South African context.

Due to the rapidly changing nature of the outbreak globally and in South Africa, the projections
are updated regularly as new data become available. 

# Import Libraries

In [None]:
# for numerical analysis
import numpy as np
# to store and process data in dataframe
import pandas as pd

# basic visualization package
import matplotlib.pyplot as plt
# advanced plotting
import seaborn as sns

# interactive visualization
import plotly.express as px
import plotly.graph_objs as go
# import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# datetime operations
from datetime import datetime

import folium
from ggplot import *
from scipy.integrate import odeint
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import folium 
from folium import plugins

# Manipulating the default plot size
# The tuple becomes the default figure size for matplotlib figures created after this line.
plt.rcParams['figure.figsize'] = 10, 12

# Disable warnings
# NOTE(review): the 'ignore' action is installed without a category or module filter,
# so it applies to all warnings raised after this point, including deprecation notices.
import warnings
warnings.filterwarnings('ignore')

# Load Dataset

In [None]:
# All five CSV files live in the same dataset folder. The prefix is kept in one
# place so the notebook can be pointed at another copy of the data by editing
# a single line.
DATA_DIR = '/kaggle/input/covid19-sa/data'

# Provincial cumulative timelines: one table each for tests, confirmed cases,
# recoveries and deaths.
tests = pd.read_csv(f'{DATA_DIR}/covid19za_provincial_cumulative_timeline_testing.csv')
confirmed = pd.read_csv(f'{DATA_DIR}/covid19za_provincial_cumulative_timeline_confirmed.csv')
recovered = pd.read_csv(f'{DATA_DIR}/covid19za_provincial_cumulative_timeline_recoveries.csv')
deaths = pd.read_csv(f'{DATA_DIR}/covid19za_provincial_cumulative_timeline_deaths.csv')

# Timeline of cases by transmission type.
transmission = pd.read_csv(f'{DATA_DIR}/covid19za_timeline_transmission_type.csv')

In [None]:
# Parse the day-first date strings of the testing table into timestamps,
# then preview the five most recent rows.
tests = tests.assign(date=pd.to_datetime(tests['date'], format='%d-%m-%Y'))
tests.tail(5)

In [None]:
# Parse the day-first date strings of the confirmed-cases table into timestamps,
# then preview the five most recent rows.
confirmed = confirmed.assign(date=pd.to_datetime(confirmed['date'], format='%d-%m-%Y'))
confirmed.tail(5)

In [None]:
# Parse the day-first date strings of the recoveries table into timestamps,
# then preview the five most recent rows.
recovered = recovered.assign(date=pd.to_datetime(recovered['date'], format='%d-%m-%Y'))
recovered.tail(5)

In [None]:
# Remove the 'UNKNOWN' column from the recoveries table.
# The result is re-bound instead of dropped in place, and errors='ignore' lets
# this cell be re-run after the column is already gone without a KeyError.
recovered = recovered.drop(columns='UNKNOWN', errors='ignore')
recovered.head(2)

In [None]:
# Parse the day-first date strings of the deaths table into timestamps,
# then preview the first five rows.
deaths = deaths.assign(date=pd.to_datetime(deaths['date'], format='%d-%m-%Y'))
deaths.head(5)

In [None]:
# Remove the 'UNKNOWN' column from the deaths table.
# The result is re-bound instead of dropped in place, and errors='ignore' lets
# this cell be re-run after the column is already gone without a KeyError.
deaths = deaths.drop(columns='UNKNOWN', errors='ignore')
deaths.head(2)

# Clean-up Data

In [None]:
# Shape and NaN values
# (rows, columns) of the testing table
tests.shape

In [None]:
# number of missing values in each column of the testing table
tests.isna().sum()

In [None]:
# (rows, columns) of the confirmed-cases table
confirmed.shape

In [None]:
# number of missing values in each column of the confirmed-cases table
confirmed.isna().sum()

In [None]:
# Columns of the confirmed-cases table whose missing entries are replaced by 0:
# the nine provincial columns plus 'source' and 'UNKNOWN'.
zero_filled_columns = ['EC', 'FS', 'GP', 'LP', 'NC', 'MP', 'NW', 'WC', 'KZN',
                       'source', 'UNKNOWN']
for column in zero_filled_columns:
    confirmed[column] = confirmed[column].replace(np.nan, 0)

# re-count the missing values to confirm the replacement
confirmed.isna().sum()

In [None]:
# (rows, columns) of the recoveries table
recovered.shape

In [None]:
# number of missing values in each column of the recoveries table
recovered.isna().sum()

In [None]:
# (rows, columns) of the deaths table
deaths.shape

In [None]:
# number of missing values in each column of the deaths table
deaths.isna().sum()

In [None]:
deaths['source'] = deaths['source'].replace(np.nan, 0)
deaths.isna().sum()

In [None]:
deaths.head(5)

In [None]:
# Remove the 'Unnamed: 4' column from the transmission table.
# errors='ignore' lets this cell be re-run after the column is already gone
# without a KeyError.
transmission = transmission.drop(columns=['Unnamed: 4'], errors='ignore')
transmission.head(2)

# Visualise Data 

In [None]:
# Create total number of confirmed cases in new data frame
total_cases = pd.concat([confirmed['date'], confirmed['total'],
                         recovered['total'], deaths['total']], axis=1)
print('Total number of confirmed COVID-19 cases across South Africa):', total_cases)

In [None]:
total_cases['confirmed'] = confirmed['total']
total_cases['recovered'] = recovered['total']
total_cases['deaths'] = deaths['total']


In [None]:
total_cases['date'] = pd.to_datetime(total_cases['date'], format='%d-%m-%Y')

In [None]:
total_cases = total_cases.drop(['total', 'total', 'total'], axis=1)

In [None]:
total_cases['recovered'] = total_cases['recovered'].shift(47)
total_cases['deaths'] = total_cases['deaths'].shift(24)


In [None]:
# calculate recovery and deaths rates
total_cases['recov_rate'] = (total_cases['recovered']/total_cases['confirmed'])*100
total_cases['death_rate'] = (total_cases['deaths']/total_cases['confirmed'])*100

In [None]:
total_cases.tail(5)

In [None]:
# C
total_cases = pd.concat([confirmed['date'], confirmed['total'],
                         recovered['total'], deaths['total']], axis=1)

In [None]:
# Visualizations using Seaborn
# Overlaid horizontal bars: latest cumulative confirmed cases per province (red)
# with the latest cumulative recoveries drawn on top (green).

# Provincial column codes used by the confirmed-cases table.
PROVINCES = ['EC', 'FS', 'GP', 'KZN', 'LP', 'MP', 'NC', 'NW', 'WC']

# Last reported value per province. ffill() carries the most recent known value
# forward when the final row has gaps; reindex() yields NaN (no bar) rather
# than a KeyError if the recoveries table lacks a provincial column.
province_totals = (
    pd.DataFrame({
        'province': PROVINCES,
        'confirmed': confirmed[PROVINCES].ffill().iloc[-1].to_numpy(),
        'recovered': recovered.reindex(columns=PROVINCES).ffill().iloc[-1].to_numpy(),
    })
    .sort_values('confirmed', ascending=False)
)

f, ax = plt.subplots(figsize=(12, 8))

sns.set_color_codes("pastel")
sns.barplot(x="confirmed", y="province", data=province_totals,
            label="Total", color="r", ax=ax)

sns.set_color_codes("muted")
sns.barplot(x="recovered", y="province", data=province_totals,
            label="Recovered", color="g", ax=ax)

# Add a legend and informative axis label.
# No fixed xlim is set: the axis must scale to the actual case counts.
ax.legend(ncol=2, loc="lower right", frameon=True)
ax.set(ylabel="", xlabel="Cases")
sns.despine(left=True, bottom=True)

In [None]:

# Line chart of the national cumulative confirmed, recovered and death counts over time.
plt.figure(figsize=(8, 5))

# one (column, line colour, legend label) triple per curve, drawn in this order
curves = [
    ('confirmed', 'green', 'Confirmed cases'),
    ('recovered', 'black', 'Recovered'),
    ('deaths', 'red', 'Deaths'),
]
for column, colour, legend_label in curves:
    plt.plot(total_cases['date'], total_cases[column],
             color=colour, label=legend_label)

# legend first, then the axis labels
plt.legend()
plt.ylabel('Total cases')
plt.xlabel('Time')

plt.show()

In [None]:
temp = total_cases[['Date','Deaths', 'Recovered']].tail(1)
temp = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
fig = px.treemap(temp, path=["variable"], values="value", height=225, 
                 color_discrete_sequence=[act, rec, dth])
fig.data[0].textinfo = 'label+text+value'
fig.show()

In [None]:
#Calculate total number of cases
df['Total Active']= df['Total Cases']- df['Total Deaths'] + ['Total Recovered']

# SEIR Modelling

The SEIR model is a compartmental mathematical model: a system of differential equations that describes how an infectious disease spreads through a population over time.

The acronym SEIR is explained below:

Susceptible (S) – individuals in a population who have not yet been infected and could potentially catch the infection.

Exposed (E) – individuals who might have the virus but do not show any symptoms (in the case of COVID-19, they could have travelled from a COVID hotspot). These asymptomatic individuals can still transmit the disease to susceptible individuals.

Infectious (I) – individuals who are currently infected (active cases) and could potentially infect others they come in contact with.

Recovered (R) – individuals who have recovered (or have died) from the disease and are thereby immune to further infections.

In [None]:
# Parameters for SEIR model: initial compartment sizes.
# NOTE(review): N is assigned again (N = 59431006) in a later cell, and that
# value does not agree with the 11.0e+6 used here — confirm which population
# figure is intended.

I_0 = 1          # initial infected
R_0 = 0          # initial recovered: nobody has recovered at the start
                 # (not to be confused with the reproductive number)
E_0 = 20. * I_0  # initial exposed, taken as twenty times the initial infected
S_0 = 11.0e+6    # initial susceptible, i.e. excluding the infected and exposed

c = 0.0  # mutation term, set to zero (no mutation yet)

# total population = sum of the four compartments
N = sum((S_0, I_0, E_0, R_0))

In [None]:
# Line chart of column 'ML' against 'date' from the frame `total`, with the y axis
# labelled 'Reproductive number'. A dotted blue horizontal line marks y = 1, and
# dotted red vertical lines mark four dates; the text annotations at y = 3 label
# the periods between them 'Level 5' to 'Level 2'.
# NOTE(review): neither `total` nor a column named 'ML' is defined anywhere in the
# visible notebook — confirm where this frame is created before relying on this cell.
(ggplot(total, aes(x='date', y='ML'))
    + geom_line()
    + labs(x='Date', y='Reproductive number')
    + geom_hline(yintercept=1, linetype='dotted', color='blue')
    + geom_vline(xintercept=['2020-03-26', '2020-05-01',
                             '2020-06-01', '2020-08-18'], linetype='dotted', color='red')
    + annotate('text', x='2020-04-10', y=3, label='Level 5', size=6)
    + annotate('text', x='2020-05-15', y=3, label='Level 4', size=6)
    + annotate('text', x='2020-07-01', y=3, label='Level 3', size=6)
    + annotate('text', x='2020-09-01', y=3, label='Level 2', size=6)
    + theme_classic()
 )

In [None]:
# total population
N = 59431006

# initial active infections and initial recoveries
I0 = 1
R0 = 0  # initial recovered count, not the reproductive number

# susceptible: everyone who is neither infected nor recovered at the start
S0 = N - I0 - R0

# recovery rate per day (1/14, i.e. an average infectious period of 14 days)
G = 1/14

# basic reproductive number
R_NOUGHT = 2.07

# Transmission rate per day. The model equations use B as the coefficient of
# S * I / N, so it must be the transmission rate (reproductive number times
# recovery rate), not the reproductive number itself.
B = R_NOUGHT * G

# set the number of days to 250
t = range(0, 250)

In [None]:
# Differential equations of the SIR model (three compartments: S, I, R)
def derivative(y, t, N, B, G):
    """Return the time derivatives of the S, I and R compartments.

    Parameters
    ----------
    y : sequence of three numbers
        Current state as (susceptible, infected, recovered).
    t : float
        Current time. Not used by the equations; it is part of the signature
        because the ODE solver passes it to the callback.
    N : float
        Total population.
    B : float
        Transmission rate (coefficient of S * I / N).
    G : float
        Recovery rate (coefficient of I).

    Returns
    -------
    tuple of float
        (dS/dt, dI/dt, dR/dt). The three values sum to zero, so the total
        population is conserved.
    """
    # The state vector has exactly three entries; unpacking a fourth ("E")
    # would raise ValueError for the three-element initial condition.
    S, I, R = y
    new_infections = B * S * I / N  # flow from susceptible to infected
    new_recoveries = G * I          # flow from infected to recovered
    dSdt = -new_infections
    dIdt = new_infections - new_recoveries
    dRdt = new_recoveries
    return dSdt, dIdt, dRdt

In [None]:
# initial conditions
y0 = S0, I0, R0
ret = odeint(derivative, y0, t, args=(N, B, G))
S, I, R = ret.T

# create a dataframe for S, I and R
infection_model = pd.DataFrame(
    {'susceptible': S,
     'infected': I,
     'recovered': R,
     'days': t})

In [None]:
infection_model.tail()

In [None]:
# Plot the modelled recovered (red) and infected (black) curves against time in days.
(
    ggplot(infection_model, aes(x='days'))
    + geom_line(aes(y='recovered'), color='red')
    + geom_line(aes(y='infected'), color='black')
    + xlab('Time (days)')
    + ylab('Number of cases')
    + theme_bw()
    + theme()
)