In [3]:
# import warnings filter 
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

# Importing Dask packages
import dask.dataframe as dd
import dask.array as da

# Import other needed Python packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [4]:
import os
import wget

# Extracting the dataset online.
# A single path, relative to the notebook, is used for the existence check,
# the download target and the read, so the cell no longer depends on one
# machine's absolute directory layout.
path = os.path.join("..", "data", "covid19_data.csv")
url =  "https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv"

os.makedirs(os.path.dirname(path), exist_ok=True)

# Download to a temporary name first and only then replace the previous copy:
# if the download fails (e.g. no network), the dataset from an earlier run is
# still on disk instead of having been deleted up front.
tmp_path = path + ".part"
if os.path.exists(tmp_path):
    os.remove(tmp_path)
downloaded = wget.download(url, tmp_path)
os.replace(downloaded, path)

# Reading the dataset
# NOTE(review): the dtype key 'long' is lower-case while 'Lat' is capitalised;
# confirm the actual column name in the CSV header (possibly 'Long').
data = dd.read_csv(path,
                   dtype={'Confirmed': 'float64','Deaths': 'float64', 'Recovered':'float64','Lat':'float64', 'long':'float64'})

URLError: <urlopen error [Errno -2] Name or service not known>

### Preprocessing the datasets

In [None]:
data.head()

In [None]:
data.compute().describe()

In [None]:
data.compute().corr()

In [None]:
data.compute().info()

In [None]:
#Parsing the 'Date' feature to a date datatype 
data['Date'] = dd.to_datetime(data['Date'], infer_datetime_format=True) 

In [None]:
data.compute().info()

In [None]:
# Feature Engineering
data['CurrentCase'] = data['Confirmed'] - data['Recovered'] - data['Deaths']

In [None]:
data.head()

In [None]:
# Aggregating all cases in the world and grouping by date
date_df = data.groupby('Date')[['Confirmed' ,'Recovered', 'Deaths', 'CurrentCase']].sum()

In [None]:
date_df.head()

In [None]:
# Materialize the aggregated frame once; calling .compute() per column would
# re-run the whole dask graph (CSV read + groupby) for every plotted line.
date_totals = date_df.compute()

plt.figure(figsize=(15,10))
plt.plot(date_totals["Recovered"], color='g', label='Recovered')
plt.plot(date_totals["Confirmed"], color='b', label='Confirmed')
plt.plot(date_totals["Deaths"], color='r', label='Deaths')
plt.plot(date_totals['CurrentCase'], color='k', label='Current cases')
plt.xlabel('Date')
plt.ylabel('Number of cases')
plt.title('Global SARS Cov 2 over time')
plt.legend();

In [None]:
# Model initialization
from fbprophet import Prophet
model = Prophet()

In [None]:
# Add seasonality
model.add_seasonality(name='Monthly', period=30.42, fourier_order=5)

In [None]:
### Splitting datasets operation
global_cases = date_df.reset_index()
confirmed_df = global_cases[['Date', 'Confirmed']]
recovered_df = global_cases[['Date', 'Recovered']]
deaths_df = global_cases[['Date', 'Deaths']]
current_df = global_cases[['Date', 'CurrentCase']]

In [None]:
# Prophet requires the input columns to be named 'ds' (date) and 'y' (value)
def rename_func(dataframe):
    """Return a copy of ``dataframe`` with its first two columns renamed.

    The first column becomes ``'ds'`` and the second becomes ``'y'``; the
    input frame itself is not modified.
    """
    first_col = dataframe.columns[0]
    second_col = dataframe.columns[1]
    return dataframe.rename(columns={first_col: 'ds', second_col: 'y'})

In [None]:
confirmed_df = rename_func(confirmed_df)
recovered_df = rename_func(recovered_df)
deaths_df = rename_func(deaths_df)
current_df = rename_func(current_df)

In [None]:
# Get overview of changes
confirmed_df.head()

In [None]:
def train_test_split(dataframe, ratio):
    """
    Split a frame into leading (train) and trailing (test) rows, in order.

    Parameters:
    dataframe: Dataframe required to be split. A lazy frame exposing
        ``.compute()`` (e.g. dask) is materialized once; a pandas frame is
        used as is.
    ratio: Percentage of training split in the scale of 0-100

    Returns:
    (train, test, divisor) where ``divisor`` is the row position at which
    the split occurs.
    """
    # Materialize once instead of once per use.
    frame = dataframe.compute() if hasattr(dataframe, 'compute') else dataframe
    divisor = round((ratio/100)*frame.shape[0])
    train = frame.iloc[:divisor]
    test = frame.iloc[divisor:]
    return train, test, divisor

### Confirmed cases Prediction operations

In [None]:
confirmed_train, confirmed_test, divisor = train_test_split(confirmed_df, 70)

In [None]:
confirmed_train.shape, confirmed_test.shape

In [None]:
# Fitting model
model.fit(confirmed_train)

In [None]:
# Creating future dates
future_dates = model.make_future_dataframe(periods=40)

In [None]:
# Making predictions
predictions = model.predict(future_dates)

In [None]:
predictions.head()

In [None]:
predictions.columns

In [None]:
predictions.yhat.head()

In [None]:
model.plot(predictions);

In [None]:
model.plot_components(predictions);

In [None]:
def check_metrics(test, predictions):
    """Print RMSE and R-square of the forecast against the held-out rows.

    Parameters:
    test: frame with columns 'ds' (date) and 'y' (observed value).
    predictions: frame with columns 'ds' and 'yhat' (forecast value).

    Rows are matched on 'ds' rather than by position, so the result does not
    depend on a global split index and stays valid when the forecast horizon
    and the test set have different lengths.

    Raises:
    ValueError: if no date is present in both frames.
    """
    aligned = test[['ds', 'y']].merge(predictions[['ds', 'yhat']], on='ds', how='inner')
    if aligned.empty:
        raise ValueError("test and predictions share no 'ds' values to compare")
    # Square root of the mean squared error, i.e. RMSE.
    rmse_score = np.sqrt(mean_squared_error(aligned['y'], aligned['yhat']))
    R2_score = r2_score(aligned['y'], aligned['yhat'])
    print(f"RMSE: {rmse_score}")
    print(f"R-square Score: {R2_score}")

In [None]:
check_metrics(confirmed_test, predictions)

In [None]:
from fbprophet.plot import add_changepoints_to_plot
fig = model.plot(predictions)
changes = add_changepoints_to_plot(fig.gca(), model, predictions)

In [None]:
def determinate_beta_gamma_delta(data=None):
    '''
        Compute, for every row, the share of confirmed cases that are
        currently active (beta), recovered (gamma) and dead (delta).

        params: data - frame with columns 'Confirmed', 'Deaths', 'Recovered'
                and 'CurrentCase'; a lazy frame exposing ``.compute()``
                (e.g. dask) is materialized once, a pandas frame is used as is
        return: beta, gamma, delta as numpy float arrays, one value per row;
                rows with zero confirmed cases yield 0 for all three
    '''
    frame = data[['Confirmed', 'Deaths', 'Recovered', 'CurrentCase']]
    # One materialization for the whole function instead of four per row.
    if hasattr(frame, 'compute'):
        frame = frame.compute()

    confirmed = frame['Confirmed'].to_numpy(dtype=float)
    has_cases = confirmed != 0.0

    def _share(column):
        # Divide only where Confirmed is non-zero; other rows keep the 0 default.
        share = np.zeros(confirmed.shape, dtype=float)
        np.divide(frame[column].to_numpy(dtype=float), confirmed, out=share, where=has_cases)
        return share

    beta = _share('CurrentCase')
    gamma = _share('Recovered')
    delta = _share('Deaths')

    return beta, gamma, delta

In [None]:
transmission, recovery, fatality = determinate_beta_gamma_delta(data=global_cases)

In [None]:
# Materialize the lazy Date column explicitly and attach it by position, so the
# result does not rely on pandas coercing a dask Series or on index alignment.
dates = global_cases.Date.compute().to_numpy()
parameter_dynamic = pd.DataFrame(
    {'beta': transmission, 'gamma': recovery, 'delta': fatality},
    index=pd.Index(dates, name='Date'),
)

In [None]:
parameter_dynamic.head()

In [None]:
def find_R0(data=None):
    '''
        Compute the ratio beta / (gamma + delta) for every row.
        params: data - frame exposing 'beta', 'gamma' and 'delta' columns
        return: numpy array with one ratio per row
    '''
    numerator = data.beta.values
    denominator = data.gamma.values + data.delta.values
    return numerator / denominator

In [None]:
#Compute R0
# Rename by explicit mapping instead of positional assignment, and map any
# Greek labels back first, so this cell can be re-run without failing on
# `data.beta` after the columns were already renamed.
greek_names = {'beta': '\u03B2', 'gamma': '\u03B3', 'delta': '\u03B4'}
latin_names = {greek: latin for latin, greek in greek_names.items()}
parameter_dynamic = parameter_dynamic.rename(columns=latin_names)  # no-op on first run
parameter_dynamic['R0'] = find_R0(data=parameter_dynamic)
print(parameter_dynamic['R0'].min())
parameter_dynamic = parameter_dynamic.rename(columns=greek_names)
parameter_dynamic.columns

In [None]:
parameter_dynamic[['\u03B2', '\u03B3', '\u03B4']].plot(figsize=(15,7))
plt.legend(loc='best')
plt.title('parameter dynamics for spreading of SARS Cov 2 in the world');

In [None]:
# Plot R0
parameter_dynamic['R0'].plot(figsize=(15,7))
plt.legend(loc='best')
plt.title('ratio reproductive number for SARS Cov 2 Globally');

In [None]:
def growth_rate(data=None):
    """
        Compute the relative change between consecutive values of one variable.

        params: data - one-dimensional series of values; a lazy series
                exposing ``.compute()`` (e.g. dask) is materialized once,
                a pandas Series or array-like is used as is
        return: numpy array of the same length as the input (length 1 for an
                empty input); element 0 is 0 and element i is
                (v[i] - v[i-1]) / v[i-1]
    
    """
    # One materialization instead of three per loop iteration.
    series = data.compute() if hasattr(data, 'compute') else data
    values = np.asarray(series, dtype=float)
    changes = np.diff(values) / values[:-1]
    # The first observation has no predecessor, so its growth is defined as 0.
    return np.concatenate(([0.0], changes))

In [None]:
growth_rate_currentCase = pd.DataFrame(growth_rate(data=global_cases.CurrentCase), columns=['currentCase'])

In [None]:
growth_rate_currentCase.plot(figsize=(15,7))
plt.legend(loc='best')
plt.title('Infective growth rate for Covid 19 disease in globally');