In [None]:
import numpy as np
import pandas as pd

# Display every row of a DataFrame instead of truncating long outputs.
pd.set_option('display.max_rows', None)
# Print NumPy floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)
# Load the observations used by every later cell (path is relative to the working directory).
df = pd.read_csv('data/covid_19_data.csv')

In [None]:
# Preprocessing adapted from
# https://www.kaggle.com/chirag9073/coronavirus-covid-19-outbreak-data-analysis

# Drop the serial-number column. Rebinding `df` with errors='ignore' (instead of an
# in-place drop) keeps this cell re-runnable: a second run no longer raises KeyError.
df = df.drop(columns=['SNo'], errors='ignore')
# Parse the dates with one vectorised call rather than one pd.to_datetime call per row.
df['ObservationDate'] = pd.to_datetime(df['ObservationDate'])

In [None]:
# Preview the first ten rows of the preprocessed frame.
df.head(10)

In [None]:
# Number of missing values per column.
df.isnull().sum()

In [None]:
# First ten rows that have no Province/State value.
df[df['Province/State'].isnull()].head(10)

In [None]:
# Current situation: largest value reported for each (country, province) pair.
# NOTE(review): groupby drops rows whose Province/State is NaN by default, so countries
# reported without provinces do not appear in this table -- confirm that is intended.
# The columns are selected with a list: tuple-style selection on a GroupBy
# (`[...]['Confirmed', 'Deaths', 'Recovered']`) was removed in pandas 2.0.
grouped_df = df.groupby(['Country/Region', 'Province/State'])[['Confirmed', 'Deaths', 'Recovered']].max()
grouped_df.style.background_gradient(cmap='Pastel1_r')

In [None]:
# Top 10 countries by number of reported cases on the most recent observation date.
# Series.max() replaces the builtin max(), which iterates the column in Python.
latest_date = df['ObservationDate'].max()
latest_df = df[df['ObservationDate'] == latest_date].reset_index()
# List-based column selection: the tuple form was removed in pandas 2.0.
grouped_df = (
    latest_df.groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)

temp = (
    grouped_df[['Country/Region', 'Confirmed']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.head(10).style.background_gradient(cmap='Pastel1_r')

In [None]:
# Countries whose confirmed count equals their recovered count on the latest date,
# ordered from most to fewest confirmed cases.
temp = (
    grouped_df.loc[
        grouped_df['Confirmed'] == grouped_df['Recovered'],
        ['Country/Region', 'Confirmed', 'Recovered'],
    ]
    .sort_values('Confirmed', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')

In [None]:
# Most recent stats: world-wide totals per observation date, newest date first.
# List-based column selection: the tuple form was removed in pandas 2.0.
world_daily_df = (
    df.groupby('ObservationDate')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values('ObservationDate', ascending=False)
)
world_daily_df.head(1).style.background_gradient(cmap='Pastel1')

In [None]:
# Render the complete world-wide daily table (every date) with a colour gradient.
world_daily_df.style.background_gradient(cmap='Pastel1')

In [None]:
# Check the aggregated frame for missing values before it is used for training.
world_daily_df.isnull().sum()

In [None]:
import datetime
import os
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import StandardScaler
from keras.callbacks import EarlyStopping, ModelCheckpoint


class PandemicRegressor():
    """LSTM-based regressor wrapping a small Keras ``Sequential`` model.

    On construction a timestamped directory is created under ``<cwd>/model`` and
    the model is built; ``save`` writes the model to ``pdm_reg.h5`` inside it.

    Args:
        window_size: Number of time steps in each input sequence.
        batch_size: Batch size used by ``fit`` and ``evaluate``.
        n_feature: Number of features per time step; also the width of the
            output layer.
    """

    def __init__(self, window_size, batch_size, n_feature):
        self.window_size = window_size
        self.batch_size = batch_size
        # Previously stored as the misspelled `n_feture`, which made build_model()
        # fail with AttributeError when it read `self.n_feature`.
        self.n_feature = n_feature
        save_dir = os.path.join(os.getcwd(), 'model',
                                datetime.datetime.now().strftime('%Y-%m-%d;%H.%M.%S'))
        # makedirs also creates the parent 'model' directory on first use and does not
        # fail when two instances are created within the same second.
        os.makedirs(save_dir, exist_ok=True)
        self.model_path = os.path.join(save_dir, 'pdm_reg.h5')
        self.callbacks = [
            # Stop training once val_loss has not improved for 20 epochs.
            EarlyStopping(monitor='val_loss', patience=20)
        ]
        self.reg = self.build_model()

    def __del__(self):
        # __init__ may have failed before `self.reg` was assigned; only save a model
        # that actually exists instead of raising inside the finalizer.
        if getattr(self, 'reg', None) is not None:
            self.save()

    def build_model(self):
        """Build and compile the network: LSTM(16) -> Dense(16, relu) -> Dropout -> Dense.

        Returns:
            The compiled model (MSE loss, Adam optimizer, MAE and MAPE metrics).
        """
        model = Sequential([
            LSTM(16, input_shape=(self.window_size, self.n_feature), dropout=0.5),
            Dense(16, activation='relu'),
            Dropout(0.5),
            # One linear output per input feature.
            Dense(self.n_feature, activation='linear')
        ])
        model.compile(loss='mean_squared_error',
                      optimizer='adam',
                      metrics=['mae', 'mape'])
        return model

    def fit(self, X_train, y_train, X_val, y_val, epochs=500):
        """Train the model, using the configured callbacks and batch size.

        Args:
            X_train: Training inputs.
            y_train: Training targets.
            X_val: Validation inputs (monitored by the early-stopping callback).
            y_val: Validation targets.
            epochs: Maximum number of epochs.

        Returns:
            The object returned by the underlying model's ``fit`` call.
        """
        hist = self.reg.fit(X_train,
                            y_train,
                            epochs=epochs,
                            batch_size=self.batch_size,
                            validation_data=(X_val, y_val),
                            callbacks=self.callbacks,
                            verbose=1,
                            # Keep the samples in the order they were passed in.
                            shuffle=False)
        return hist

    def evaluate(self, X_test, y_test):
        """Evaluate the model and return its loss followed by the compiled metrics."""
        loss_and_metrics = self.reg.evaluate(X_test,
                                             y_test,
                                             batch_size=self.batch_size)
        return loss_and_metrics

    def save(self):
        """Write the model to ``self.model_path`` and print where it was saved."""
        self.reg.save(self.model_path)
        print('saved in ' + self.model_path)
        
# WINDOW_SIZE is otherwise only assigned in a later cell, so on a fresh kernel this line
# raised NameError. It is declared here with the same value; keep the two in sync.
WINDOW_SIZE = 4
# n_feature is a required constructor argument that was previously missing:
# three features per time step (Confirmed, Deaths, Recovered).
pdm_reg = PandemicRegressor(window_size=WINDOW_SIZE, batch_size=32, n_feature=3)

In [None]:
"""
Predict the global spread of coronavirus
"""
%matplotlib inline
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

def show_train_hist(hist):
    """Plot the training and validation loss curves of a training run.

    Args:
        hist: Object whose ``history`` mapping holds the per-epoch ``'loss'`` and
            ``'val_loss'`` sequences (e.g. the value returned by
            ``PandemicRegressor.fit``).
    """
    # (history key, matplotlib format string, legend label) for each curve.
    curve_specs = (
        ('loss', 'b-', 'train_loss'),
        ('val_loss', 'r', 'val_loss'),
    )
    for metric, line_fmt, legend_name in curve_specs:
        plt.plot(hist.history[metric], line_fmt, label=legend_name)

    plt.title("loss history")
    plt.legend()
    plt.tight_layout()
    plt.show()

# Normalize: per-column mean and standard deviation of the three world-wide series,
# computed over every date in world_daily_df.
_mean, _std = (
    stat(world_daily_df[['Confirmed', 'Deaths', 'Recovered']].values, axis=0)
    for stat in (np.mean, np.std)
)

print(_mean, _std, sep='\n')

In [None]:
WINDOW_SIZE = 4

def seq2dataset(sr, window_size):
    """Turn a 2-D series into (inputs, targets) pairs with a sliding window.

    Every window covers ``window_size + 1`` consecutive rows of ``sr``. The rows of
    each window are reversed, and the windows themselves are emitted in reverse
    start order. Within a reversed window the first ``window_size`` rows are the
    input and the last row is the target. For a newest-first ``sr`` (as
    ``world_daily_df`` is sorted) this yields chronological inputs, the following
    day as target, and samples ordered oldest first.

    Args:
        sr: 2-D array-like of shape (n_rows, n_features).
        window_size: Number of rows in each input sequence.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (n_samples, window_size, n_features)
        and ``y`` of shape (n_samples, n_features).
    """
    n_windows = len(sr) - window_size
    windows = [
        list(sr[start:start + window_size + 1])[::-1]
        for start in reversed(range(n_windows))
    ]
    stacked = np.array(windows)
    inputs = stacked[:, :window_size, :]
    targets = stacked[:, window_size, :]
    return inputs, targets

In [None]:
# Build the supervised samples from the world-wide series (world_daily_df is newest-first).
X, y = seq2dataset(world_daily_df[['Confirmed', 'Deaths', 'Recovered']].values, WINDOW_SIZE)
# Display the raw (not yet normalised) input windows.
X

In [None]:
# Targets that belong to the windows above (left unscaled).
y

In [None]:
# Standardise the inputs with the per-column statistics computed earlier.
# NOTE(review): this rebinds X from itself, so re-running the cell standardises twice;
# re-run the seq2dataset cell first. Only X is scaled here -- y keeps its raw scale.
X = (X - _mean) / _std

In [None]:
# Display the standardised input windows.
X

In [None]:
# Hold out 15% of the samples for testing; shuffle=False keeps the sample order, so the
# test set is the final portion of X / y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle=False)

In [None]:
# Training inputs.
X_train

In [None]:
# Held-out inputs.
X_test

In [None]:
# Training targets.
y_train

In [None]:
# Held-out targets.
y_test

In [None]:
# Train on the world-wide series.
# NOTE(review): the held-out split doubles as the validation set that drives early
# stopping, so the metrics printed below are measured on data the training loop saw.
hist = pdm_reg.fit(X_train=X_train,
                   y_train=y_train,
                   X_val=X_test,
                   y_val=y_test,
                   epochs=300)

show_train_hist(hist)
loss_and_metrics = pdm_reg.evaluate(X_test=X_test, y_test=y_test)
# The original label interpolated `col_name`, which is never defined (NameError);
# this run covers the world-wide totals, so it is labelled explicitly.
# Index 1 is MAE and index 2 is MAPE, matching the order of the compiled metrics.
print(f'[World] MAE: {loss_and_metrics[1]}, MAPE: {loss_and_metrics[2]}')

print('Train is done..')

In [None]:
# Daily totals for South Korea, newest date first.
# List-based column selection: the tuple form was removed in pandas 2.0.
nation_df = (
    df[df['Country/Region'] == 'South Korea']
    .groupby('ObservationDate')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .sort_values('ObservationDate', ascending=False)
)
nation_df.style.background_gradient(cmap='Pastel1')

In [None]:
# List every distinct Country/Region value present in the data.
print(df['Country/Region'].unique())

In [None]:
# Continue training the existing `pdm_reg` model on one country at a time.
# To cover every country, iterate over df['Country/Region'].unique() instead of the list.
for nation_name in ['South Korea']:
    # List-based column selection: the tuple form was removed in pandas 2.0.
    nation_df = (
        df[df['Country/Region'] == nation_name]
        .groupby('ObservationDate')[['Confirmed', 'Deaths', 'Recovered']]
        .sum()
        .reset_index()
        .sort_values('ObservationDate', ascending=False)
    )

    print(f'[{nation_name} spread of coronavirus19]')

    # Normalize with this country's own per-column statistics.
    nation_values = nation_df[['Confirmed', 'Deaths', 'Recovered']].values
    _mean = np.mean(nation_values, axis=0)
    _std = np.std(nation_values, axis=0)
    # A constant column has zero standard deviation; divide by 1 instead so the
    # inputs do not become NaN/inf.
    _std = np.where(_std == 0, 1.0, _std)

    X, y = seq2dataset(nation_values, WINDOW_SIZE)
    X = (X - _mean) / _std
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle=False)

    hist = pdm_reg.fit(X_train=X_train,
                       y_train=y_train,
                       X_val=X_test,
                       y_val=y_test,
                       epochs=300)

    show_train_hist(hist)
    loss_and_metrics = pdm_reg.evaluate(X_test=X_test, y_test=y_test)
    # The original label interpolated the undefined `col_name` (NameError);
    # the country being processed is the intended label.
    print(f'[{nation_name}] MAE: {loss_and_metrics[1]}, MAPE: {loss_and_metrics[2]}')

print('Train is done..')

In [None]:
# Persist the trained model to the path chosen when pdm_reg was constructed.
pdm_reg.save()