# Time Series Prediction of Global Covid-19 Cases


In [0]:
%tensorflow_version 2.x # make sure that collab use tensorflow 2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import os
import datetime

%matplotlib inline
%config InlineBackend.figure_format='retina'

# Plot theme first, then the default figure size (kept in this order so the
# explicit figure size is the last value written to rcParams).
sns.set(font_scale=1.5, palette='muted', style='whitegrid')
rcParams['figure.figsize'] = (16, 10)

# Seed the NumPy and TensorFlow random generators from a single constant.
RANDOM_SEED = 42

tf.random.set_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)


In [0]:
# Fetch the global totals CSV; get_file returns the local path of the file.
downloaded = tf.keras.utils.get_file(
    fname='global_total.csv',
    origin='https://raw.githubusercontent.com/virgiawan/covid-19-prediction/master/dataset/global_total.csv',
    extract=False,
)
# The returned path already points at the CSV, so it is used directly.
# Nothing is bound to `_`, which IPython uses for the last cell output.
csv_path = downloaded
downloaded

In [0]:
# !rm '/root/.keras/datasets/global_total.csv'

In [0]:
# Load the downloaded CSV into a DataFrame.
df = pd.read_csv(csv_path)


In [0]:
# Select the confirmed-case column (its header carries a leading space) and
# re-index it with "dd/mm/YYYY" date strings, one consecutive day per row
# counted from 22 Jan 2020.
# NOTE(review): assumes the CSV has exactly one row per day from that date --
# confirm against the dataset.
univariat_data = df[' Confirmed Case']
base = datetime.datetime(2020, 1, 22, 0, 0)
dateToString = lambda date: date.strftime("%d/%m/%Y")
date_list = [
    dateToString(base + datetime.timedelta(days=day_offset))
    for day_offset in range(len(univariat_data))
]
univariat_data.index = date_list
univariat_data

In [0]:
# Plot the confirmed-case series with a figure title.
univariat_data.plot(subplots=True, title="Covid 19 Global Occurence")

In [0]:
# Wrap the Series in a single-column DataFrame and display it.
df_univariat = univariat_data.to_frame()
df_univariat

In [0]:
# Ordered 80/20 split: the leading 80% of rows form the training set and the
# remaining rows form the test set (no shuffling).
train_size = int(0.8 * len(df_univariat))
test_size = len(df_univariat) - train_size
train = df_univariat.iloc[:train_size]
test = df_univariat.iloc[train_size:]
print(len(train), len(test))

In [0]:
def create_dataset(X, y, time_steps=1):
    """Slice a series into sliding input windows and next-step targets.

    For every start position ``s`` in ``range(len(X) - time_steps)`` the
    window is ``X.iloc[s:s + time_steps].values`` and its target is
    ``y.iloc[s + time_steps]`` (the value right after the window).

    Args:
        X: Object supporting ``len()`` and ``.iloc`` slicing whose slices
            expose ``.values`` (e.g. a pandas DataFrame).
        y: Object supporting ``.iloc`` integer indexing (e.g. a pandas
            Series).
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays. Both are empty when
        ``len(X) <= time_steps``.
    """
    window_count = len(X) - time_steps
    windows = [
        X.iloc[start:start + time_steps].values
        for start in range(window_count)
    ]
    targets = [y.iloc[start + time_steps] for start in range(window_count)]
    return np.array(windows), np.array(targets)

In [0]:
# Number of past observations fed to the model for each prediction.
time_steps = 7

# reshape to [samples, time_steps, n_features]
# Each split is windowed against its OWN target column, so that test windows
# are paired with test targets rather than training targets.
X_train, y_train = create_dataset(train, train[' Confirmed Case'], time_steps)
X_test, y_test = create_dataset(test, test[' Confirmed Case'], time_steps)

print(X_train.shape, y_train.shape)

In [0]:
# Inspect one sample: the third input window, the second entry of that
# window, and the target stored at the same index.
# print(X_train)
print(X_train[2])
print(X_train[2][1])
y_train[2]

In [0]:
def create_time_steps(length):
    """Return the negative offsets ``[-length, ..., -1]`` as a list.

    The result is empty when ``length`` is zero or negative.
    """
    return [-offset for offset in range(length, 0, -1)]
  
def show_plot(plot_data, delta, title):
    """Plot a history window plus up to two single-point series.

    Args:
        plot_data: Sequence of up to three items. Item 0 is the history
            window (needs ``.shape`` and ``.flatten()``) and is drawn as a
            line over negative time steps. Items 1 and 2 are drawn as
            markers at x = ``delta`` and are labelled 'True Future' and
            'Model Prediction'.
        delta: X position for the marker series; any falsy value means 0.
        title: Title of the figure.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can keep drawing.
    """
    series_labels = ['History', 'True Future', 'Model Prediction']
    series_styles = ['.-', 'rx', 'go']
    history_steps = create_time_steps(plot_data[0].shape[0])
    future = delta or 0

    plt.title(title)
    for idx, series in enumerate(plot_data):
        if idx == 0:
            # The history window is drawn as a connected line.
            plt.plot(history_steps, series.flatten(), series_styles[idx],
                     label=series_labels[idx])
            continue
        # Every later series is a single marker at the future position.
        plt.plot(future, series, series_styles[idx], markersize=10,
                 label=series_labels[idx])
    plt.legend()
    # Leave room to the right of the future marker.
    plt.xlim([history_steps[0], (future + 5) * 2])
    plt.xlabel('Time-Step')
    return plt

In [0]:
# Visualise the first training window together with its target value.
show_plot([X_train[0], y_train[0]], 0, 'Sample Example Window 1st')

In [0]:
#@title Default title text
def create_model(x, learning_rate):
    """Build and compile a single-unit Dense regression model.

    Args:
        x: Input array; only ``x.shape[1]`` and ``x.shape[2]`` are read, to
            set the layer's ``input_shape``.
        learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        A compiled ``keras.Sequential`` model with mean-squared-error loss
        and a root-mean-squared-error metric.
    """
    model = keras.Sequential()

    # Alternative architecture, kept for reference: a recurrent LSTM
    # (Long Short-Term Memory) layer followed by a Dense output.
    # model.add(keras.layers.LSTM(
    #   units=128,
    #   input_shape=(x.shape[1], x.shape[2])
    # ))
    # model.add(keras.layers.Dense(units=1))
    # model.compile(
    #   loss='mean_squared_error',
    #   optimizer=keras.optimizers.Adam(0.001)
    # )

    # Simple baseline: a single linear unit.
    # NOTE(review): Dense acts on the last axis only, so with a 3-D input the
    # output keeps the time-step axis -- confirm this lines up with the
    # shape of the targets passed to fit().
    model.add(tf.keras.layers.Dense(units=1,
                                    input_shape=(x.shape[1], x.shape[2])))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases no longer accept.
    model.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    return model

In [0]:
# Build the model with a 0.01 learning rate and fit it for 500 epochs.
# batch_size=None leaves the batch size at the framework default.
model = create_model(X_train, learning_rate=0.01)
history = model.fit(X_train, y_train, batch_size=None, epochs=500, verbose=1)