## Prediction of New Death Cases - Global Covid-19 Cases

## Load Data and Import Libraries

In [None]:
# Install the tensorflow_docs helper package straight from GitHub; it is
# imported further down as `tensorflow_docs` (plots and modeling submodules).
!pip install -q git+https://github.com/tensorflow/docs

In [None]:
%tensorflow_version 2.x # make sure that collab use tensorflow 2
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import os
import datetime
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Render matplotlib figures inline, using the "retina" figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Global plot styling: seaborn theme settings plus a 16 x 10 default figure size.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 16, 10

In [None]:
# !rm '/root/.keras/datasets/global_total.csv'

## Load Data


In [None]:
# Daily new-case counts, read directly from the project's GitHub repository.
NEW_CASES_URL = "https://raw.githubusercontent.com/virgiawan/covid-19-prediction/linear-regression/dataset/corona-virus/new_cases.csv"
df_new_cases = pd.read_csv(NEW_CASES_URL)

In [None]:
def plot_series(time, series, format="-", start=0, end=None):
    """Draw `series` against `time` on the current matplotlib axes.

    Args:
        time: Sequence of x values.
        series: Sequence of y values, sliced the same way as `time`.
        format: Matplotlib format string for the line (default: solid line).
        start: Index of the first point to draw.
        end: Index one past the last point to draw; None means "to the end".
    """
    shown = slice(start, end)
    plt.plot(time[shown], series[shown], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)


In [None]:
# Pull the 'World' column out as a plain list and pair it with a 0-based
# integer time index (one step per row).
series = list(df_new_cases['World'])
times = list(range(len(series)))
step = len(series)  # kept for parity with the counter the loop version left behind

In [None]:
# Plot the complete series and report how many observations it holds.
total_points = len(series)
plot_series(times, series)
print('Total data {} series'.format(total_points))

In [None]:
# The first 63 observations (indices 0-62) are essentially flat: the counts
# have not started to rise yet. Leave them out for now.
skip = 63
used_times, used_series = times[skip:], series[skip:]

plot_series(used_times, used_series)
print('Total data {} series'.format(len(used_series)))

In [None]:
# Chronological split: the first 70% of the retained points are used for
# training, the remainder for validation.
split_percentage = 0.70
split_time = int(len(used_times) * split_percentage)

time_train, time_valid = used_times[:split_time], used_times[split_time:]
x_train, x_valid = used_series[:split_time], used_series[split_time:]

In [None]:
# Build the sliding-window training dataset.
def windowed_dataset_dnn(series, window_size, batch_size, shuffle_buffer):
  """Turn a 1-D series into batched (window, next value) training pairs.

  Args:
    series: Sequence of observations; a trailing axis of size 1 is appended.
    window_size: Number of consecutive observations used as model input.
    batch_size: Number of (input, label) pairs per batch.
    shuffle_buffer: Buffer size handed to `Dataset.shuffle`.

  Returns:
    A `tf.data.Dataset` of batches `(inputs, labels)`, where each input is
    `window_size` consecutive observations and its label is the observation
    that immediately follows them.
  """
  span = window_size + 1  # inputs plus the one label that follows them
  expanded = tf.expand_dims(series, axis=-1)
  return (
      tf.data.Dataset.from_tensor_slices(expanded)
      .window(span, shift=1, drop_remainder=True)
      .flat_map(lambda chunk: chunk.batch(span))
      .shuffle(shuffle_buffer)
      .map(lambda chunk: (chunk[:-1], chunk[-1]))
      .batch(batch_size)
      .prefetch(1)
  )


In [None]:
# Hyperparameters shared by the windowing and training cells.
epochs = 100              # epochs for the first training run
batch_size = 2            # (input, label) pairs per batch
window_size = 20          # consecutive observations fed to the model per sample
shuffle_buffer_size = 10  # buffer size passed to Dataset.shuffle

In [None]:
# Learning-rate sweep: train for a short run while the learning rate grows
# every epoch, so that a workable rate can be read off the loss curve.
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)

dataset = windowed_dataset_dnn(x_train, window_size, batch_size, shuffle_buffer_size)

# Causal 1-D convolution over the window, followed by a small dense head.
# NOTE(review): there is no Flatten/pooling layer, so the Dense layers
# presumably act on the last axis and the model emits one value per time step
# rather than one per window - confirm this is intended.
l0 = tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                      strides=1, padding="causal",
                      activation="relu",
                      input_shape=[None, 1])
# No `input_shape` here: only the first layer of a Sequential model defines
# the input shape, so repeating one on an inner layer has no effect.
l1 = tf.keras.layers.Dense(32, activation='relu')
l2 = tf.keras.layers.Dense(32, activation='relu')
l3 = tf.keras.layers.Dense(1)
# Multiply the network output by a fixed factor of 10000
# (presumably to bring it closer to the scale of the case counts).
l4 = tf.keras.layers.Lambda(lambda x: x * 10000)

model = tf.keras.models.Sequential([l0, l1, l2, l3, l4])

# Start at 1e-8 and multiply the learning rate by 10 every 20 epochs.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
model.compile(loss=tf.keras.losses.Huber(), optimizer=optimizer, metrics=['mae'])
history = model.fit(dataset, epochs=epochs, callbacks=[lr_schedule], verbose=0)

In [None]:
# `dataset` is batched, so iterating it yields (inputs, labels) batches rather
# than single windows. Add up the batch sizes to get the true window count
# (the last batch may be smaller than `batch_size`).
len_data = 0
for window_batch, _ in dataset:
  len_data += int(window_batch.shape[0])
print('Windows number: {}'.format(len_data))

In [None]:
# Loss versus learning rate for the sweep, on a logarithmic x axis.
sweep_rates = history.history["lr"]
sweep_losses = history.history["loss"]
plt.semilogx(sweep_rates, sweep_losses)
plt.axis([1e-8, 10, 0, 100000])

In [None]:
# Full training run with a fixed learning rate.
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)
epochs = 10000

dataset = windowed_dataset_dnn(x_train, window_size, batch_size, shuffle_buffer_size)

# Causal 1-D convolution over the window, followed by a small dense head.
# NOTE(review): there is no Flatten/pooling layer, so the Dense layers
# presumably act on the last axis and the model emits one value per time step
# rather than one per window - confirm this is intended.
l0 = tf.keras.layers.Conv1D(filters=32, kernel_size=5,
                      strides=1, padding="causal",
                      activation="relu",
                      input_shape=[None, 1])
# No `input_shape` here: only the first layer of a Sequential model defines
# the input shape, so repeating one on an inner layer has no effect.
l1 = tf.keras.layers.Dense(32, activation='relu')
l2 = tf.keras.layers.Dense(32, activation='relu')
l3 = tf.keras.layers.Dense(1)
# Multiply the network output by a fixed factor of 10000
# (presumably to bring it closer to the scale of the case counts).
l4 = tf.keras.layers.Lambda(lambda x: x * 10000)

model = tf.keras.models.Sequential([l0, l1, l2, l3, l4])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9)
# Only MAE is tracked: classification accuracy carries no information for a
# continuous regression target.
model.compile(loss=tf.keras.losses.Huber(), optimizer=optimizer, metrics=['mae'])
history = model.fit(dataset, epochs=epochs, verbose=2)

In [None]:
# Training loss per epoch on a logarithmic x axis.
# Epochs are numbered from 1 because 0 cannot be placed on a log scale, and
# the x limit comes from the recorded history rather than the mutable global
# `epochs`, so the plot always matches the run it describes.
epoch_losses = history.history["loss"]
epoch_numbers = range(1, len(epoch_losses) + 1)
plt.semilogx(epoch_numbers, epoch_losses)
plt.axis([1, len(epoch_losses), 4000, 25000])

In [None]:
# Forecast every validation point from the `window_size` observations that
# immediately precede it.
np_used_series = np.asarray(used_series, dtype="float32")[:, np.newaxis]

# The window starting at index t predicts used_series[t + window_size], so the
# first validation target (index split_time) needs the window that starts at
# split_time - window_size.
first_window = split_time - window_size
if first_window < 0:
  raise ValueError("split_time must be at least window_size to forecast the validation range")

windows = np.stack([
    np_used_series[start:start + window_size]
    for start in range(first_window, len(np_used_series) - window_size)
])

# One batched predict() call instead of one call per window.
forecast = model.predict(windows)

# Keep a single value per window. If the model emits one value per time step,
# take the last one: with causal padding it is the only output that has seen
# the most recent observations, whereas the first step sees just the oldest.
# If the model already emits one value per window, this is a no-op selection.
results = np.asarray(forecast).reshape(len(windows), -1)[:, -1]

plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, results)

In [None]:
# Mean absolute error over the validation range.
# Predictions are flattened to 1-D first: a column-shaped (N, 1) array would
# broadcast against the length-N targets into an N x N matrix and the metric
# would come back as a vector of N values, not a single MAE.
tf.keras.metrics.mean_absolute_error(x_valid, np.ravel(results)).numpy()