In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
tf.__version__

'2.0.0'

In [3]:
confirmed = pd.read_csv("confirmed_t.csv")
deaths = pd.read_csv("deaths_t.csv")
recovered = pd.read_csv("recovered.csv")

In [4]:
dataCols = list(recovered.columns[4:])

In [5]:
X_confirmed = confirmed[dataCols].values
X_deaths = deaths[dataCols].values
X_recovered = recovered[dataCols].values

In [6]:
def normalize(values):
    mean = values.mean(axis=1).reshape(-1,1)
    std = values.std(axis=1).reshape(-1,1)
    return (values - mean) / (std+1e-9), mean, std

In [7]:
X_confirmed, mean_confirmed, std_confirmed = normalize(X_confirmed)
X_deaths, mean_deaths, std_deaths= normalize(X_deaths)
X_recovered, mean_recovered, std_recovered = normalize(X_recovered)

In [8]:
def univariate_data(dataset, start_index, end_index, history_size, target_size, single_step=True):
    data = []
    labels = []

    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset[0]) - target_size
        
    for d in dataset:
        for i in range(start_index, end_index):
            indices = range(i-history_size, i)
            # Reshape data from (history_size,) to (history_size, 1)
            data.append(np.reshape(d[indices], (history_size, 1)))
            if single_step:
                labels.append(d[i+target_size])
            else:
                labels.append(d[i:i+target_size])
    return np.array(data), np.array(labels)

In [9]:
HISTORY_SIZE = 20
PRED_DAYS=3
X_confirmed_train, y_confirmed_train = univariate_data(X_confirmed, 0, None, HISTORY_SIZE, PRED_DAYS, False)
X_deaths_train, y_deaths_train = univariate_data(X_deaths, 0, None, HISTORY_SIZE, PRED_DAYS, False)
X_recovered_train, y_recovered_train = univariate_data(X_recovered, 0, None, HISTORY_SIZE, PRED_DAYS, False)

In [10]:
X_confirmed_train.shape

(28336, 20, 1)

In [11]:
def build_model(input_shape):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(32, return_sequences=True, input_shape=input_shape))
    model.add(tf.keras.layers.LSTM(16, activation='relu'))
    model.add(tf.keras.layers.Dense(3))
    model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')
    return model

In [12]:
model_confirmed = build_model(X_confirmed_train.shape[-2:])
model_deaths = build_model(X_deaths_train.shape[-2:])
model_recovered = build_model(X_recovered_train.shape[-2:])

In [13]:
BATCH_SIZE = 256
BUFFER_SIZE = 10000
def make_dataset(X, y):
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
    return dataset
    
train_data_confirmed = make_dataset(X_confirmed_train, y_confirmed_train)
train_data_deaths = make_dataset(X_deaths_train, y_deaths_train)
train_data_recovered = make_dataset(X_recovered_train, y_recovered_train)

In [14]:
EPOCHS = 10
EVALUATION_INTERVAL = 200
def train_and_predict(model, train_dataset, test_dataset, mean, std):
    model.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=EVALUATION_INTERVAL)
    pred = model.predict(test_dataset)
    pred = pred * std + mean
    return pred
    
test_data_confirmed = X_confirmed[:,-HISTORY_SIZE:].reshape(-1,HISTORY_SIZE,1)
test_data_deaths = X_deaths[:,-HISTORY_SIZE:].reshape(-1,HISTORY_SIZE,1)
test_data_recovered = X_recovered[:,-HISTORY_SIZE:].reshape(-1,HISTORY_SIZE,1)
    
pred_confirmed = train_and_predict(model_confirmed, train_data_confirmed, test_data_confirmed, mean_confirmed, std_confirmed)
pred_deaths = train_and_predict(model_deaths, train_data_deaths, test_data_deaths, mean_deaths, std_deaths )
pred_recovered = train_and_predict(model_recovered, train_data_recovered, test_data_recovered, mean_recovered, std_recovered)

Train for 200 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train for 200 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train for 200 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
import datetime

def append_pred(dataframe, pred):
    enddate = datetime.datetime.strptime(dataCols[-1], '%m/%d/%y')
    prevdate = enddate
    for i in range(1, PRED_DAYS+1):
        nextdate = enddate + datetime.timedelta(days=i)
        temp = np.concatenate([dataframe[prevdate.strftime('%m/%d/%y')].values.reshape(-1,1), pred[:,i-1].reshape(-1,1)], axis=1)
        dataframe[nextdate.strftime('%m/%d/%y')] = temp.max(axis=1).astype(np.int32)
        prevdate = nextdate
        
append_pred(confirmed, pred_confirmed)
append_pred(deaths, pred_deaths)
append_pred(recovered, pred_recovered)

In [17]:
confirmed.to_csv('confirmed_with_pred.csv', index=False)
deaths.to_csv('deaths_with_pred.csv', index=False)
recovered.to_csv('recovered_with_pred.csv', index=False)