Subject
===
Use an LSTM model to forecast future case counts. The trained model is stored in a directory named "model" (create it if it does not exist).




Requirements
---
- watermark: an IPython magic extension for printing date and time stamps, version numbers, and hardware information (install with `pip install watermark`)
- tensorflow
- keras


In [None]:
# Load the watermark extension, then report the interpreter version (-v)
# and the versions of the listed packages (-p).
%load_ext watermark
%watermark -v -p tensorflow,sklearn,keras

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Directory holding the CSSE COVID-19 time-series CSVs, relative to this
# notebook. The value ends with "/", so file names are appended directly.
dir_name='../COVID-19-master/csse_covid_19_data/csse_covid_19_time_series/'
# No leading "/" on the file name: dir_name already supplies the separator
# (same convention as the deaths/recovered loads further down).
url = dir_name+"time_series_covid19_confirmed_global.csv"
df_confirmed = pd.read_csv(url)

In [None]:
# Preview the first rows of the confirmed-cases table.
df_confirmed.head()

In [None]:
# Value matched against the "Country/Region" column when filtering each table;
# also used in plot titles and in the saved model's file name.
country = "Taiwan*"

In [None]:
# keep only the rows whose "Country/Region" equals the selected country
is_selected_country = df_confirmed["Country/Region"].eq(country)
df_confirmed1 = df_confirmed.loc[is_selected_country]

In [None]:
## structure the time-series data
# Columns from position 4 onward are the per-date counts; summing over the
# matched rows gives one value per date, indexed by the date labels.
confirmed_date_cols = df_confirmed1.columns[4:]
df_confirmed2 = df_confirmed1[confirmed_date_cols].sum().to_frame(name="confirmed")
# turn the date labels into a DatetimeIndex (format is inferred)
df_confirmed2.index = pd.to_datetime(df_confirmed2.index)
df_confirmed2.tail()

In [None]:
## deaths table

deaths_csv = dir_name + "time_series_covid19_deaths_global.csv"
df_dead = pd.read_csv(deaths_csv)

In [None]:
# Preview the first rows of the deaths table.
df_dead.head()

In [None]:
# keep only the rows whose "Country/Region" equals the selected country
is_selected_country = df_dead["Country/Region"].eq(country)
df_dead1 = df_dead.loc[is_selected_country]

In [None]:
# Sum the per-date columns (position 4 onward) over the matched rows and
# index the result by date.
dead_date_cols = df_dead1.columns[4:]
df_dead2 = df_dead1[dead_date_cols].sum().to_frame(name="dead")
df_dead2.index = pd.to_datetime(df_dead2.index)
df_dead2.tail()

In [None]:
### recovered table
recovered_csv = dir_name + "time_series_covid19_recovered_global.csv"
df_recovered = pd.read_csv(recovered_csv)

In [None]:
# keep only the rows whose "Country/Region" equals the selected country
is_selected_country = df_recovered["Country/Region"].eq(country)
df_recovered1 = df_recovered.loc[is_selected_country]

In [None]:
# Display the recovered-table rows matched for the selected country.
df_recovered1

In [None]:
# Sum the per-date columns (position 4 onward) over the matched rows and
# index the result by date.
recovered_date_cols = df_recovered1.columns[4:]
df_recovered2 = df_recovered1[recovered_date_cols].sum().to_frame(name="recovered")
df_recovered2.index = pd.to_datetime(df_recovered2.index)
df_recovered2.tail()

In [None]:
## confirmed + dead, keeping only the dates present in both frames
df_conf_dead = df_confirmed2.join(other=df_dead2, how="inner")
df_conf_dead.head()

In [None]:
## (confirmed + dead) + recovered, again restricted to the shared dates
df_all = df_conf_dead.join(other=df_recovered2, how="inner")
df_all.tail()

In [None]:
# one line per column (confirmed / dead / recovered) over time
overview_title = "COVID-19 statistics at %s" % country
df_all.plot(title=overview_title, figsize=(10, 5))

In [None]:
# Model only the confirmed series (the original run covered data up to 2020-04-06).
# The list selector keeps the result a one-column DataFrame.
df_new = df_confirmed2.loc[:, ["confirmed"]]
df_new.tail()

In [None]:
# The data is daily; the goal is to predict the 5 days that follow.
# Number of daily observations available:
len(df_new)

In [None]:
# position of the first test row: everything before it is training data
x = df_new.shape[0] - 5
x

In [None]:
# Hold out the last 5 observations as the test set. Slicing from the end
# (instead of through the notebook-level `x`) keeps this cell re-runnable
# even after `x` is rebound to a generator sample further down.
train = df_new.iloc[:-5]
test = df_new.iloc[-5:]
train.tail()

The values span a wide range, which makes training unstable; one common remedy is to rescale them into the interval `[0, 1]`.

In [None]:
## Rescale the data because the raw values are heavily skewed.
## MinMaxScaler maps each feature linearly onto its feature_range ([0, 1] by default).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
scaler.fit(train) # learn the min and max from the training data only

In [None]:
# Apply the scaling fitted on `train` to both splits; the test split is
# transformed with the training statistics, not its own.
scaled_train, scaled_test = (scaler.transform(split) for split in (train, test))
print(scaled_train[-5:])

In [None]:
# Quick visual check of the scaled training series.
plt.plot(scaled_train)
plt.grid(True)

In [None]:
## Feed the network sliding windows: [t1, t2, t3] --> t4.
## Here only the confirmed series is used, so each window holds past
## confirmed values and the target is the next confirmed value.
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# (number of training points, number of features)
scaled_train.shape

In [None]:
## window configuration
n_input = 5      # number of past time steps in each input window
n_features = 1   # values per time step (1 for a univariate series)
# data and targets are the same array: each window of n_input points is
# paired with the point that immediately follows it.
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=n_input,
    batch_size=1,
)

In [None]:
# number of training points (62 in the original run)
len(scaled_train)

In [None]:
# number of (window, target) pairs (57 in the original run)
len(generator)

In [None]:
# First six scaled points, for comparison with generator[0] in the next cell.
scaled_train[:6]

In [None]:
# First (inputs, target) pair produced by the generator.
generator[0]

In [None]:
# the pair at index 50
# NOTE(review): this rebinds `x`, which earlier held the train/test split position.
x,y = generator[50]

In [None]:
# Shapes of the sampled input window and its target.
(x.shape,y.shape)

In [None]:
# Values of the sampled input window and its target.
(x,y)

In [None]:
## each sample above takes 5 inputs and predicts the next point in scaled_train
## a smaller batch size tends to give better training for time series

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation

```
LSTM:  Long Short Term Memory Networks (LSTM) models           

           150 neurons      75 neurons      1 neuron
  input ➨    LSTM       ➨   Dense Layer  ➨  output  
```  

In [None]:
# Architecture: LSTM(150) -> Dense(75) -> Dense(1), optimised with Adam on MSE.
model = Sequential([
    LSTM(150, activation="relu", input_shape=(n_input, n_features)),
    # Dropout(0.2) could be inserted here; it is currently disabled.
    Dense(75, activation="relu"),
    Dense(units=1),
])
model.compile(optimizer="adam", loss="mse")

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# IPython help lookup for TimeseriesGenerator (development aid; not needed for the analysis).
TimeseriesGenerator?

In [None]:
# Validation window: the final training point followed by the whole test set.
# Indexing from the end (rather than a fixed position) keeps the window
# contiguous in time as the dataset grows, and reshape(-1, 1) adapts to
# however many points that yields.
validation_set = np.append(scaled_train[-1], scaled_test)
validation_set = validation_set.reshape(-1, 1)
validation_set

In [None]:
## validation windows must use the same window length as the training generator
n_input = 5      # number of past time steps in each input window
n_features = 1   # values per time step
# length follows n_input so a change to the window size cannot leave the
# validation generator out of step with the model's input shape.
validation_gen = TimeseriesGenerator(validation_set,validation_set,length=n_input,batch_size=1)

In [None]:
# Shapes of the first validation input window and its target.
validation_gen[0][0].shape,validation_gen[0][1].shape

In [None]:
# Import the callback from the same `keras` namespace the model is built
# with; mixing `keras` and `tensorflow.keras` objects can fail at fit time.
from keras.callbacks import EarlyStopping
# Stop once val_loss has not improved for 20 epochs and roll back to the
# weights of the best epoch.
early_stop = EarlyStopping(monitor='val_loss',patience=20,restore_best_weights=True)

In [None]:
# Train for up to 100 epochs of 10 generator batches each, validating on
# validation_gen and stopping early via early_stop.
# NOTE(review): fit_generator is deprecated in newer Keras/TensorFlow
# releases in favour of fit — confirm against the installed version.
model.fit_generator(
    generator,
    steps_per_epoch=10,
    epochs=100,
    validation_data=validation_gen,
    callbacks=[early_stop],
)

In [None]:
# one column per recorded metric, one row per epoch
history_df = pd.DataFrame(model.history.history)
history_df.plot(title="loss vs epochs curve", figsize=(10, 6))

In [None]:
# Names of the metrics recorded during training.
model.history.history.keys()

In [None]:
# validation loss per epoch, plotted against the epoch number
myloss = model.history.history["val_loss"]
epoch_axis = range(len(myloss))
plt.title("validation loss vs epochs")
plt.plot(epoch_axis, myloss)

In [None]:
### evaluation batch
## 5 history steps ---> step 6
## the last 5 training points predict the first test point

## forecast

In [None]:
## collects the scaled predictions, one per forecast step
test_prediction = []

## seed window: the last n_input points of the training set,
## shaped (batch, time steps, features) as the LSTM expects
first_eval_batch = scaled_train[-n_input:]
current_batch = np.reshape(first_eval_batch, (1, n_input, n_features))

In [None]:
# Shape of the seed window passed to model.predict.
current_batch.shape

In [None]:
## Roll the forecast forward: len(test) held-out days plus 7 days beyond them.
# Re-seed the state here so that re-running this cell starts again from the
# last n_input training points instead of appending to an earlier run's
# predictions and continuing from an already-advanced window.
test_prediction = []
current_batch = scaled_train[-n_input:].reshape(1, n_input, n_features)
for i in range(len(test)+7):
    # one-step-ahead prediction for the current window; [0] drops the batch axis
    current_pred = model.predict(current_batch)[0]
    test_prediction.append(current_pred)
    # slide the window: drop the oldest step, append the prediction as the newest
    current_batch = np.append(current_batch[:,1:,:],[[current_pred]],axis=1)

In [None]:
# Scaled predictions, one entry per forecast step.
test_prediction

In [None]:
### Undo the scaling so the predictions are back in the units of the original data.
true_prediction = scaler.inverse_transform(test_prediction)
# first (only) column as a flat array
true_prediction[:,0]

In [None]:
# Date index for the forecast table: the test dates followed by one extra
# day per prediction made beyond the test set. The horizon is derived from
# the number of predictions so it cannot drift from the forecast loop.
n_future = len(true_prediction) - len(test)
future_dates = pd.date_range(
    start=test.index[-1] + pd.Timedelta(days=1),
    periods=n_future,
    freq="D",
)
time_series_array = test.index.append(future_dates)
time_series_array

In [None]:
# Empty forecast table (values are filled in by the next cell).
# dtype=float makes the placeholder columns numeric NaN rather than object,
# so later arithmetic and plotting see numeric data.
df_forecast = pd.DataFrame(columns=["confirmed","confirmed_predicted"],index=time_series_array,dtype=float)
df_forecast

In [None]:
# Plain column assignment replaces each column outright, so the result takes
# the numeric dtype of the assigned values. Writing through .loc[:, col]
# can instead set values in place and keep an object-dtype column, which
# later plotting rejects as non-numeric.
df_forecast["confirmed_predicted"] = true_prediction[:,0]
# aligned on the date index: dates beyond the test set are left as NaN
df_forecast["confirmed"] = test["confirmed"]

In [None]:
# Forecast table with actual and predicted values side by side.
df_forecast

In [None]:
# actual vs predicted confirmed counts over the forecast window
forecast_title = "%s Predictions for next 7 days" % country
df_forecast.plot(title=forecast_title, figsize=(10, 6))

In [None]:
# Mean absolute percentage error over the rows that have an actual value.
# Those are the first len(test) rows; using len(test) instead of a literal
# keeps this in step with the train/test split.
n_test = len(test)
actual = df_forecast["confirmed"].iloc[:n_test].to_numpy(dtype=float)
predicted = df_forecast["confirmed_predicted"].iloc[:n_test].to_numpy(dtype=float)
MAPE = np.mean(np.abs(actual - predicted) / actual)
print("MAPE is " + str(MAPE*100) + " %")

In [None]:
# Sum of squared errors over the rows that have an actual value
# (the first len(test) rows of the forecast table).
n_test = len(test)
actual = df_forecast["confirmed"].iloc[:n_test].to_numpy(dtype=float)
predicted = df_forecast["confirmed_predicted"].iloc[:n_test].to_numpy(dtype=float)
sum_errs = np.sum((actual - predicted) ** 2)
sum_errs

In [None]:
# Residual standard deviation with n - 2 in the denominator, where n is the
# number of test points (taken from len(test) rather than a literal 5).
stdev = np.sqrt(1/(len(test)-2) * sum_errs)
stdev

In [None]:
# Half-width of the prediction interval: 1.96 standard deviations,
# the two-sided 95% bound under a normal-error assumption.
interval = 1.96 * stdev
interval

In [None]:
# lower / upper bounds: prediction minus / plus the interval half-width
predicted_col = df_forecast["confirmed_predicted"]
df_forecast["confirm_min"] = predicted_col.sub(interval)
df_forecast["confirm_max"] = predicted_col.add(interval)
df_forecast

In [None]:
# accuracy reported as 1 - MAPE, rounded to two decimals (same value on every row)
accuracy = round(1 - MAPE, 2)
df_forecast["Model Accuracy"] = accuracy
df_forecast

In [None]:
from datetime import datetime
# tag every row with the country and today's date (YYYY-MM-DD)
run_date = datetime.now().strftime("%Y-%m-%d")
df_forecast["Country"] = country
df_forecast["Execution date"] = run_date
df_forecast

In [None]:
# Optional Excel export (disabled).
# NOTE(review): the file name mentions Iran while `country` is set elsewhere — confirm before enabling.
#df_forecast.to_excel("output/Iran_confirmed.xlsx")

In [None]:
### save model
import os
# Make sure the target directory exists: the .h5 writer may not create
# missing parent directories, which would make the save fail.
os.makedirs("model", exist_ok=True)
# file name: model/confirmed_<country>_<YYYY-MM-DD>.h5
model.save("model/confirmed_{0}_{1}.h5".format(country,str(datetime.now()).split()[0]))

In [None]:
# Plot the first four columns of the forecast table.
df_forecast.iloc[:,:4].plot()

In [None]:
# Actual vs predicted counts, with the band between confirm_min and
# confirm_max shaded.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title("{} - Results".format(country))
for series_name in ("confirmed", "confirmed_predicted"):
    ax.plot(df_forecast.index, df_forecast[series_name], label=series_name)
ax.fill_between(
    df_forecast.index,
    df_forecast["confirm_min"],
    df_forecast["confirm_max"],
    color="indigo",
    alpha=0.09,
    label="Confidence Interval",
)
ax.legend()
plt.show()

## load a saved model

In [None]:
from keras.models import load_model
# The path is rebuilt from today's date, so it matches the file written by
# the save cell only when both run on the same calendar day.
today_stamp = datetime.now().strftime("%Y-%m-%d")
name = "model/confirmed_{0}_{1}.h5".format(country, today_stamp)
model1 = load_model(name)

In [None]:
# Summary of the reloaded model, for comparison with the trained one.
model1.summary()