[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dionatrafk/workload_prediction/blob/master/GRU.ipynb)

**`GRU (Gated Recurrent Unit)`**

**Clone the dataset repository from GitHub**

In [0]:
# Clone the repository; a later cell reads 'workload_prediction/trace60.csv'
# from the directory this command creates.
!git clone --recursive https://github.com/dionatrafk/workload_prediction

In [0]:
import pandas as pd
from keras.layers.core import Dense, Dropout
from keras.layers.recurrent import GRU
from keras.models import Sequential, load_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math, time
import os, sys


In [0]:
def create_dataset(dataset, lookback=1):
    """Turn a series into supervised (window, next value) pairs.

    Parameters
    ----------
    dataset : 2-D array indexed as ``dataset[row, 0]``
        The series to window; only column 0 is read.
    lookback : int, default 1
        Number of consecutive rows that make up one input window.

    Returns
    -------
    (dataX, dataY) : tuple of np.ndarray
        ``dataX[i]`` is ``dataset[i:i+lookback, 0]`` and ``dataY[i]`` is the
        value that immediately follows it, ``dataset[i+lookback, 0]``.
        Both are empty when ``len(dataset) <= lookback``.
    """
    # The last usable window starts at len(dataset) - lookback - 1, because its
    # target sits at index len(dataset) - 1. The previous bound stopped one
    # step earlier and silently discarded that final pair.
    n_samples = len(dataset) - lookback
    dataX = [dataset[i:i + lookback, 0] for i in range(n_samples)]
    dataY = [dataset[i + lookback, 0] for i in range(n_samples)]  # next value
    return np.array(dataX), np.array(dataY)

In [0]:
filename = 'workload_prediction/trace60.csv'

# Load only the second CSV column (index 1); the file has no header row.
dataset = pd.read_csv(filename, header=None, usecols=[1])
dataset.columns = ["request"]
# Work on a plain float32 array from here on.
dataset = dataset.values.astype('float32')

# Chronological split: the first 67% of rows train the model, the rest test it.
training_size = int(len(dataset)*0.67)
testing_size = len(dataset) - training_size
train = dataset[:training_size]
test = dataset[training_size:]

# Each sample uses `lookback` past values to predict the value one step ahead.
lookback = 1
trainX, trainY = create_dataset(train, lookback)
testX, testY = create_dataset(test, lookback)

In [0]:
# Scaling dataset
# Keep the unscaled windows/targets under separate names, because
# trainX/trainY/testX/testY are overwritten with their scaled versions below.
x_train, y_train = trainX, trainY
x_test, y_test = testX, testY

# One scaler for the inputs and one for the targets.
scaleX = MinMaxScaler()
scaleY = MinMaxScaler()

# Fit the scalers on the training split only.
# reshape((-1, 1, 1)) lays the inputs out as (samples, 1, 1), matching the
# single-step windows produced with lookback = 1.
trainX = scaleX.fit_transform(x_train)
trainX = trainX.reshape((-1, 1, 1))

trainY = scaleY.fit_transform(y_train.reshape(-1, 1))

# Apply the *training* min/max to the test split. Calling fit_transform here
# would refit the scalers on test data, which leaks test statistics into the
# evaluation and leaves scaleY unable to inverse-transform training
# predictions back to the scale the model was trained on.
# Test values outside the training range map outside [0, 1].
testX = scaleX.transform(x_test)
testX = testX.reshape((-1, 1, 1))

testY = scaleY.transform(y_test.reshape(-1, 1))

In [0]:
# Build the Keras network: two stacked GRU layers, each followed by dropout,
# and a single sigmoid output unit.
model_name = 'requests_GRU'
model = Sequential([
    # First GRU returns the full sequence so the second GRU can consume it.
    GRU(units=32, return_sequences=True, input_shape=(1, 1)),
    Dropout(0.2),
    GRU(units=16),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

In [0]:
# Compilation and training
# print() is called with one pre-formatted string so the cell runs on
# Python 3 and prints the same text on Python 2.
start = time.time()
model.compile(loss='mean_squared_error', optimizer='adam')

print("Compilation Time :  %s" % (time.time() - start))

start = time.time()

BATCH_SIZE = 140
NB_EPOCHS = 150

# 10% of the training samples are held out for validation; verbose=0 silences
# the per-epoch log.
model.fit(trainX, trainY, batch_size=BATCH_SIZE, epochs=NB_EPOCHS, validation_split=0.1, verbose=0)

print("Training time :  %s" % (time.time() - start))
# Persist the trained model as '<model_name>.h5'.
model.save("{}.h5".format(model_name))


In [0]:

# Making predictions

# Training-set fit. Results go into their own names so that `yhat`/`y_test`
# only ever hold test-set values.
# NOTE(review): inverse_transform only recovers real request counts if scaleY
# still holds the statistics used to scale trainY -- confirm in the scaling cell.
train_pred = scaleY.inverse_transform(model.predict(trainX))
train_true = scaleY.inverse_transform(trainY)

# These are the raw split sizes, not the number of windows fed to the model.
print('Train samples: %d\nTest samples: %d\n' % (training_size, testing_size))

score = mean_squared_error(train_true, train_pred)
print('Trainscore: %.2f MSE (%.2f RMSE)' % (score, math.sqrt(score)))

# Test-set evaluation, mapped back to the original scale.
yhat = scaleY.inverse_transform(model.predict(testX))
y_test = scaleY.inverse_transform(testY)

score = mean_squared_error(y_test, yhat)
print('Testscore: %.2f MSE (%.2f RMSE)' % (score, math.sqrt(score)))
print('%.2f' % (math.sqrt(score)))

# Print a few samples.
# With single-step windows, y_test[i] is the input of sample i+1, so each row
# shows a current value next to the forecast yhat[i+1] made from it.
print('Current  Predicted')
# Bounded by the data so a short test split cannot raise IndexError on i+1.
for i in range(min(10, len(yhat) - 1)):
    print('%s %s' % (y_test[i], yhat[i + 1]))
    
    


In [0]:
# Compare the last 100 test predictions with the observed values.
fig, ax = plt.subplots()
ax.plot(yhat[-100:], label='Predicted')
ax.plot(y_test[-100:], label='Current')

ax.set_xlabel('Time')
ax.set_ylabel('Requests')
ax.grid()
ax.legend()

plt.show()
