In [None]:
# @hidden_cell
# The project token is an authorization token that is used to access project resources like data sources, connections, and used by platform APIs.
from project_lib import Project
project = Project(project_id='7ef2bc82-f627-4265-be50-c3f2215d4b5d', project_access_token='p-5c21f16b313f85f77558be65262bddeac2431407')
pc = project.project_context


# Final Prediction Model and Results

Now that we have evaluated our model, we can use all the data and build a model to predict values of the future. In this case, we predict Electricity Consumption and Generation in year 2020 in Germany.

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
from matplotlib.dates import date2num
%matplotlib inline
sns.set_style('darkgrid')

In [2]:
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing import sequence

from keras.layers import Dense
from keras.layers import Input, LSTM
from keras.losses import mean_squared_error

from keras.models import Model
from keras.models import load_model
import h5py

Using TensorFlow backend.


## Load Data

In [3]:
!wget https://github.com/siavash-saki/IBM-Advanced-Capstone-Project/raw/master/Data_Cleaned/consumption_ready_for_forcast.pkl
!wget https://github.com/siavash-saki/IBM-Advanced-Capstone-Project/raw/master/Data_Cleaned/generation_ready_for_forcast.pkl

--2020-02-06 14:09:19--  https://github.com/siavash-saki/IBM-Advanced-Capstone-Project/raw/master/Data_Cleaned/consumption_ready_for_forcast.pkl
Resolving github.com (github.com)... 140.82.118.4
Connecting to github.com (github.com)|140.82.118.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/siavash-saki/IBM-Advanced-Capstone-Project/master/Data_Cleaned/consumption_ready_for_forcast.pkl [following]
--2020-02-06 14:09:19--  https://raw.githubusercontent.com/siavash-saki/IBM-Advanced-Capstone-Project/master/Data_Cleaned/consumption_ready_for_forcast.pkl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.60.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.60.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 702204 (686K) [application/octet-stream]
Saving to: ‘consumption_ready_for_forcast.pkl.2’


2020-02-06 14:09:19 (16.2 MB/s) - ‘consumptio

In [4]:
consumption= pd.read_pickle('consumption_ready_for_forcast.pkl')
generation= pd.read_pickle('generation_ready_for_forcast.pkl')

## Batch Size and Timesteps

In [5]:
# defining the batch size and timesteps
batch_size = 128
timesteps=24*7

## Prepare Train Set

In [6]:
# Function to calculate training size regarding batch_size
def get_train_length(dataset, timesteps, batch_size):
    # substract test_percent to be excluded from training, reserved for testset
    length = len(dataset)-2.1*timesteps
    train_length_values = []
    for x in range(int(length) - 1000,int(length)): 
        modulo=x%batch_size
        if (modulo == 0):
            train_length_values.append(x)
    return (max(train_length_values))

In [7]:
length = get_train_length(consumption, timesteps, batch_size)
upper_train = length + timesteps*2
print('\nDatasets length:',len(consumption))
print('Last divisible index:', upper_train)
print('Train Sets length:', length,'\n')


Datasets length: 43824
Last divisible index: 43728
Train Sets length: 43392 



In [8]:
# Set y_train variable for consumption df
consumption_train_df = consumption[0:upper_train]
consumption_y_train = consumption_train_df.iloc[:,].values
print('\nTraining Sets Shapes after Adding Timesteps:', consumption_y_train.shape)

# Set y_train variable for generation df
generation_train_df = generation[0:upper_train]
generation_y_train = generation_train_df.iloc[:,].values


Training Sets Shapes after Adding Timesteps: (43728, 1)


## Feature Scaling

In [9]:
#scale between 0 and 1. the weights are esier to find.
sc_con = MinMaxScaler(feature_range = (0, 1))
sc_gen = MinMaxScaler(feature_range = (0, 1))
consumption_y_train_scaled = sc_con.fit_transform(np.float64(consumption_y_train))
generation_y_train_scaled = sc_gen.fit_transform(np.float64(generation_y_train))

## Creating a data structure with n timesteps

In [10]:
# Empty Lists to store X_train and y_train
consumption_X_train_matrix = []
consumption_y_train_matrix = []
# Creating a data structure with n timesteps
for i in range(timesteps, length + timesteps):
    #create X_train matrix
    #24*7 items per array (timestep) 
    consumption_X_train_matrix.append(consumption_y_train_scaled[i-timesteps:i,0])
    #create Y_train matrix
    #24*7 items per array (timestep)
    consumption_y_train_matrix.append(consumption_y_train_scaled[i:i+timesteps,0])
    
# reapeat all of these steps fot generation dataframe
generation_X_train_matrix = []
generation_y_train_matrix = []
for i in range(timesteps, length + timesteps):
    generation_X_train_matrix.append(generation_y_train_scaled[i-timesteps:i,0])
    generation_y_train_matrix.append(generation_y_train_scaled[i:i+timesteps,0])

In [11]:
# Check shape
print()
print('X_train sets shape:', np.array(consumption_X_train_matrix).shape)
print('y_train sets shape:', np.array(consumption_y_train_matrix).shape)
print()


X_train sets shape: (43392, 168)
y_train sets shape: (43392, 168)



## Reshape

In [12]:
# Turn list into numpy array
consumption_X_train_matrix = np.array(consumption_X_train_matrix)
consumption_y_train_matrix = np.array(consumption_y_train_matrix)
# reshape arrays
consumption_X_train_reshaped = np.reshape(consumption_X_train_matrix, 
                                         (consumption_X_train_matrix.shape[0], 
                                          consumption_X_train_matrix.shape[1], 1))
consumption_y_train_reshaped = np.reshape(consumption_y_train_matrix, 
                                         (consumption_y_train_matrix.shape[0], 
                                          consumption_y_train_matrix.shape[1], 1))

# Repeat the same stes for generatin dataframe
generation_X_train_matrix = np.array(generation_X_train_matrix)
generation_y_train_matrix = np.array(generation_y_train_matrix)
generation_X_train_reshaped = np.reshape(generation_X_train_matrix, 
                                         (generation_X_train_matrix.shape[0], 
                                          generation_X_train_matrix.shape[1], 1))
generation_y_train_reshaped = np.reshape(generation_y_train_matrix, 
                                         (generation_y_train_matrix.shape[0], 
                                          generation_y_train_matrix.shape[1], 1))

In [13]:
# Check shapes
print()
print('X_train sets shape:', generation_X_train_reshaped.shape)
print('y_train sets shape:', generation_y_train_reshaped.shape)
print()


X_train sets shape: (43392, 168, 1)
y_train sets shape: (43392, 168, 1)



## Building the LSTM

In [14]:
# Initialising the LSTM Model with MSE Loss-Function
# Using Functional API, each layer output is the input of next layer

# Input
inputs = Input(batch_shape=(batch_size,timesteps,1))

# Layer 1: LSTM 
lstm_1 = LSTM(12, 
                activation='tanh', 
                recurrent_activation='sigmoid', 
                stateful=True, 
                return_sequences=True)(inputs)
# Layer 2: LSTM 
lstm_2 = LSTM(12, 
                activation='tanh', 
                recurrent_activation='sigmoid', 
                stateful=True, 
                return_sequences=True)(lstm_1)
# Output
output = Dense(units = 1)(lstm_2)

# Sticking all layers into a Model
regressor = Model(inputs=inputs, outputs = output)

#adam is fast starting off and then gets slower and more precise
regressor.compile(optimizer='adam', loss = mean_squared_error)

# Check the model summary
regressor.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (128, 168, 1)             0         
_________________________________________________________________
lstm_1 (LSTM)                (128, 168, 12)            672       
_________________________________________________________________
lstm_2 (LSTM)                (128, 168, 12)            1200      
_________________________________________________________________
dense_1 (Dense)              (128, 168, 1)             13        
Total params: 1,885
Trainable params: 1,885
Non-trainable params: 0
_________________________________________________________________


## Run LSTM

We run the code on the cloud.

In [None]:
epochs = 10

# start time
start=time.time()

#Statefull
for i in range(epochs):
    print("\nEpoch: " + str(i))
    #run through all data but the cell, hidden state are used for the next batch.
    regressor.fit(consumption_X_train_reshaped, consumption_y_train_reshaped, 
                  shuffle=False, epochs = 1, batch_size = batch_size)
    #resets only the states but the weights, cell and hidden are kept.
    regressor.reset_states()

# duration of training the model
duration=time.time()-start


Epoch: 0
Epoch 1/1

Epoch: 1
Epoch 1/1

In [None]:
regressor.save(filepath="LSTM_Model_Consumption_128.h5")

In [None]:
from ibm_botocore.client import Config
import ibm_boto3

In [None]:

# @hidden_cell
# The following code contains the credentials for a file in your IBM Cloud Object Storage.
# You might want to remove those credentials before you share your notebook.
credentials= {
    'IAM_SERVICE_ID': 'iam-ServiceId-af7f118d-33e5-4db8-8af0-91ccdc9d6664',
    'IBM_API_KEY_ID': 'MSokvXUNblLCMbjLQdSUSZjz69jMdutBIqI6S8JZeFkh',
    'ENDPOINT': 'https://s3.eu-geo.objectstorage.service.networklayer.com',
    'IBM_AUTH_ENDPOINT': 'https://iam.eu-gb.bluemix.net/oidc/token',
    'BUCKET': 'def-donotdelete-pr-rwrregjlstqzff',
}


In [None]:
cos = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials['IBM_API_KEY_ID'],
    ibm_service_instance_id=credentials['IAM_SERVICE_ID'],
    ibm_auth_endpoint=credentials['IBM_AUTH_ENDPOINT'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials['ENDPOINT'])

In [None]:
# Upload file wine.csv from wine folder into project bucket as wine_data.csv
cos.upload_file(Filename='LSTM_Model_Consumption_720.h5',Bucket=credentials['BUCKET'],Key='LSTM_Model_Consumption_720.h5')