# 1. Load Important Libraries

In [1]:
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error
import numpy as np 
import json

# Import Libraries and packages from Keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
np.random.seed(1234)

Using TensorFlow backend.


In [2]:
def model(n_past, input_dim):
    """
    Build and compile the stacked recurrent regression network.

    The stack consists of three LSTM layers followed by one GRU layer
    (50 units each, every one followed by 20% dropout) and a single
    linear output unit.

    Args
    ----------
    n_past : Integer
        Number of time steps in each input window.

    input_dim : Integer
        Number of features supplied at every time step.

    Returns
    -------
    Regressor
        Compiled Keras Sequential model (rmsprop optimizer, MSE loss).
    """
    recurrent_units = 50
    output_units = 1
    dropout_rate = .2

    regressor = Sequential()

    # The first layer declares the input window shape; like every recurrent
    # layer except the last one it returns the full sequence so that the
    # next recurrent layer receives one vector per time step.
    regressor.add(LSTM(units=recurrent_units, return_sequences=True, input_shape=(n_past, input_dim)))
    regressor.add(Dropout(dropout_rate))

    # Second and third LSTM layers, each with dropout regularization.
    for _ in range(2):
        regressor.add(LSTM(units=recurrent_units, return_sequences=True))
        regressor.add(Dropout(dropout_rate))

    # The last recurrent layer is a GRU that returns only its final state.
    regressor.add(GRU(units=recurrent_units, return_sequences=False))
    regressor.add(Dropout(dropout_rate))

    # Single linear unit producing the regression output.
    regressor.add(Dense(units=output_units, activation='linear'))

    regressor.compile(optimizer='rmsprop', loss="mse")
    return regressor

In [3]:
from os import listdir

def find_csv_filenames( path_to_dir, suffix=".csv" ):
    """
    List the entries of a directory whose names end with a given suffix.

    Args
    ----------
    path_to_dir : String
        Directory to scan (not recursive).

    suffix : String
        Ending a name must have to be kept; the comparison is case-sensitive.

    Returns
    -------
    Filenames
        List of matching names (without the directory part), in the order
        returned by ``listdir``.
    """
    matches = []
    for entry in listdir(path_to_dir):
        if entry.endswith(suffix):
            matches.append(entry)
    return matches

In [4]:
def get_features(train_data_list, feature_index_list):
    """
    Pick the feature names located at the given positions and print them.

    Args
    ----------
    train_data_list : List
        Column names of the dataset.

    feature_index_list : List
        Positions (integer indices into ``train_data_list``) of the
        features to keep, in the desired output order.

    Returns
    -------
    col_names
        List with the selected feature names.
    """
    col_names = []
    for position in feature_index_list:
        col_names.append(train_data_list[position])
    print('Chosen features:',  col_names)
    return col_names

In [5]:
def preprocess(data, cols):
    """
    Select columns, strip thousands-separator commas and convert to floats.

    The input frame is not modified; the selected columns are copied,
    converted to strings, cleaned, and parsed as floats.

    Args
    ----------
    data : Dataframe
        Holds the dataset.

    cols : List
        Names of the columns to keep, in output order.

    Returns
    -------
    data_set
        2-D NumPy float array with one row per row of ``data`` and one
        column per entry of ``cols``.

    Raises
    ------
    ValueError
        If a cell still cannot be parsed as a float after comma removal.
    """
    # Vectorised comma removal. This avoids chained, label-based assignment
    # (data[i][j] = ...), which breaks on any index other than 0..n-1 and
    # does not reliably write through to the frame.
    cleaned = data[cols].astype(str).apply(
        lambda column: column.str.replace(",", "", regex=False)
    )
    # DataFrame.as_matrix() was removed from pandas; .values is the
    # equivalent that exists in both old and current releases.
    return cleaned.astype(float).values

In [6]:
def scale(data, lower_bound, upper_bound):
    """
    Min-max normalise ``data`` into the range [lower_bound, upper_bound].

    A new MinMaxScaler is fitted on ``data`` itself on every call and is
    not returned, so the transformation cannot be inverted by the caller
    through this function.

    Args
    ----------
    data : Array
        Values to scale, passed unchanged to ``MinMaxScaler.fit_transform``.

    lower_bound : Number
        Lower end of the target ``feature_range``.

    upper_bound : Number
        Upper end of the target ``feature_range``.

    Returns
    -------
    data_set
        Returns the scaled dataset.
    """
    return MinMaxScaler(feature_range=(lower_bound, upper_bound)).fit_transform(data)

In [7]:
def train_test_split(dataset, x_lower_bound, x_upper_bound, y_index, n_past=60, n_future=20):
    """
    Build sliding-window samples (inputs and targets) from a 2-D array.

    Despite its name this function does not split anything into train and
    test parts: it turns one time-ordered array into supervised samples.
    For every position ``i`` the input is the ``n_past`` rows before ``i``
    and the target is the value ``n_future - 1`` rows after ``i``.

    Args
    ----------
    dataset : Numpy Array
        2-D array (rows = time steps, columns = variables). It must support
        ``dataset[a:b, c:d]`` indexing, so a DataFrame is not accepted.

    x_lower_bound : Integer
        Index of the first input column (inclusive).

    x_upper_bound : Integer
        Index one past the last input column (exclusive slice end).

    y_index : Integer
        Index of the column holding the predicted value.

    n_past : Integer
        Number of past rows in each input window (default 60).

    n_future : Integer
        Forecast horizon in rows; the target is taken from row
        ``i + n_future - 1`` (default 20).

    Returns
    -------
    X_values, y_values : Numpy Array
        ``X_values`` has shape (samples, n_past, x_upper_bound - x_lower_bound)
        and ``y_values`` has shape (samples, 1). Both are empty when the
        dataset has fewer than ``n_past + n_future`` rows.

    Raises
    ------
    ValueError
        If ``n_past`` or ``n_future`` is smaller than 1.
    """
    if n_past < 1 or n_future < 1:
        raise ValueError("n_past and n_future must both be at least 1")

    X_values = []
    y_values = []

    for i in range(n_past, len(dataset) - n_future + 1):
        X_values.append(dataset[i - n_past:i, x_lower_bound:x_upper_bound])
        # A length-1 slice (not a scalar) keeps each target as shape (1,),
        # so the stacked targets form a (samples, 1) column.
        target_row = i + n_future - 1
        y_values.append(dataset[target_row:target_row + 1, y_index])

    return np.array(X_values), np.array(y_values)

In [8]:
def visuzalizaion(y, y_hat):
    """
    Plot predicted against actual target values, save the figure and show it.

    The figure is written to ``graph.png`` in the current working directory
    (overwriting any existing file), displayed, and then closed.

    Args
    ----------
    y : Array
        Actual target values (drawn in blue).

    y_hat : Array
        Predicted target values (drawn in red).

    Returns
    -------
    Empty.
    """
    # The predicted series is drawn first, then the actual series on top.
    predicted_line, = plt.plot(y_hat, 'r', label='predicted_consuption_level')
    actual_line, = plt.plot(y,'b', label = 'actual_consuption_level')

    # Titles and axis labels.
    plt.title('Predictions vs Actual Price')
    plt.xlabel('Sample index')
    plt.ylabel('Stock Price')
    plt.legend(handles=[predicted_line, actual_line])

    # Persist before showing, then release the figure.
    plt.savefig('graph.png', bbox_inches='tight')
    plt.show()
    plt.close()

In [9]:
def run_model(regressor = None, data = None):
    """
    Train and evaluate one model per CSV file in ``datasets/train_datasets``.

    For every training file, a test file with the same name is read from
    ``datasets/test_datasets``. A new network is built, fitted on the
    training windows, and its predictions on the training and test windows
    are plotted against the actual values in original (un-scaled) units.

    Args
    ----------
    regressor : Keras object
        Unused. A new model is built for every file; the parameter is kept
        only so existing callers keep working.

    data : Dataframe
        Unused. Data is always read from the dataset directories.

    Returns
    -------
    Empty.
    """
    train_dir = "datasets/train_datasets"
    test_dir = "datasets/test_datasets"

    lag = 60                # time steps per input window (model input length)
    input_dim = 2           # number of features fed to the network
    feature_range = (0, 2)  # MinMax range used for the network inputs/targets

    for name in find_csv_filenames(train_dir):
        # Import training and test datasets (same file name in both folders).
        dataset_train = pd.read_csv(os.path.join(train_dir, name))
        dataset_test = pd.read_csv(os.path.join(test_dir, name))

        # Build a fresh model for this dataset.
        regressor = model(lag, input_dim)

        # Get features (selected by column position).
        cols = get_features(list(dataset_train), [48, 49])

        # Dataset pre-processing.
        training_set = preprocess(dataset_train, cols)
        test_set = preprocess(dataset_test, cols)

        # Scalers used only to map the target column (column 0) back to its
        # original units. They must use the same feature_range as the data
        # fed to the network, otherwise inverse_transform returns wrong values.
        sc_predict_train = MinMaxScaler(feature_range=feature_range)
        sc_predict_train.fit(training_set[:, 0:1])

        sc_predict_test = MinMaxScaler(feature_range=feature_range)
        sc_predict_test.fit(test_set[:, 0:1])

        # NOTE(review): the test set is normalised with its own min/max rather
        # than with the statistics of the training set -- confirm this is intended.
        training_set_scaled = scale(training_set, *feature_range)
        test_set_scaled = scale(test_set, *feature_range)

        # Build sliding-window samples.
        X_train, y_train = train_test_split(training_set_scaled, 0, input_dim, 0)
        X_test, y_test = train_test_split(test_set_scaled, 0, input_dim, 0)

        # Fitting RNN to training set using Keras callbacks.
        es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=50, verbose=1)
        rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)
        mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)
        tb = TensorBoard('logs')

        regressor.fit(X_train, y_train, shuffle=True, epochs=100,
                      callbacks=[es, rlr, mcp, tb], validation_split=0.2, verbose=1, batch_size=10)

        # NOTE(review): the best weights are checkpointed to weights.h5 but the
        # predictions below use the weights of the last epoch -- confirm.
        predictions_train = regressor.predict(X_train)
        predictions_test = regressor.predict(X_test)

        # Inverse scaling. Prediction k belongs to target k, so both series are
        # used in full; slicing them differently would shift one against the other.
        predictions_plot_train = sc_predict_train.inverse_transform(predictions_train)
        actual_plot_train = sc_predict_train.inverse_transform(y_train)

        predictions_plot_test = sc_predict_test.inverse_transform(predictions_test)
        actual_plot_test = sc_predict_test.inverse_transform(y_test)

        # Visualize trends (training set first, then test set).
        visuzalizaion(actual_plot_train, predictions_plot_train)
        visuzalizaion(actual_plot_test, predictions_plot_test)


       

In [12]:
# Train and evaluate one model for every CSV file found in datasets/train_datasets.
run_model()
        

Chosen features: ['eload', 'Temp']
Train on 6915 samples, validate on 1729 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.01347, saving model to weights.h5
Epoch 2/100

Epoch 00002: val_loss improved from 0.01347 to 0.00528, saving model to weights.h5
Epoch 3/100

Epoch 00003: val_loss did not improve
Epoch 4/100

Epoch 00004: val_loss did not improve
Epoch 5/100

Epoch 00005: val_loss improved from 0.00528 to 0.00509, saving model to weights.h5
Epoch 6/100

Epoch 00006: val_loss did not improve
Epoch 7/100

Epoch 00007: val_loss did not improve
Epoch 8/100

Epoch 00008: val_loss did not improve
Epoch 9/100

Epoch 00009: val_loss did not improve
Epoch 10/100

Epoch 00010: val_loss did not improve
Epoch 11/100

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 00011: val_loss did not improve
Epoch 12/100

Epoch 00012: val_loss did not improve
Epoch 13/100

Epoch 00013: val_loss did not improve
Epoch 14/100

Epoch 00014: val_loss di

KeyboardInterrupt: 