# Household Power Consumption: prediction of electric usage

#### We develop a model that predicts household electric power consumption based on previous usage. The model needs to infer the next twenty-four observations from the past twenty-four. The baseline model we need to beat achieves a validation MAE of approximately 0.055.

We load the dataset and explore some of its statistics.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
# Read the comma-separated power-consumption file and preview its first rows.
data = pd.read_csv('household_power_consumption.csv', sep=',')
data.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
data.info()

In [None]:
# Show per-column summary statistics (count, mean, std, quartiles, min/max).
data.describe()

The dataset is quite clean, so we proceed to the next steps in preparation for training a model.

-----

Here we define the dataset for training and testing.

In [None]:
# Raw value matrix without the first column of the frame.
data_ = data.values[:, 1:]

# Column-wise extremes over all rows, used later to scale the features.
data_min, data_max = data_.min(axis=0), data_.max(axis=0)

In [None]:
# Row index where the test portion starts: rows [0, split_time) are used for
# training and rows [split_time, end) for testing.
split_time = 69120

# NOTE(review): the scaling divides by data_max, not (data_max - data_min),
# so values are not guaranteed to span [0, 1]. It is left unchanged here so
# that MAE values stay on the same scale as before -- confirm this matches
# the scaling used by the baseline.
x_train = (data_[:split_time, :] - data_min) / data_max
x_test = (data_[split_time:, :] - data_min) / data_max

# Time indices aligned one-to-one with the rows of each split. Deriving them
# from the split lengths keeps len(t_*) == len(x_*) (the previous ranges gave
# t_train one extra entry and started t_test one step late).
t_train = np.arange(x_train.shape[0])
t_test = np.arange(x_train.shape[0], x_train.shape[0] + x_test.shape[0])

In [None]:
n_past = 24    # number of past observations fed to the model
n_future = 24  # number of future observations the model must predict
window_size = n_past + n_future


def _make_windows(series, n_past, n_future):
    """Slice a 2-D series into overlapping (past, future) window pairs.

    Args:
        series: array of shape (n_rows, n_features).
        n_past: number of consecutive rows in each input window.
        n_future: number of consecutive rows, immediately following the
            input window, in each target window.

    Returns:
        A tuple ``(inputs, targets)`` of float32 arrays with shapes
        ``(n_windows, n_past, n_features)`` and
        ``(n_windows, n_future, n_features)``. ``n_windows`` is 0 when the
        series is shorter than ``n_past + n_future`` rows.
    """
    span = n_past + n_future
    # "+ 1" keeps the final window, whose target ends on the last row;
    # range(n_rows - span) would silently drop it.
    n_windows = max(series.shape[0] - span + 1, 0)
    n_features = series.shape[1]

    past = [series[i:i + n_past, :] for i in range(n_windows)]
    future = [series[i + n_past:i + span, :] for i in range(n_windows)]

    # The explicit reshape keeps the arrays 3-D even when there are no windows.
    inputs = np.array(past, dtype='float32').reshape(n_windows, n_past, n_features)
    targets = np.array(future, dtype='float32').reshape(n_windows, n_future, n_features)
    return inputs, targets


X_train, Y_train = _make_windows(x_train, n_past, n_future)
X_test, Y_test = _make_windows(x_test, n_past, n_future)

-----

Here we train our first model, a Deep Neural Network with LSTM.

In [None]:
def Model_1():
    """Build, compile and fit the single-LSTM forecaster.

    The network is LSTM(128, returning the full sequence) -> Dropout(0.5) ->
    linear Dense whose unit count equals the last axis of ``Y_train``.
    Shapes and training data come from the module-level ``X_train`` and
    ``Y_train`` arrays.

    Returns:
        The fitted ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    n_outputs = Y_train.shape[2]

    stack = [
        layers.LSTM(units=128,
                    activation='tanh',
                    recurrent_activation='sigmoid',
                    kernel_initializer='glorot_uniform',
                    bias_initializer='zeros',
                    recurrent_initializer='zeros',
                    return_sequences=True,
                    return_state=False),
        layers.Dropout(rate=0.5),
        layers.Dense(units=n_outputs,
                     activation='linear',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
    ]

    # Thread the input tensor through the layers in order.
    inputs = tf.keras.Input(shape=X_train.shape[1:])
    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)
    model = tf.keras.Model(inputs, outputs)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    epsilon=1e-07)
    model.compile(loss='huber_loss', metrics=['mae'], optimizer=adam)
    model.summary()

    # Both callbacks watch the training loss: first lower the learning rate
    # on a plateau, then stop if the plateau persists.
    training_callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                             patience=10,
                                             min_delta=0.001,
                                             factor=0.1,
                                             min_lr=0.0001),
        tf.keras.callbacks.EarlyStopping(monitor='loss',
                                         patience=20,
                                         min_delta=0.001),
    ]
    model.fit(X_train, Y_train, epochs=100, batch_size=64,
              callbacks=training_callbacks)
    return model

In [None]:
# Build and train the first model (fitting happens inside Model_1).
model_1 = Model_1()

In [None]:
# Loss and MAE of the first model on the training windows.
model_1.evaluate(X_train, Y_train)

In [None]:
# Loss and MAE of the first model on the test windows.
model_1.evaluate(X_test, Y_test)

This first model performs worse than the baseline model, so we discard it.

-----

Here we train our second model, a Deep Neural Network with Bidirectional LSTM.

In [None]:
def Model_2():
    """Build, compile and fit the bidirectional-LSTM forecaster.

    The network is Bidirectional(LSTM(128), concat) -> Dropout(0.5) ->
    Dense(128, relu) -> Dropout(0.5) -> Dense(128, relu) -> Dropout(0.5) ->
    linear Dense whose unit count equals the last axis of ``Y_train``.
    Shapes and training data come from the module-level ``X_train`` and
    ``Y_train`` arrays.

    Returns:
        The fitted ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    n_outputs = Y_train.shape[2]

    recurrent = layers.LSTM(units=128,
                            activation='tanh',
                            recurrent_activation='sigmoid',
                            kernel_initializer='glorot_uniform',
                            bias_initializer='zeros',
                            recurrent_initializer='zeros',
                            return_sequences=True,
                            return_state=False)
    stack = [
        layers.Bidirectional(recurrent, merge_mode='concat'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=128,
                     activation='relu',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=128,
                     activation='relu',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=n_outputs,
                     activation='linear',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
    ]

    # Thread the input tensor through the layers in order.
    inputs = tf.keras.Input(shape=X_train.shape[1:])
    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)
    model = tf.keras.Model(inputs, outputs)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    epsilon=1e-07)
    model.compile(loss='huber_loss', metrics=['mae'], optimizer=adam)
    model.summary()

    # Both callbacks watch the training loss: first lower the learning rate
    # on a plateau, then stop if the plateau persists.
    training_callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                             patience=10,
                                             min_delta=0.001,
                                             factor=0.1,
                                             min_lr=0.0001),
        tf.keras.callbacks.EarlyStopping(monitor='loss',
                                         patience=20,
                                         min_delta=0.001),
    ]
    model.fit(X_train, Y_train, epochs=100, batch_size=64,
              callbacks=training_callbacks)
    return model

In [None]:
# Build and train the second model (fitting happens inside Model_2).
model_2 = Model_2()

In [None]:
# Loss and MAE of the second model on the training windows.
model_2.evaluate(X_train, Y_train)

In [None]:
# Loss and MAE of the second model on the test windows.
model_2.evaluate(X_test, Y_test)

This second model also performs worse than the baseline model, so we discard it.

-----

Here we train our third and final model, a Deep Neural Network with Conv1D and Bidirectional LSTM.

In [None]:
def Model_3():
    """Build, compile and fit the Conv1D + bidirectional-LSTM forecaster.

    The network is Conv1D(256, kernel 5, causal padding, relu) ->
    Bidirectional(LSTM(128), concat) -> Dropout(0.5) -> Dense(128, relu) ->
    Dropout(0.5) -> Dense(128, relu) -> Dropout(0.5) -> linear Dense whose
    unit count equals the last axis of ``Y_train``. Shapes and training data
    come from the module-level ``X_train`` and ``Y_train`` arrays.

    Returns:
        The fitted ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    n_outputs = Y_train.shape[2]

    convolution = layers.Conv1D(filters=256,
                                kernel_size=5,
                                strides=1,
                                padding='causal',
                                activation='relu',
                                kernel_initializer='glorot_uniform',
                                bias_initializer='zeros')
    recurrent = layers.LSTM(units=128,
                            activation='tanh',
                            recurrent_activation='sigmoid',
                            kernel_initializer='glorot_uniform',
                            bias_initializer='zeros',
                            recurrent_initializer='zeros',
                            return_sequences=True,
                            return_state=False)
    stack = [
        convolution,
        layers.Bidirectional(recurrent, merge_mode='concat'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=128,
                     activation='relu',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=128,
                     activation='relu',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
        layers.Dropout(rate=0.5),
        layers.Dense(units=n_outputs,
                     activation='linear',
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros'),
    ]

    # Thread the input tensor through the layers in order.
    inputs = tf.keras.Input(shape=X_train.shape[1:])
    outputs = inputs
    for layer in stack:
        outputs = layer(outputs)
    model = tf.keras.Model(inputs, outputs)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    epsilon=1e-07)
    model.compile(loss='huber_loss', metrics=['mae'], optimizer=adam)
    model.summary()

    # Both callbacks watch the training loss: first lower the learning rate
    # on a plateau, then stop if the plateau persists.
    training_callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(monitor='loss',
                                             patience=5,
                                             min_delta=0.001,
                                             factor=0.1,
                                             min_lr=0.0001),
        tf.keras.callbacks.EarlyStopping(monitor='loss',
                                         patience=20,
                                         min_delta=0.001),
    ]
    model.fit(X_train, Y_train, epochs=100, batch_size=64,
              callbacks=training_callbacks)
    return model

In [None]:
# Build and train the third model (fitting happens inside Model_3).
model_3 = Model_3()

In [None]:
# Loss and MAE of the third model on the training windows.
model_3.evaluate(X_train, Y_train)

In [None]:
# Loss and MAE of the third model on the test windows.
model_3.evaluate(X_test, Y_test)

#### As we can see, this last model beats the MAE of the provided baseline model, so it can be considered a candidate solution to the original problem. Further exploration of the hyperparameters would be necessary to reach a final conclusion.