# Chapter 12 - Strategies to Build Superior Models

_pg. 169-175_

## UK Unemployment Rate Data

Before we can work with this data, we need to look at how it was obtained and processed in Chapter 8.

In [1]:
# From Chapter 8 pg.105-112
import numpy as np
import pandas as pd
import urllib

# UPDATED
# url = "http://www.bankofengland.co.uk/research/Documents/onebank/threecenturies_v2.3.xlsx"
# dataset has been updated to version 3 and renamed
# now found @ http://www.bankofengland.co.uk/research/Documents/datasets/millenniumofdata_v3_final.xlsx
# using other url for version 2.3
url = "https://github.com/diku-irlab/big-data-summer-school-2017/raw/dev/threecenturies_v2.3.xlsx"

# ATTN: you need to redefine `loc` accordingly
loc = "/home/dpindk/irlab/big-data-summer-school-2017/UK_Economic.xls"
urllib.urlretrieve(url, loc)

Excel_file = pd.ExcelFile(loc)

spreadsheet = Excel_file.parse("A1. Headline series")

# extracting the target data
unemployment = spreadsheet.iloc[201:361, 15]

# lagged data
inflation = spreadsheet.iloc[200:361, 28]
bank_rate = spreadsheet.iloc[200:361, 30]
debt = spreadsheet.iloc[200:361, 57]
GDP_trend = spreadsheet.iloc[200:361, 3]

# organizing and saving out the data
x = pd.concat([GDP_trend, debt, bank_rate, inflation], axis=1)
x.columns = ["GDP_trend", "debt", "bank_rate", "inflation"]

x['debt'] = x['debt'].astype('float64')
x['bank_rate'] = x['bank_rate'].astype('float64')
x['GDP_trend'] = x['GDP_trend'].astype('float64')
x['inflation'] = x['inflation'].astype('float64')

y = unemployment
y = pd.to_numeric(y)

loc = "/home/dpindk/irlab/big-data-summer-school-2017/economic_x.csv"
x.to_csv(loc)
loc = "/home/dpindk/irlab/big-data-summer-school-2017/economic_y.csv"
y.to_csv(loc)

In [2]:
# Now back to Chapter 12
import numpy as np
import pandas as pd

loc = "/home/dpindk/irlab/big-data-summer-school-2017/economic_x.csv"
x = pd.read_csv(loc)
x = x.drop(x.columns[[0]], axis=1)

loc = "/home/dpindk/irlab/big-data-summer-school-2017/economic_y.csv"
y = pd.read_csv(loc, header=None)
y = y.drop(y.columns[[0]], axis=1)


## A Quick Peek

In [3]:
print x.head()

   GDP_trend        debt  bank_rate  inflation
0   0.079944  121.127584        5.0  11.668484
1  -0.406113  120.856960        7.0   0.488281
2   2.193677  117.024347        6.0  -0.485909
3   0.190602  117.183618        8.0  -3.613281
4  -1.505673  120.018119        2.5  -8.409321


In [4]:
print y.head()

          1
0  3.790930
1  3.572757
2  4.008832
3  5.309585
4  3.325983


## Adjust the Data Scale

In [5]:
from sklearn import preprocessing
scaler_x = preprocessing.MinMaxScaler(feature_range=(0,1))
x = np.array(x).reshape((len(x), 4))
x = scaler_x.fit_transform(x)

scaler_y = preprocessing.MinMaxScaler(feature_range=(0,1))
y = np.array(y).reshape((len(y), 1))
y = scaler_y.fit_transform(y)

## Create Train and Test Set

In [6]:
x_train = x[0:136, ]
x_test = x[137:161, ]
y_train = y[0:136]
y_test = y[137:161]

## ReLU with Keras

In [7]:
from keras.models import Sequential
from keras.layers import Dense

seed = 2016
np.random.seed(seed)
fit1 = Sequential()
fit1.add(Dense(40, input_dim=4, init='uniform', activation='relu'))
fit1.add(Dense(20, init='uniform', activation='relu'))
fit1.add(Dense(1, init='normal'))

epochs = 3000
fit1.compile(loss='mean_squared_error', optimizer='adam')
fit1.fit(x_train, y_train, nb_epoch=epochs, batch_size=10)

Using Theano backend.


Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

<keras.callbacks.History at 0x7f02fbfaeed0>

In [38]:
pred1 = fit1.predict(x_test)
pred1 = scaler_y.inverse_transform(pred1)
xs = range(len (pred1) - 1)
ys = scaler_y.inverse_transform(y[137:161,])

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
plt.rcParams['figure.figsize'] = (8,6)

# Original Series
plt.plot(xs, pred1[:-1], '-', linestyle='solid', label="Predicted", color="darkblue", linewidth='1')

# Predictions
plt.plot(xs, ys, '-', linestyle='solid', label="Predicted", color="red", linewidth='1')

# Desired Tolerance

# Answer:
plt.plot(xs, ys+1500, linestyle='solid', label="Tolerance", color="grey", linewidth='0.5')
plt.plot(xs, ys-1500, linestyle='solid', color="grey", linewidth='0.5')

# Figure Settings
# plt.title("Figure 3.6: Observed and predicted values for COE")
# plt.xlabel("Date")
# plt.ylabel("Singaporean dollars")
plt.ylim(-2,14)
plt.legend()
plt.show()

[ 6.18003233]


ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

## Adding Drop Out with Keras

In [39]:
from keras.layers import Dropout
dropout1 = 0.05
dropout2 = 0.05
fit2 = Sequential()

fit2.add(Dense(40, input_dim=4, init='uniform', activation='relu'))
fit2.add(Dropout(dropout1))

fit2.add(Dense(20, init='uniform', activation='relu'))
fit2.add(Dropout(dropout2))

fit2.add(Dense(1, init='normal'))

fit2.compile(loss='mean_squared_error', optimizer='adam')
fit2.fit(x_train, y_train, nb_epoch=epochs, batch_size=10)



Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

<keras.callbacks.History at 0x7f85fe19fe90>

## Using Early Stopping in Keras

In [41]:
from keras.callbacks import EarlyStopping
fit3 = Sequential()
fit3.add(Dense(40, input_dim=4, init='uniform', activation='relu'))
fit3.add(Dense(20, init='uniform', activation='relu'))
fit3.add(Dense(1, init='normal'))
fit3.compile(loss='mean_squared_error', optimizer='adam')

  app.launch_new_instance()


## Validation Set

In [43]:
y_valid = y_train[112:136]
x_valid = x_train[112:136]

fit3.fit(x_train, y_train,
         epochs=epochs,
         batch_size=10,
         validation_data=(x_valid, y_valid),
         callbacks=[EarlyStopping(monitor='val_loss', patience=100, verbose=2, mode='auto')]
        )

Train on 136 samples, validate on 24 samples
Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/300

<keras.callbacks.History at 0x7f85fd0f4f90>

In [44]:
# NOTE
# To see the actual model weights
fit1.get_weights()

[array([[-0.02655321,  0.01151112, -0.13541199, -0.1077975 , -0.05423352,
         -0.0709541 , -0.20546918, -0.03363619, -0.06213703, -0.09766508,
         -0.03272305, -0.14911899,  0.01245292,  0.05950642, -0.11391339,
         -0.11182928, -0.02776561, -0.04446455, -0.01656682, -0.19344218,
          0.04715702, -0.31030273, -0.23916577, -0.08917759, -0.11944967,
         -0.00172183,  0.01871762,  0.04561178, -0.04968759,  0.01391866,
         -0.11518813,  0.03666057,  0.02826314, -0.03798304, -0.04002438,
         -0.10841525, -0.05933187,  0.04001876, -0.0976073 , -0.15590626],
        [ 0.01563946,  0.02362541, -0.09186935, -0.03063845, -0.11146233,
         -0.18766861, -0.60756892, -0.04551579, -0.09106056, -0.09785523,
         -0.03120759, -0.38262954, -0.03604712,  0.29336393, -0.16130555,
         -0.14520933, -0.03457343, -0.03036063,  0.01037337, -0.14746545,
         -0.16348028, -0.18988116, -0.17084067, -0.09472258, -0.12678114,
         -0.03174303, -0.04331715, -0