# Chapter 11 - Forecasting Multiple Outputs

_pg. 155-168_

## Working with Zipped Files

In [1]:
import numpy as np
import pandas as pd
import urllib
import zipfile

url = "http://www.economicswebinstitute.org/data/stockindexes.zip"

# ATTN: you need to redefine these lines according to your system
loc = "/home/dpindk/irlab/big-data-summer-school-2017/stockindexes.zip"
dest_location = "/home/dpindk/irlab/big-data-summer-school-2017/"

# ATTN: this line is missing in the book!!!
urllib.urlretrieve(url, loc)

unzip = zipfile.ZipFile(loc, 'r')
unzip.extractall(dest_location)
unzip.close()

## Extraction of Spreadsheet Data

In [2]:
loc = "/home/dpindk/irlab/big-data-summer-school-2017/stockindexes.xls"
Excel_file = pd.ExcelFile(loc)

# print Excel_file.sheet_names
for name in Excel_file.sheet_names:
    print name

Description
Dow Jones Industrial
S&P500
NIKKEI 300
Dax30
CAC40
Swiss Market-Price Index
Mib30
IBEX 35I
Bel20
FTSE100


In [3]:
ftse_data = Excel_file.parse("FTSE100")
dj_data = Excel_file.parse("Dow Jones Industrial")

In [4]:
print ftse_data.head()

                 Start     1995-11-30 00:00:00
0                  End     2000-02-18 00:00:00
1            Frequency                       D
2                 Name  FTSE 100 - PRICE INDEX
3                 Code                 FTSE100
4  1995-11-30 00:00:00                  3664.3


In [5]:
print dj_data.head()

                 Start                  1995-11-30 00:00:00
0                  End                  2000-02-18 00:00:00
1            Frequency                                    D
2                 Name  DOW JONES INDUSTRIALS - PRICE INDEX
3                 Code                              DJINDUS
4  1995-11-30 00:00:00                              5074.49


In [6]:
ftse100 = ftse_data.iloc[4:1357, 1]
dj = dj_data.iloc[4:1357, 1]

## Check Data Values

In [7]:
print ftse100.head()

4    3664.3
5    3680.4
6    3669.7
7    3664.2
8    3662.8
Name: 1995-11-30 00:00:00, dtype: object


In [8]:
print ftse100.tail()

1352    6334.53
1353    6297.53
1354    6251.83
1355    6256.43
1356    6269.21
Name: 1995-11-30 00:00:00, dtype: object


In [9]:
print dj.head()

4    5074.49
5    5087.13
6    5139.52
7    5177.45
8    5199.13
Name: 1995-11-30 00:00:00, dtype: object


In [10]:
print dj.tail()

1352    10881.2
1353    10887.4
1354    10983.6
1355    10864.1
1356    10965.9
Name: 1995-11-30 00:00:00, dtype: object


## How to Work with Multiple Targets

In [11]:
yt = pd.concat([ftse100, dj], axis=1)
print yt.head()

  1995-11-30 1995-11-30
4     3664.3    5074.49
5     3680.4    5087.13
6     3669.7    5139.52
7     3664.2    5177.45
8     3662.8    5199.13


In [12]:
yt = yt.reset_index(drop=True)
yt.columns = ['ftse100', 'dj']
print yt.head()

  ftse100       dj
0  3664.3  5074.49
1  3680.4  5087.13
2  3669.7  5139.52
3  3664.2  5177.45
4  3662.8  5199.13


In [13]:
yt = yt.pct_change(1)
win = 30
vol_t = yt.rolling(window=win, center=True).std()

In [14]:
# Add plot here

## Creation of Hand Crafted Features

In [15]:
x1 = np.log((vol_t.shift(1) / vol_t.shift(2)) * vol_t.shift(1))
x2 = np.log((vol_t.shift(1) / vol_t.shift(3)) * vol_t.shift(1))
x3 = np.log((vol_t.shift(1) / vol_t.shift(4)) * vol_t.shift(1))
x4 = np.log((vol_t.shift(1) / vol_t.shift(5)) * vol_t.shift(1))
x5 = np.log((vol_t.shift(1) / vol_t.shift(6)) * vol_t.shift(1))

In [16]:
data = pd.concat([vol_t, x1, x2, x3, x4, x5], axis=1)
data.columns = ['ftse_t', 'dj_t',
                'ftse_t-1', 'dj_t-1',
                'ftse_t-2', 'dj_t-2',
                'ftse_t-3', 'dj_t-3',
                'ftse_t-4', 'dj_t-4',
                'ftse_t-5', 'dj_t-5',]
data = data.dropna()

## Target and Features in One Place

In [17]:
cols_y = ['ftse_t', 'dj_t']
y = data[cols_y]
cols = ['ftse_t-1', 'dj_t-1',
        'ftse_t-2', 'dj_t-2',
        'ftse_t-3', 'dj_t-3',
        'ftse_t-4', 'dj_t-4',
        'ftse_t-5', 'dj_t-5',]
x = data[cols]

## Scaling Data

In [18]:
from sklearn import preprocessing
num_attrib = 10
scaler_x = preprocessing.MinMaxScaler(feature_range=(-1,1))
x = np.array(x).reshape((len(x), num_attrib))
x = scaler_x.fit_transform(x)

num_response = 2

scaler_y = preprocessing.MinMaxScaler(feature_range=(0,1))
y = np.array(y).reshape((len(y), num_response))
y = scaler_y.fit_transform(y)

## Train and Test Sets

In [19]:
train_end = 1131
data_end = len(y)
x_train = x[0:train_end, ]
x_test = x[train_end + 1:data_end, ]
y_train = y[0:train_end]
y_test = y[train_end + 1:data_end]
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))

In [20]:
print "Shape of x_train is ", x_train.shape

Shape of x_train is  (1131, 1, 10)


In [21]:
print "Shape of x_test is ", x_test.shape

Shape of x_test is  (185, 1, 10)


## Model Specification and Fit

In [22]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import SimpleRNN
from keras.optimizers import SGD

Using Theano backend.


In [23]:
seed = 2016
num_epochs=20
np.random.seed(seed)
fit1 = Sequential()
# UPDATED
# fit1.add(SimpleRNN(output_dim=10, activation='sigmoid', input_shape=(1, num_attrib)))
fit1.add(SimpleRNN(units=10, activation='sigmoid', input_shape=(1, num_attrib)))
# UPDATED
# fit1.add(Dense(output_dim=num_response, activation='linear'))
fit1.add(Dense(units=num_response, activation='linear'))
sgd = SGD(lr=0.01, momentum=0.90, nesterov=True)
fit1.compile(loss='mean_squared_error', optimizer=sgd)
fit1.fit(x_train, y_train, batch_size=1, epochs=num_epochs)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f74e12b41d0>

In [24]:
score_train = fit1.evaluate(x_train, y_train, batch_size=1)
score_test = fit1.evaluate(x_test, y_test, batch_size=1)
print 
print "in train  MSE = ", round(score_train, 5)
print "in test   MSE = ", round(score_test, 5)

  1/185 [..............................] - ETA: 0s
in train  MSE =  0.00132
in test   MSE =  0.00105


In [25]:
pred1 = fit1.predict(x_test)
pred1 = scaler_y.inverse_transform(np.array(pred1).reshape((len(pred1), 2)))

In [26]:
# add plot here