In [1]:
# RNN for Koninklijke Philips NV (PHIA)
# Manufacturing Industry

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
import sklearn.preprocessing
import datetime
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import tensorflow.compat.v1 as tfc
tfc.disable_v2_behavior()
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
# Load the PHIA price history from CSV, then replace the textual 'Date'
# column with its parsed datetime values.
df = pd.read_csv("PHIA.csv")
df = df.assign(Date=pd.to_datetime(df['Date']))

In [4]:
# function to normalize the price
def normalize_stock_price(df):
    """Min-max scale the four price columns of ``df``, modifying it in place.

    'StartPrice', 'MaxPrice', 'MinPrice' and 'EndPrice' are each scaled
    independently: the same default-configured ``MinMaxScaler`` object is
    re-fitted on every column in turn.

    Args:
        df: DataFrame holding the four price columns. It is mutated.

    Returns:
        The same ``df`` object, with the price columns overwritten by their
        scaled values.
    """
    scaler = sklearn.preprocessing.MinMaxScaler()
    for price_col in ('StartPrice', 'MaxPrice', 'MinPrice', 'EndPrice'):
        # The scaler expects a 2-D (n_samples, 1) array, hence the reshape.
        column_2d = df[price_col].values.reshape(-1, 1)
        df[price_col] = scaler.fit_transform(column_2d)
    return df


# create train, test, validation sets
def split_sets(stock, seq_len, valid_set_size=0.1, test_set_size=0.1):
    """Cut a price table into sliding windows and split them chronologically.

    Windows of ``seq_len`` consecutive rows are built for every start index
    in ``range(len(stock) - seq_len)``. Within each window, all rows except
    the last are the model input and the last row is the target. Windows are
    then split, in order, into train / validation / test partitions.

    Note: the window starting at ``len(stock) - seq_len`` (the one ending on
    the final row) is not generated; this matches the original behavior.

    Args:
        stock: 2-D table of shape (n_rows, n_features); a pandas DataFrame
            or anything ``np.asarray`` can convert.
        seq_len: Number of rows per window (inputs plus one target row).
        valid_set_size: Fraction of the windows used for validation.
        test_set_size: Fraction of the windows used for testing.

    Returns:
        ``[x_train, y_train, x_valid, y_valid, x_test, y_test]`` where each
        ``x_*`` has shape (n, seq_len - 1, n_features) and each ``y_*`` has
        shape (n, n_features).

    Raises:
        ValueError: If ``stock`` is not 2-D or has too few rows to build a
            single window.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray yields the
    # same underlying values and also accepts plain arrays.
    data_raw = np.asarray(stock)
    if data_raw.ndim != 2:
        raise ValueError(
            "stock must be 2-dimensional (rows x features), got %d dimension(s)"
            % data_raw.ndim)

    n_windows = len(data_raw) - seq_len
    if n_windows <= 0:
        raise ValueError(
            "need more than seq_len=%d rows to build a window, got %d"
            % (seq_len, len(data_raw)))

    # One window per start index, stacked into (n_windows, seq_len, n_features).
    data = np.array([data_raw[start:start + seq_len]
                     for start in range(n_windows)])

    n_valid = int(np.round(valid_set_size * data.shape[0]))
    n_test = int(np.round(test_set_size * data.shape[0]))
    n_train = data.shape[0] - (n_valid + n_test)
    valid_end = n_train + n_valid

    # training set: earliest windows
    x_train = data[:n_train, :-1, :]
    y_train = data[:n_train, -1, :]

    # validation set: windows immediately after the training range
    x_valid = data[n_train:valid_end, :-1, :]
    y_valid = data[n_train:valid_end, -1, :]

    # test set: all remaining (latest) windows
    x_test = data[valid_end:, :-1, :]
    y_test = data[valid_end:, -1, :]

    return [x_train, y_train, x_valid, y_valid, x_test, y_test]


# Drop the columns this model does not use. `columns=` replaces the
# positional axis argument (`drop([...], 1)`), which pandas 2.x no longer
# accepts. drop() returns a new frame, so `df` itself is left unmodified.
df_stock = df.drop(columns=['Mnemonic', 'Date'])


# Normalize stock prices on a copy so df_stock keeps the raw values.
# NOTE(review): the scaling is fitted on the full series before it is split,
# so validation/test rows influence the scaling of the training rows.
df_stock_norm = normalize_stock_price(df_stock.copy())

# Create the train / validation / test windows.
# Sequence length: rows per window (seq_len - 1 input rows + 1 target row).
seq_len = 20
x_train, y_train, x_valid, y_valid, x_test, y_test = split_sets(df_stock_norm, seq_len)


  


In [5]:
# Mini-batch state shared with get_next_batch:
#   index_in_epoch - cursor into the current shuffled ordering
#   perm_array     - shuffled row indices into the training arrays
index_in_epoch = 0
perm_array = np.arange(len(x_train))
np.random.shuffle(perm_array)

# function to get the next batch
def get_next_batch(batch_size):
    """Return the next shuffled training mini-batch as ``(x, y)``.

    Reads and advances the module-level cursor ``index_in_epoch``. When the
    requested batch would run past the end of ``x_train``, ``perm_array`` is
    reshuffled in place and the batch is taken from the start of the new
    ordering; the unconsumed tail of the previous ordering is skipped.

    Args:
        batch_size: Number of samples to return.

    Returns:
        Tuple ``(x_batch, y_batch)`` selected from the module-level
        ``x_train`` / ``y_train`` via ``perm_array``.
    """
    global index_in_epoch

    batch_start = index_in_epoch
    batch_end = batch_start + batch_size

    if batch_end > x_train.shape[0]:
        # Epoch exhausted: draw a new ordering and restart from its head.
        np.random.shuffle(perm_array)
        batch_start, batch_end = 0, batch_size

    index_in_epoch = batch_end
    picked = perm_array[batch_start:batch_end]
    return x_train[picked], y_train[picked]


# Network / training hyper-parameters for the RNN.
n_steps = seq_len - 1      # input rows per window (the window's last row is the target)
n_inputs = 4               # features per time step
n_neurons = 300            # units in each recurrent cell
n_outputs = 4              # values predicted per sample
n_layers = 3               # number of stacked recurrent cells
learning_rate = 0.001
batch_size = 50
n_epochs = 100
train_set_size = len(x_train)
test_set_size = len(x_test)



# Start from an empty default graph, then declare the feed points:
# X receives input windows, y receives the matching target rows.
ops.reset_default_graph()
X = tfc.placeholder(dtype=tf.float32, shape=[None, n_steps, n_inputs])
y = tfc.placeholder(dtype=tf.float32, shape=[None, n_outputs])

In [6]:
# Switch TensorFlow to v1-style (graph/session) behavior.
# NOTE: the import cell at the top of the notebook already makes this same
# call, so this line repeats it.
tfc.disable_v2_behavior()

In [7]:
# One BasicRNNCell (ELU activation, n_neurons units) per stacked layer.
layers = [
    tfc.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.elu)
    for _layer_idx in range(n_layers)
]


Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.


In [8]:
# Chain the per-layer cells into a single multi-layer recurrent cell.
multi_layer_cell = tfc.nn.rnn_cell.MultiRNNCell(cells=layers)

Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.


In [9]:
# Unroll the stacked cell over the time axis of X.
rnn_outputs, states = tfc.nn.dynamic_rnn(
    cell=multi_layer_cell, inputs=X, dtype=tf.float32)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [10]:
# Flatten to rows of n_neurons values so one dense layer can be applied
# to every row at once.
stacked_rnn_outputs = tf.reshape(rnn_outputs, shape=[-1, n_neurons])

In [11]:
# Dense projection from n_neurons values down to n_outputs values per row.
stacked_outputs = tfc.layers.dense(inputs=stacked_rnn_outputs, units=n_outputs)

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [12]:
# Restore the per-step layout, then keep only the final time step
# (index n_steps - 1) as the prediction for each sample.
per_step_outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = per_step_outputs[:, n_steps - 1, :]
                                              

In [13]:
# Loss: mean squared error between predictions and targets,
# minimized with the Adam optimizer.
squared_error = tf.square(outputs - y)
mse = tf.reduce_mean(squared_error)
optimizer = tfc.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [14]:
# Train the RNN, reporting train/validation MSE every 5 epochs, then compute
# predictions for all three partitions with the final weights.
total_steps = int(n_epochs*train_set_size/batch_size)
# max(1, ...) avoids a modulo-by-zero when 5 epochs amount to less than one
# training step (train_set_size much smaller than batch_size).
report_every = max(1, int(5*train_set_size/batch_size))

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for iteration in range(total_steps):
        x_batch, y_batch = get_next_batch(batch_size)
        sess.run(training_op, feed_dict={X: x_batch, y: y_batch})
        if iteration % report_every == 0:
            mse_train = mse.eval(feed_dict={X: x_train, y: y_train})
            mse_valid = mse.eval(feed_dict={X: x_valid, y: y_valid})
            print('%.2f epochs: MSE on training set/validation set = %.6f/%.6f'%(
                iteration*batch_size/train_set_size, mse_train, mse_valid))

    # Predict once, after training has finished. These calls previously sat
    # inside the loop, re-running a full forward pass over every partition on
    # each training step; only the last iteration's result was ever kept.
    # They must stay inside the `with` block: the trained variables live in
    # this session.
    y_train_pred = sess.run(outputs, feed_dict={X: x_train})
    y_valid_pred = sess.run(outputs, feed_dict={X: x_valid})
    y_test_pred = sess.run(outputs, feed_dict={X: x_test})

0.00 epochs: MSE on training set/validation set = 0.161899/0.124204
5.00 epochs: MSE on training set/validation set = 0.729387/0.912674
10.00 epochs: MSE on training set/validation set = 0.128253/0.100558
15.00 epochs: MSE on training set/validation set = 0.083117/0.033355
20.00 epochs: MSE on training set/validation set = 0.063693/0.059357
25.00 epochs: MSE on training set/validation set = 0.061648/0.052795
30.00 epochs: MSE on training set/validation set = 0.088136/0.089492
35.00 epochs: MSE on training set/validation set = 0.026485/0.019736
40.00 epochs: MSE on training set/validation set = 0.058741/0.060791
45.00 epochs: MSE on training set/validation set = 0.016643/0.023359
50.00 epochs: MSE on training set/validation set = 0.040342/0.074167
55.00 epochs: MSE on training set/validation set = 0.013127/0.025171
60.00 epochs: MSE on training set/validation set = 0.023321/0.011965
65.00 epochs: MSE on training set/validation set = 0.016224/0.009489
70.00 epochs: MSE on training set/va