# Recurrent Neural Networks

## Part 1: Preprocess the data.
In the csv files, you will find several columns. you only need the "close" column for this exercise


reshape the data! see examples

In [None]:
# Part 1 - Data Preprocessing

# Importing the libraries


# Importing the training set


# Creating a data structure with 30 timesteps and 1 output


# Reshaping [samples,timesteps,features]
# remember: samples: how many observations you have
#           timesteps: how long does the rnn remember (30 in our case)
#           features: how many variables (1 in our case)


In [2]:
import numpy as np
import pandas as pd

In [3]:
# train
df_train = pd.read_csv('Google_Stock_Price_Train.csv')
close_train = df_train['Close']
close_train = close_train.astype(str).str.replace(',', '')
close_train = pd.to_numeric(close_train)

# test
df_test = pd.read_csv('Google_Stock_Price_Test.csv')
close_test = df_test['Close']
close_test = close_test.astype(str).str.replace(',', '')
close_test = pd.to_numeric(close_test)
close_test = np.concatenate([np.zeros(30), close_test])

# normalize data
mean = close_train.mean()
stdv = close_train.std()
close_train = (close_train - mean) / stdv
close_test = (close_test - mean) / stdv

# print shapes
print(close_train.shape)
print(close_test.shape)

(1258,)
(50,)


In [4]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

X_train, Y_train = split_sequence(close_train, 30)
print(X_train)
print(X_train.shape, '\n')
print(Y_train)
print(Y_train.shape, '\n')

[[-0.29789921 -0.28053984 -0.33662394 ... -0.65807564 -0.62001857
  -0.63476796]
 [-0.28053984 -0.33662394 -0.39106921 ... -0.62001857 -0.63476796
  -0.66020003]
 [-0.33662394 -0.39106921 -0.55786477 ... -0.63476796 -0.66020003
  -0.65437312]
 ...
 [ 0.14209387  0.27811601  0.31447356 ...  0.47702032  0.46882622
   0.47878054]
 [ 0.27811601  0.31447356  0.35544408 ...  0.46882622  0.47878054
   0.43932744]
 [ 0.31447356  0.35544408  0.29055891 ...  0.47878054  0.43932744
   0.4256099 ]]
(1228, 30) 

[-0.66020003 -0.65437312 -0.66578417 ...  0.43932744  0.4256099
  0.35902521]
(1228,) 



In [5]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_samples = X_train.shape[0] # 1228
n_steps = X_train.shape[1] # 30
n_features = 1 # "close" column

X_train = X_train.reshape((n_samples, n_steps, n_features))

In [6]:
# https://www.tensorflow.org/tutorials/structured_data/time_series
# https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)

## part 2: model building
Build The RNN, you can use LSTM or GRU

In [8]:
# Part 2 - Building the RNN

# Importing the Keras libraries and packages


# Initialising the RNN


# Compiling the RNN

# Fitting the RNN to the Training set


In [9]:
from keras import models, layers

Using TensorFlow backend.


#### LSTM

In [10]:
lstm_model = models.Sequential([
    layers.LSTM(32, input_shape=X_train.shape[-2:]),
    layers.Dense(1)
])
print(lstm_model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                4352      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 4,385
Trainable params: 4,385
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
lstm_model.compile(optimizer='rmsprop', loss='mse')

history = lstm_model.fit(X_train, y_train, epochs=20, validation_data=(X_val, y_val))

Train on 982 samples, validate on 246 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Part 3: Making Predictions
make predictions in the Test dataset

In [12]:
X_train, Y_train = split_sequence(close_test, 30)

# reshape from [samples, timesteps] into [samples, timesteps, features]
n_samples = X_train.shape[0]
n_steps = X_train.shape[1] # 30
n_features = 1 # "close" column

X_train = X_train.reshape((n_samples, n_steps, n_features))

In [13]:
lstm_model.evaluate(X_train, Y_train)



1.3627281188964844