# Data Preprocessing
### import libraries

In [1]:
import sys, os
import numpy as np #only numpy arrays can be the input of keras
import pandas as pd
import matplotlib.pyplot as plt

### Train set

In [2]:
# Load the training data from the CSV file.
dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')

# Keep column 1 (the open prices) as a 2-D array with a single column.
# The 1:2 slice is deliberate: plain `iloc[:, 1]` would collapse the result to a 1-D array.
open_price_column = dataset_train.iloc[:, 1:2]
training_set = open_price_column.values

### Feature scaling with normalization: $x' = \dfrac{x - \min(x)}{\max(x) - \min(x)}$

In [3]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler targeting the [0, 1] range, fitted on the training prices.
sc = MinMaxScaler(feature_range=(0, 1)).fit(training_set)

# Store the scaled values under a new name so the unscaled training_set stays available.
training_set_scaled = sc.transform(training_set)

### Creating a data structure with n timesteps and 1 output

In [4]:
n_timesteps = 60  # length of the look-back window: number of previous prices per sample

# Scaled prices as a flat 1-D series (column 0 of training_set_scaled).
scaled_prices = training_set_scaled[:, 0]

# One sample for every row that has a full window of n_timesteps earlier prices.
# Clamped at 0 so that a series shorter than the window produces an empty
# (0, n_timesteps) array rather than a 1-D empty array with no timestep axis.
n_samples = max(scaled_prices.shape[0] - n_timesteps, 0)

# window_idx[k, j] == k + j, so row k selects prices k .. k + n_timesteps - 1.
# Built by broadcasting instead of a Python loop with list appends.
window_idx = np.arange(n_samples)[:, None] + np.arange(n_timesteps)[None, :]

# X_train: (n_samples, n_timesteps) - the n_timesteps prices preceding each target.
X_train = scaled_prices[window_idx]
# y_train: (n_samples,) - the price that immediately follows each window.
# Copied so it does not share memory with training_set_scaled.
y_train = scaled_prices[n_timesteps:].copy()

### Reshaping to add the indicator (feature) dimension

In [5]:
# RNN input is 3-D: (batch_size, timesteps, number of indicators).
# The open price is the only indicator here, so the last axis has length 1.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

# Building the RNN
### import libraries

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout # to prevent overfitting

### Initialising the RNN

In [7]:
# Start from an empty Keras Sequential model (the RNN); its layers are appended with .add().
# It is called "regressor" because it predicts a continuous value (a price);
# a classifier, by contrast, predicts categories.
regressor = Sequential()

### Adding the first LSTM layer and some Dropout regularisation

In [8]:
# First recurrent layer.
#   units            : number of LSTM units (neurons) in the layer.
#   return_sequences : True because more LSTM layers are going to be stacked after this one.
#   input_shape      : (timesteps, indicators), i.e. the last two axes of X_train;
#                      the leading batch axis is not part of it.
regressor.add(
    LSTM(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)

# Dropout regularisation to limit overfitting: a fraction (rate) of 0.2 of the
# units is dropped at each training iteration.
regressor.add(Dropout(rate=0.2))
