# Redes neuronales recurrentes

## Parte 1.- Pre-procesado de datos

In [21]:
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt

### Importamos datos de entrenamiento

In [22]:
# Load the training CSV into a DataFrame.
data_train = pd.read_csv(filepath_or_buffer="Google_Stock_Price_Train.csv")

In [23]:
# Preview the first five rows of the training data.
data_train.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800


In [24]:
# Keep only the "Open" price as a 2-D array with a single column.
# The column is selected by name rather than by position (iloc), so the cell
# keeps picking the right data if the CSV column order changes; the double
# brackets keep the result 2-D (one column) instead of a flat 1-D series.
training_set = data_train[["Open"]].values

In [25]:
# Display the extracted training array (NumPy abbreviates long arrays with "...").
training_set

array([[325.25],
       [331.27],
       [329.83],
       ...,
       [793.7 ],
       [783.33],
       [782.75]])

### Escalado de características
Por lo general, para el escalado usamos la normalización min-max:

$$x_{\text{norm}} = \frac{x - \min(x)}{\max(x) - \min(x)}$$

In [26]:
from sklearn.preprocessing import MinMaxScaler

In [27]:
# Min-max scaler configured to map the feature onto the [0, 1] range.
sc = MinMaxScaler(
    feature_range=(0, 1),
)

In [28]:
# fit() learns the column minimum and maximum (the parameters of the formula);
# transform() then applies the scaling formula to the same data.
training_set_scaled = sc.fit(training_set).transform(training_set)

In [29]:
# Check the dimensions of the scaled training array.
np.shape(training_set_scaled)

(1258, 1)

In [30]:
# Display the scaled training values (NumPy abbreviates long arrays with "...").
training_set_scaled

array([[0.08581368],
       [0.09701243],
       [0.09433366],
       ...,
       [0.95725128],
       [0.93796041],
       [0.93688146]])

### Estructura con 60 time steps y 1 salida, es decir, verá 60 días atrás y predecirá el día siguiente

In [31]:
# Build the supervised samples with a sliding window over the scaled series:
# row k of X_train holds the values at positions k .. k+TIMESTEPS-1 and
# y_train[k] is the value at position k+TIMESTEPS (the one to predict).
TIMESTEPS = 60  # look-back window length, in rows of the training set

# Take the upper bound from the data itself instead of hard-coding the row
# count, so the cell still works if the CSV gains or loses rows.
n_rows = len(training_set_scaled)
if n_rows <= TIMESTEPS:
    raise ValueError(
        f"Need more than {TIMESTEPS} rows to build a training window, got {n_rows}"
    )

X_train = np.array(
    [training_set_scaled[end - TIMESTEPS:end, 0] for end in range(TIMESTEPS, n_rows)]
)
# Targets are simply the series shifted by TIMESTEPS; copy() keeps y_train
# independent from training_set_scaled, as the original list-built array was.
y_train = training_set_scaled[TIMESTEPS:, 0].copy()

In [32]:
# Display the windowed inputs; each row is one window of consecutive scaled values.
X_train

array([[0.08581368, 0.09701243, 0.09433366, ..., 0.07846566, 0.08034452,
        0.08497656],
       [0.09701243, 0.09433366, 0.09156187, ..., 0.08034452, 0.08497656,
        0.08627874],
       [0.09433366, 0.09156187, 0.07984225, ..., 0.08497656, 0.08627874,
        0.08471612],
       ...,
       [0.92106928, 0.92438053, 0.93048218, ..., 0.95475854, 0.95204256,
        0.95163331],
       [0.92438053, 0.93048218, 0.9299055 , ..., 0.95204256, 0.95163331,
        0.95725128],
       [0.93048218, 0.9299055 , 0.93113327, ..., 0.95163331, 0.95725128,
        0.93796041]])

In [34]:
# Show the shape of the first input window and its target value, one per line.
print(X_train[0].shape, y_train[0], sep="\n")

(60,)
0.08627874097775134


### Ahora mismo tenemos una matriz en el conjunto de entrenamiento, donde la primera fila contiene los valores del 0 al 59, la segunda del 1 al 60, etc. Sin embargo, se podrían añadir más datos en un nivel de profundidad (otra dimensión), con el fin de obtener un mejor resultado

### Redimensionar los datos

In [48]:
# Append a trailing axis of length 1 so that each sample becomes
# (timesteps, 1): one value per time step in the third dimension.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
print(X_train[0].shape)

(60, 1)


In [49]:
# Confirm the final 3-D shape of the training inputs.
np.shape(X_train)

(1198, 60, 1)

In [50]:
from keras.models import Sequential
from keras.layers import Dense,LSTM,Dropout # Dropout regularizes the network to limit overfitting

In [62]:
regresor = Sequential() # this is a regression model, not a classifier

In [63]:
# First LSTM layer, 50 units. return_sequences=True because more LSTM layers
# are stacked after it (only the last LSTM layer should use False).
# input_shape is (timesteps, 1): one value per time step.
regresor.add(
    LSTM(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)

In [64]:
# Randomly deactivate 20% of the units during training to limit overfitting.
regresor.add(Dropout(rate=0.2))

In [65]:
# Second stacked LSTM layer (50 units, still returning the full sequence),
# followed by 20% dropout.
regresor.add(
    LSTM(units=50, return_sequences=True)
)
regresor.add(Dropout(rate=0.2))

In [66]:
# Third stacked LSTM layer (50 units, still returning the full sequence),
# followed by 20% dropout.
regresor.add(
    LSTM(units=50, return_sequences=True)
)
regresor.add(Dropout(rate=0.2))

In [67]:
# Final LSTM layer. return_sequences=False so that only the last time step's
# output is passed on to the output layer.
# units was 2 here, which contradicts the "50 units per layer" design stated
# for this stack and squeezes everything through a 2-unit bottleneck right
# before the output; use 50 like the other LSTM layers.
regresor.add(LSTM(units=50, return_sequences=False))
# Same 20% dropout as after the previous LSTM layers.
regresor.add(Dropout(0.2))

In [68]:
# Output layer: a single dense unit producing one predicted value per sample.
regresor.add(Dense(units=1))

## Compilar la red neuronal


In [69]:
# Regression problem: train with the Adam optimizer and minimise the
# mean squared error between predictions and targets.
regresor.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

### Ajustar la red neuronal al conjunto de entrenamiento

In [70]:
# Train for 100 epochs, updating the weights on mini-batches of 32 samples.
regresor.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78



<keras.callbacks.History at 0x1c0418c8820>

In [72]:
# Load the test CSV into a DataFrame.
data_test = pd.read_csv(filepath_or_buffer='Google_Stock_Price_Test.csv')

In [73]:
# Preview the first five rows of the test data.
data_test.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2017,778.81,789.63,775.8,786.14,1657300
1,1/4/2017,788.36,791.34,783.16,786.9,1073000
2,1/5/2017,786.08,794.48,785.02,794.02,1335200
3,1/6/2017,795.26,807.9,792.2,806.15,1640200
4,1/9/2017,806.4,809.97,802.83,806.65,1272400


In [74]:
# Real "Open" prices of the test period as a 2-D array with a single column.
# The column is selected by name rather than by position (iloc), so the cell
# keeps picking the right data if the CSV column order changes; the double
# brackets keep the result 2-D.
real_stock_price = data_test[["Open"]].values

In [75]:
# Display the real prices extracted from the test data.
real_stock_price

array([[778.81],
       [788.36],
       [786.08],
       [795.26],
       [806.4 ],
       [807.86],
       [805.  ],
       [807.14],
       [807.48],
       [807.08],
       [805.81],
       [805.12],
       [806.91],
       [807.25],
       [822.3 ],
       [829.62],
       [837.81],
       [834.71],
       [814.66],
       [796.86]])

In [76]:
# Check how many test rows were extracted (rows, columns).
np.shape(real_stock_price)

(20, 1)

# Visualizar los resultados obtenidos