<a href="https://colab.research.google.com/github/dimoralesa/Diplomado_Ciencias_Datos/blob/main/Tareas/Tarea5/Tarea5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

from math import sqrt
from datetime import datetime
from dateutil.relativedelta import relativedelta

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM

from tensorflow.keras.optimizers import Adam

from scipy import stats
from statsmodels.stats.diagnostic import normal_ad

In [None]:
# Download ten years of daily GOOG quotes, ending today, and keep only the
# OHLCV columns.
now = datetime.now()
ten_years_ago = now - relativedelta(years=10)
ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
data = yf.Ticker("GOOG").history(
    start=ten_years_ago.strftime("%Y-%m-%d"),
    end=now.strftime("%Y-%m-%d"),
)[ohlcv_columns]

In [None]:
# Print the (rows, columns) shape of the downloaded frame, then display it.
rows_and_columns = data.shape
print(f'\n Forma de los datos: {rows_and_columns}\n')
data

In [None]:
# Keep the closing price only, still as a one-column DataFrame, and display it.
close_only = ['Close']
google = data[close_only]
google

In [None]:
# Length of each input sequence (number of past observations per window).
past_history = 55

# Number of future steps to forecast.
future_target = 20

def multipaso_data(dataset, target, start_index, end_index, history_size,
                      target_size,  single_step=False):
    """Cut a series into sliding input windows and their future labels.

    For every anchor position ``i`` the input window is
    ``dataset[i - history_size:i]`` and the label is either the single value
    ``target[i + target_size]`` or the run ``target[i:i + target_size]``.

    Args:
        dataset: Sequence the input windows are taken from.
        target: Sequence the labels are taken from.
        start_index: First position of ``dataset`` allowed inside a window
            (expected to be non-negative).
        end_index: Exclusive upper bound for the anchor ``i``. ``None`` means
            "as far as the data allows". Values beyond what the data allows
            are clamped so that every label is complete.
        history_size: Number of observations in each input window.
        target_size: Forecast horizon, in observations.
        single_step: If True, each label is the single value ``target_size``
            steps ahead; otherwise it is the whole run of ``target_size``
            values starting at the anchor.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays built from the windows.
    """
    windows = []
    labels = []

    first_anchor = start_index + history_size

    # Largest exclusive bound for which both label forms stay inside the data.
    # An explicit end_index past it would otherwise produce ragged label
    # windows (or an IndexError when single_step is True).
    last_anchor = min(len(dataset), len(target)) - target_size
    if end_index is None or end_index > last_anchor:
        end_index = last_anchor

    for i in range(first_anchor, end_index):
        windows.append(dataset[i - history_size:i])

        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])

    return np.array(windows), np.array(labels)

def create_and_train_model(X_train, y_train, X_test, y_test, units):
    """Build a two-layer LSTM forecaster, train it, and return one test example.

    The function reads two module-level names: ``future_target`` (width of
    the output layer) and ``scaler`` (used to map values back to the original
    scale). Both must be defined before it is called.

    Args:
        X_train: Training input windows; ``X_train.shape[1]`` is used as the
            number of timesteps and a single feature per step is assumed.
        y_train: Training labels.
        X_test: Test input windows.
        y_test: Test labels.
        units: Size of the first LSTM layer; the second uses ``units // 2``.

    Returns:
        Tuple ``(history, y_train_p, y_test_p, y_pred_p)``: the object
        returned by ``model.fit``, followed by the input window, the true
        future values and the predicted future values of the first test
        sample, each inverse-transformed with ``scaler`` into a column vector.
    """
    input_shape = (X_train.shape[1], 1)

    # The original also created a Dropout(0.0) and an extra LSTM whose output
    # was immediately overwritten; they never reached the model and were removed.
    inputs = Input(input_shape)
    x = LSTM(units=units, return_sequences=True, name='LSTM_layer')(inputs)
    x = LSTM(units=units // 2, name='LSTM_layer_2')(x)
    outputs = Dense(future_target)(x)

    model = Model(inputs=inputs, outputs=outputs, name='series_LSTM_model')
    model.compile(loss='mean_squared_error', optimizer=Adam(0.001))

    # shuffle=False keeps the samples in their given order during training.
    # NOTE(review): X_train is passed as given; if it is 2-D, Keras is
    # presumably adding the trailing feature axis - confirm.
    history = model.fit(
        X_train, y_train,
        epochs=40,
        batch_size=32,
        validation_split=0.2,
        verbose=1,
        shuffle=False
    )

    prediction = model.predict(X_test)

    # Only the first test sample is returned for inspection.
    sample = 0
    history_window = X_test[sample, :]
    true_future = y_test[sample, :]
    predicted_future = prediction[sample, :]

    # The scaler expects a 2-D column, hence reshape(-1, 1).
    history_window = scaler.inverse_transform(history_window.reshape(-1, 1))
    true_future = scaler.inverse_transform(true_future.reshape(-1, 1))
    predicted_future = scaler.inverse_transform(predicted_future.reshape(-1, 1))

    return history, history_window, true_future, predicted_future

def plot_residuals(residuals):
    """Show a probability plot of the residuals with three normality p-values.

    Runs the Shapiro-Wilk test (``stats.shapiro``), ``normal_ad`` and
    ``stats.normaltest`` on ``residuals``, draws ``stats.probplot`` on a new
    figure, and annotates the axes with the second element of each test
    result, labelled SW, AD and DAG respectively.

    Args:
        residuals: One-dimensional collection of residual values.
    """
    shapiro_result = stats.shapiro(residuals)
    anderson_result = normal_ad(np.array(residuals), axis=0)
    dagostino_result = stats.normaltest(residuals, axis=0, nan_policy='propagate')

    plt.figure(figsize=(15, 7))
    stats.probplot(residuals, plot=plt)
    axes = plt.gca()

    # (label template, test result, vertical position in axes fraction)
    annotations = (
        ("SW p-val: {:.4f}", shapiro_result, 0.9),
        ("AD p-val: {:.4f}", anderson_result, 0.8),
        ("DAG p-val: {:.4f}", dagostino_result, 0.7),
    )
    for template, result, y_position in annotations:
        axes.annotate(
            template.format(result[1]),
            xy=(0.05, y_position),
            xycoords='axes fraction',
            fontsize=15,
            bbox=dict(boxstyle="round", fc="none", ec="gray", pad=0.6),
        )

    plt.show()


def plot_confidence_intervals(residuals, y_train, y_test, y_pred):
    """Plot history, true values and predictions with a 95% error band.

    The band half-width is ``1.96 * RMSFE``, where RMSFE is the root mean
    square of ``residuals``. The band is drawn around the predictions, since
    it describes the uncertainty of the forecast rather than of the observed
    values.

    Args:
        residuals: Forecast errors used to size the band.
        y_train: 2-D array whose first column is the history to plot.
        y_test: 2-D array whose first column holds the true future values.
        y_pred: 2-D array whose first column holds the predicted values.

    Raises:
        ValueError: If ``residuals`` is empty.
    """
    errors = np.asarray(residuals, dtype=float)
    if errors.size == 0:
        raise ValueError("residuals must contain at least one value")

    # Work on 1-D NumPy arrays so the band arithmetic below is explicit
    # instead of relying on list/NumPy-scalar operator reflection.
    history = np.asarray(y_train)[:, 0]
    actual = np.asarray(y_test)[:, 0]
    forecast = np.asarray(y_pred)[:, 0]

    rmsfe = np.sqrt(np.mean(errors ** 2))
    band_size = 1.96 * rmsfe

    history_steps = np.arange(len(history))
    forecast_steps = np.arange(len(history), len(history) + len(actual))

    # A single figure: the original opened an extra, empty one before this.
    fig, ax = plt.subplots(figsize=(15, 7))
    ax.plot(history_steps, history, color='g', label='History')
    ax.plot(forecast_steps, actual, color='#fc7d0b', label='True')
    ax.scatter(forecast_steps, forecast, label='Prediction')
    ax.fill_between(forecast_steps, forecast - band_size, forecast + band_size,
                    color='b', alpha=.1)
    ax.set_title("Predictions w/ 95% Confidence")
    ax.set_xlabel('Timestep')
    ax.set_ylabel('Price')
    ax.legend()
    plt.show()

In [None]:
# Chronological 80/20 split: the first TRAIN_SPLIT rows are for training.
len_data = len(google)
len_train = int(len_data * 0.8)
len_test = len_data - len_train
TRAIN_SPLIT = len_train

# Fit the scaler on the training rows only, then apply it to the whole
# series. Fitting on all rows would leak the test period's min/max into
# training. Values from the test period may therefore fall outside [0, 1].
close_values = google.values
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:TRAIN_SPLIT])
dataset = np.squeeze(scaler.transform(close_values), axis=1)

# Training windows are anchored before TRAIN_SPLIT; test windows use the rest.
X_train, y_train = multipaso_data(dataset, dataset, 0,
                                  TRAIN_SPLIT, past_history,
                                  future_target)
X_test, y_test = multipaso_data(dataset, dataset, TRAIN_SPLIT,
                                None, past_history,
                                future_target)

print(TRAIN_SPLIT)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Train the model with 50 units in the first LSTM layer and keep the training
# history plus the rescaled first test example.
(history_google,
 y_train_google,
 y_test_google,
 y_pred_google) = create_and_train_model(X_train, y_train, X_test, y_test, units=50)

In [None]:
# Learning curves. 'val_loss' is measured on the validation_split portion of
# the training data, not on the test set, so it is labelled accordingly.
plt.plot(history_google.history['loss'], label='train')
plt.plot(history_google.history['val_loss'], label='validation')
plt.legend();

In [None]:
# Forecast errors (prediction minus truth) for the returned test example,
# flattened to scalars and sorted ascending, then plotted with the error band.
forecast_errors = [pred - true for pred, true in zip(y_pred_google, y_test_google)]
residuals_google = sorted(error[0] for error in forecast_errors)
plot_confidence_intervals(residuals_google, y_train_google, y_test_google, y_pred_google)