In [None]:
###################################
##### Importar Librerías ##########
###################################
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
import yfinance as yf
import datetime as dt
import time
from tensorflow.keras.callbacks import EarlyStopping
%matplotlib inline


In [None]:
###########################################
####  Download Data from Yahoo Finance ####
###########################################
# Sample period for the S&P 500 index (^GSPC); yfinance treats `end` as exclusive.
start_date = dt.datetime(2021, 4, 1)
end_date = dt.datetime(2025, 4, 1)

# auto_adjust is passed explicitly: its default changed between yfinance
# releases, and pinning it keeps the downloaded prices stable across versions.
db = yf.download('^GSPC', start=start_date, end=end_date, auto_adjust=True)

# Fail here with a clear message instead of letting an empty frame flow into
# the training loop and the plotting cells.
if db.empty:
    raise RuntimeError("yfinance returned no rows for ^GSPC; check the network connection and the date range")

print(db.head())

# Keep only the closing price, as a one-column DataFrame (2-D, as the scaler expects).
data = db[['Close']]


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

Price             Close         High          Low         Open      Volume
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
Date                                                                      
2021-04-01  4019.870117  4020.629883  3992.780029  3992.780029  4162130000
2021-04-05  4077.909912  4083.419922  4034.439941  4034.439941  4005030000
2021-04-06  4073.939941  4086.229980  4068.139893  4075.570068  4081270000
2021-04-07  4079.949951  4083.129883  4068.310059  4074.290039  4120810000
2021-04-08  4097.169922  4098.189941  4082.540039  4089.949951  3907100000





In [None]:
#########################################
##### Function to Build Sequences   #####
#########################################
def create_sequences(data, seq_length, pred_length):
    """Slice a series into overlapping (input window, target window) pairs.

    Parameters
    ----------
    data : array-like
        Observations ordered along the first axis. Lists, NumPy arrays and
        DataFrames are accepted; the input is converted with ``np.asarray``.
    seq_length : int
        Number of consecutive observations in each input window (>= 1).
    pred_length : int
        Number of observations following each input window that form its
        target (>= 1).

    Returns
    -------
    X : np.ndarray
        Input windows, shape ``(n_windows, seq_length, *data.shape[1:])``.
    y : np.ndarray
        Flattened targets, shape ``(n_windows, pred_length * n_features)``.
        When ``data`` is too short for a single pair, both arrays are empty
        but keep these trailing dimensions.

    Raises
    ------
    ValueError
        If ``seq_length`` or ``pred_length`` is smaller than 1.
    """
    if seq_length < 1 or pred_length < 1:
        raise ValueError("seq_length and pred_length must be positive integers")

    values = np.asarray(data)
    feature_shape = values.shape[1:]
    n_windows = len(values) - seq_length - pred_length + 1

    if n_windows <= 0:
        # Not enough observations: return empty arrays with consistent shapes
        # so that downstream reshapes keep working.
        n_targets = pred_length * int(np.prod(feature_shape, dtype=int))
        empty_X = np.empty((0, seq_length) + feature_shape, dtype=values.dtype)
        empty_y = np.empty((0, n_targets), dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[i:i + seq_length] for i in range(n_windows)])
    y = np.stack([
        values[i + seq_length:i + seq_length + pred_length].ravel()
        for i in range(n_windows)
    ])
    return X, y


In [None]:
##########################
####  Hyperparameters ####
##########################

# Number of past days fed to the model as one input sequence.
seq_length = 5

# Initial size of the training window (in rows).
train_size = 955

# Size of the test set at each step (one-step-ahead prediction).
test_size = 1

# Intended shift of the window between consecutive iterations.
step_size = 1



In [None]:
###################################################
##### Expanding Window - One-Step Forecasting #####
###################################################

# Seed Python, NumPy and TensorFlow once so that re-running the notebook on the
# same software/hardware stack gives the same forecasts.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

all_predictions = []   # forecast for each target day, back in price units
all_actuals = []       # observed close for each target day
all_dates = []         # index label of each target day
iteration_times = []   # seconds spent in model.fit for each target day

# seq_length, train_size, test_size and step_size are read from the
# hyperparameter cell; they are deliberately not redefined here.
for target_day in range(train_size, len(data), step_size):
    last_seq_start = target_day - seq_length  # first row of the input window

    # Skip targets that do not have seq_length rows of history before them.
    if last_seq_start < 0:
        continue

    # A new model is built on every pass; release the previous one's Keras
    # state so memory use does not grow with the number of windows.
    tf.keras.backend.clear_session()

    # Expanding training window: every row strictly before the target day.
    train_data = data.iloc[:target_day]

    # Fit the scaler on the training rows only (no look-ahead into the target).
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_data)

    # Supervised pairs: seq_length inputs -> next test_size values, matching
    # the width of the Dense output layer below.
    X_train, y_train = create_sequences(train_scaled, seq_length, test_size)
    X_train = X_train.reshape((X_train.shape[0], seq_length, 1))

    # Two stacked LSTM layers with dropout, followed by a linear output layer.
    model = Sequential()
    model.add(LSTM(20, return_sequences=True, input_shape=(seq_length, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(20, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(test_size))
    model.compile(optimizer='adam', loss='huber')

    # Stop when the validation loss has not improved for 5 epochs and roll
    # back to the best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Only the training call is timed; perf_counter is a monotonic clock
    # meant for measuring elapsed intervals.
    start_time = time.perf_counter()

    model.fit(X_train, y_train,
              epochs=50,
              batch_size=16,
              verbose=0,
              validation_split=0.1,
              callbacks=[early_stop])

    end_time = time.perf_counter()

    # Input window for the target: the seq_length rows right before it,
    # scaled with the scaler fitted on the training rows.
    last_seq_raw = data.iloc[last_seq_start:target_day].values
    last_seq_scaled = scaler.transform(last_seq_raw)
    last_seq = last_seq_scaled.reshape(1, seq_length, 1)

    prediction_scaled = model.predict(last_seq, verbose=0)
    # The scaler was fitted on a single column, so present one forecast step
    # per row before mapping back to price units.
    prediction_inv = scaler.inverse_transform(prediction_scaled.reshape(-1, 1))

    # Observed value on the target day (first and only column of data).
    y_real = data.iloc[target_day, 0]

    # Store the first forecast step, which corresponds to target_day.
    all_predictions.append(prediction_inv.flatten()[0])
    all_actuals.append(y_real)
    all_dates.append(data.index[target_day])
    iteration_times.append(end_time - start_time)

    # Progress report for this window.
    print(f"Ventana {(target_day - train_size) // step_size + 1} - Día predicho: {data.index[target_day].date()}")



In [None]:
########################################
#### Build DataFrame with Results   ####
########################################

# 1-D array copies of the collected forecasts and observations.
all_predictions_flat = np.array(all_predictions).flatten()
all_actuals_flat = np.array(all_actuals).flatten()

# One row per forecast day, indexed by its date.
results_df = pd.DataFrame(
    {'Date': all_dates, 'Predicted': all_predictions, 'Actual': all_actuals}
).set_index('Date')

# Persist the forecasts next to the notebook.
results_df.to_csv('LSTM_expanding.csv', index=True, sep=",", encoding='utf-8')

In [None]:
#########################################
#### Plot Predictions vs Actuals     ####
#########################################
fig, ax = plt.subplots(figsize=(14, 6))

# Draw the observed series first, then the forecasts on top of it.
for column, colour in (('Actual', 'blue'), ('Predicted', 'orange')):
    ax.plot(results_df.index, results_df[column], label=column, color=colour)

ax.set_title("Predicciones One-Step-Ahead (Expanding Window) vs Valores Reales")
ax.set_xlabel("Fecha")
ax.set_ylabel("Precio de Cierre")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
####################################################
#### Basic Statistics and Timing Plots          ####
####################################################
# Summary of the time spent in model.fit across all windows.
print(f"\nTiempo promedio por iteración: {np.mean(iteration_times):.2f} segundos")
print(f"Tiempo total: {np.sum(iteration_times):.2f} segundos")

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(results_df.index, iteration_times, label='Tiempo por iteración (s)', color='green')
ax.set_xlabel("Fecha")
ax.set_ylabel("Segundos")
ax.set_title("Tiempo de ejecución por ventana")
ax.grid(True)
ax.legend()
fig.tight_layout()

# Save relative to the working directory (like the results CSV) rather than
# to the Colab-only /content folder, so the cell also runs outside Colab.
fig.savefig('Execution_Time_LSTM.pdf', dpi=300)
plt.show()


In [None]:
##################################################
#### Plot Original Series with Predictions    ####
##################################################
# Label-based slice covering the forecast period (not used by the plot below).
original_slice = data.loc[results_df.index.min():results_df.index.max()]

# Show some history before the first forecast for visual context.
pre_days = 90  # number of rows displayed before the first forecast day
start_idx = data.index.get_loc(results_df.index.min())
end_idx = data.index.get_loc(results_df.index.max())
start_idx_with_buffer = max(0, start_idx - pre_days)  # clamp at the first row
extended_slice = data.iloc[start_idx_with_buffer:end_idx + 1]

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(extended_slice.index, extended_slice['Close'], label='Serie Original', color='black', alpha=0.5, linestyle='--')
ax.plot(results_df.index, results_df['Actual'], label='Real (Observado)', color='blue')
ax.plot(results_df.index, results_df['Predicted'], label='Predicción', color='orange')
ax.set_title("Predicciones a un Paso (Expanding Window)")
ax.set_xlabel("Fecha")
ax.set_ylabel("Precio de Cierre")
ax.legend()
ax.grid(True)
fig.tight_layout()

# Save relative to the working directory (like the results CSV) rather than
# to the Colab-only /content folder, so the cell also runs outside Colab.
fig.savefig('Forecast_LSTM.pdf', dpi=300)
plt.show()
