## PARTE 1 — Configuración Inicial y Carga de Datos

In [None]:
# ==============================
# Librerías
# ==============================
import pandas as pd
import numpy as np
import random
from IPython.display import display
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, Callback
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

import matplotlib.pyplot as plt
import time

# ==============================
# 1. Fix the random seeds (NumPy, Python's `random`, TensorFlow)
# ==============================
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

# ==============================
# 2. Load the dataset and use 'Date' as the index
# ==============================
data_path = "../../data/processed/final_data.csv.gz"
data = pd.read_csv(
    data_path,
    compression='gzip',
    parse_dates=['Date'],
    index_col='Date',
)
# Label-based date slicing (section 3) needs a monotonic DatetimeIndex;
# sorting is a no-op when the file is already in chronological order.
data = data.sort_index()

print("Columnas del dataset:", list(data.columns))
print(f"Dimensiones dataset original: {data.shape}")

# ==============================
# 3. Restrict to the modelling date range (both ends inclusive)
# ==============================
start_date = "2005-01-01"
end_date = "2025-10-31"
data_rnn = data.loc[start_date:end_date]

print(f"Dimensiones dataset filtrado 2005-2025: {data_rnn.shape}")

# ==============================
# 4. Quick look at the filtered frame
# ==============================
display(data_rnn)

## PARTE 2 — Preprocesamiento y creación de secuencias

In [None]:
# ==============================
# 1. Fill any NaN: forward-fill first, then backward-fill what is left
# ==============================
data_rnn = data_rnn.ffill()
data_rnn = data_rnn.bfill()

# ==============================
# 2. Scale every column with RobustScaler
# ==============================
# NOTE(review): the scaler is fitted on the whole filtered range, i.e. before
# the train/validation split further down, so validation rows contribute to
# the scaling statistics. Confirm this is acceptable for the evaluation.
scaler = RobustScaler().fit(data_rnn)
data_scaled = pd.DataFrame(
    scaler.transform(data_rnn),
    index=data_rnn.index,
    columns=data_rnn.columns,
)

# ==============================
# 3. Sliding-window sequence builder
# ==============================
def create_sequences(data, target_cols=('BBVA.MC_Close', 'SAN.MC_Close'),
                     lookback=5, horizon=1):
    """Turn a DataFrame into (input window, future targets) pairs.

    For every position ``i`` with ``lookback <= i <= len(data) - horizon``:

    * ``X`` gets rows ``[i - lookback, i)`` of *all* columns,
    * ``y`` gets rows ``[i, i + horizon)`` of ``target_cols``.

    Args:
        data: pandas DataFrame whose rows are consecutive time steps.
        target_cols: column label(s) to predict. A list/tuple gives 2-D
            targets per sample; a single string gives 1-D targets.
        lookback: number of past rows in each input window.
        horizon: number of future rows in each target.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n, lookback, n_columns)`` and ``(n, horizon, n_targets)``
        (``(n, horizon)`` for a string ``target_cols``). When ``data`` is too
        short to build any window, ``n`` is 0 but the trailing dimensions are
        kept so downstream shape logic still works.
    """
    # pandas treats a tuple key as ONE label, so normalise iterables to a list;
    # a bare string is kept as-is to select a single column.
    columns = target_cols if isinstance(target_cols, str) else list(target_cols)

    # Convert once instead of slicing the DataFrame on every iteration.
    features = data.to_numpy()
    targets = data[columns].to_numpy()

    first = lookback
    stop = len(data) - horizon + 1
    if stop <= first:
        empty_X = np.empty((0, lookback, features.shape[1]), dtype=features.dtype)
        empty_y = np.empty((0, horizon) + targets.shape[1:], dtype=targets.dtype)
        return empty_X, empty_y

    X = np.stack([features[i - lookback:i] for i in range(first, stop)])
    y = np.stack([targets[i:i + horizon] for i in range(first, stop)])
    return X, y

# ==============================
# 4. Build single-step and multi-step sequences
# ==============================
lookback = 5
horizon_single = 1
horizon_multi = 5

# Single-step targets (next row only)
X_1, y_1 = create_sequences(data_scaled, lookback=lookback, horizon=horizon_single)
# Multi-step targets (next 5 rows)
X_5, y_5 = create_sequences(data_scaled, lookback=lookback, horizon=horizon_multi)

# ==============================
# 5. Report the resulting shapes
# ==============================
print("Single-step prediction:")
print("X_1.shape:", X_1.shape)
print("y_1.shape:", y_1.shape)

print("\nMulti-step prediction (5 días):")
print("X_5.shape:", X_5.shape)
print("y_5.shape:", y_5.shape)

# ==============================
# 6. Date of the first target row of every sample
# ==============================
# Sample k is built at position lookback + k, so the dates are the index
# entries starting at `lookback`, one per generated sample. Deriving the stop
# from the sample count keeps the dates aligned if a horizon changes.
dates_all_1 = data_scaled.index[lookback:lookback + len(X_1)]  # single-step
dates_all_5 = data_scaled.index[lookback:lookback + len(X_5)]  # multi-step


## PARTE 3 — Modelo LSTM y entrenamiento

In [None]:
# ==============================
# Callback that times each epoch and prints its metrics
# ==============================
class TimeHistory(Callback):
    """Keras callback that records per-epoch duration and prints a summary line.

    After training, ``self.times`` holds one elapsed-seconds float per epoch.
    """

    # Metrics printed at the end of each epoch, in this order, when present.
    _REPORTED_KEYS = ('loss', 'val_loss', 'mae', 'val_mae')

    def on_train_begin(self, logs=None):
        # Reset so the same callback instance can be reused across fit() calls.
        self.times = []

    def on_epoch_begin(self, epoch, logs=None):
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the system clock is adjusted mid-epoch.
        self.epoch_time_start = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        elapsed = time.perf_counter() - self.epoch_time_start
        self.times.append(elapsed)

        # `logs` may be None, and validation / MAE entries are only present
        # when fit() was given validation data and the model tracks 'mae';
        # skip what is missing instead of raising.
        logs = logs or {}
        parts = [f"Epoch {epoch+1} - tiempo: {elapsed:.2f}s"]
        for key in self._REPORTED_KEYS:
            value = logs.get(key)
            if value is not None:
                parts.append(f"{key}: {value:.6f}")
        print(" - ".join(parts))

# ==============================
# LSTM model factory
# ==============================
def build_fixed_lstm(input_shape, output_size=2, *, lstm_units=(256, 128),
                     dropout_rates=(0.3, 0.2), l2_strength=0.001,
                     learning_rate=0.001):
    """Build and compile a two-layer stacked LSTM regressor.

    Architecture: LSTM -> Dropout -> LSTM -> Dropout -> Dense(output_size),
    compiled with Adam, MSE loss and an MAE metric. The defaults reproduce the
    original fixed configuration (256/128 units, 0.3/0.2 dropout, L2 0.001,
    learning rate 0.001).

    Args:
        input_shape: ``(timesteps, n_features)`` of one input sample.
        output_size: number of values predicted per sample.
        lstm_units: units of the first and second LSTM layer.
        dropout_rates: dropout rate applied after each LSTM layer.
        l2_strength: L2 factor for both LSTM kernel regularizers.
        learning_rate: Adam learning rate.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    first_units, second_units = lstm_units
    first_dropout, second_dropout = dropout_rates

    model = Sequential([
        # An explicit Input layer replaces `input_shape=` on the first LSTM,
        # which recent Keras versions warn about.
        tf.keras.Input(shape=input_shape),
        # return_sequences=True so the second LSTM receives the full sequence.
        LSTM(first_units, return_sequences=True,
             kernel_regularizer=regularizers.l2(l2_strength)),
        Dropout(first_dropout),
        LSTM(second_units, return_sequences=False,
             kernel_regularizer=regularizers.l2(l2_strength)),
        Dropout(second_dropout),
        Dense(output_size),
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae'],
    )
    return model

# ==============================
# Prepare the data
# ==============================
X = X_1
# Drop the horizon axis of the single-step targets: (samples, 1, 2) -> (samples, 2)
y = y_1.reshape(y_1.shape[0], y_1.shape[2])

# Chronological split (shuffle=False): the last 20% of the samples form the
# validation set. Consecutive windows overlap in all but one row, so a
# shuffled split would place near-duplicates of every validation window in
# the training set and make the validation metrics meaningless.
X_train, X_val, y_train, y_val, dates_train, dates_val = train_test_split(
    X, y, dates_all_1, test_size=0.2, shuffle=False
)

input_shape = (X_train.shape[1], X_train.shape[2])  # (lookback, n_features)
output_size = y_train.shape[1]

# ==============================
# Training
# ==============================
model = build_fixed_lstm(input_shape, output_size)
time_callback = TimeHistory()
# Stop after 50 epochs without val_loss improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop, time_callback],
    verbose=0  # per-epoch output is printed by TimeHistory instead
)


### Parte 3.1 - Guardar modelo

In [None]:
import joblib

# ---- Save the model ----
# A path such as "lstm_model_5days_savedmodel" can be used instead if the
# SavedModel format is preferred.
# NOTE(review): the file name says "5days", but `model` is the network fitted
# on the single-step sequences (X_1 / y_1) — confirm the intended name.
model_file = "lstm_model_5days_90col.h5"
model.save(model_file)

# ---- Save the fitted scaler ----
scaler_file = "robust_scaler_5days_90col.save"
joblib.dump(scaler, scaler_file)

## PARTE 4 — Gráficos de entrenamiento

In [None]:
# Training curves: one figure for the loss (MSE) and one for the MAE.
# Each entry: (history key, short name, title, y-label, train colour, val colour)
metric_plots = [
    ('loss', 'MSE', 'Evolución MSE Loss', 'MSE Loss', 'blue', 'orange'),
    ('mae', 'MAE', 'Evolución MAE', 'MAE', 'green', 'red'),
]

for metric_key, short_name, title, y_label, train_color, val_color in metric_plots:
    plt.figure(figsize=(10, 5))
    plt.plot(history.history[metric_key],
             label=f'Train {short_name}', color=train_color)
    plt.plot(history.history[f'val_{metric_key}'],
             label=f'Val {short_name}', color=val_color)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.ylim(0, 1)  # fixed y-range; values above 1 are cut off
    plt.legend()
    plt.show()


## PARTE 5 — Predicción y desescalado (RobustScaler)

In [None]:
# ==============================
# Prediction on the validation split
# ==============================
# The predictions are later placed next to `y_val` / `dates_val` in the same
# DataFrame, so they must come from `X_val`; predicting on the full `X` gives
# an array of a different length and the DataFrame constructor fails.
y_pred_scaled = model.predict(X_val)

# ==============================
# Undo the scaling of a subset of columns
# ==============================
def robust_inverse(scaled_values, scaler, total_features, col_indices=None):
    """Inverse-transform a few columns with a scaler fitted on many columns.

    The scaler expects ``total_features`` columns, so the given values are
    placed into a zero-filled matrix of that width, inverse-transformed, and
    the same columns are extracted again.

    Args:
        scaled_values: 2-D array ``(n_rows, n_cols)`` of scaled values.
        scaler: fitted object exposing ``inverse_transform``.
        total_features: number of columns the scaler was fitted on.
        col_indices: positions (in the scaler's column order) that the columns
            of ``scaled_values`` correspond to. ``None`` keeps the original
            behaviour and assumes they are the FIRST ``n_cols`` columns.

    Returns:
        Array ``(n_rows, n_cols)`` in original units.

    Raises:
        ValueError: if ``scaled_values`` is not 2-D, has more columns than
            ``total_features``, or ``col_indices`` has the wrong length.
    """
    scaled_values = np.asarray(scaled_values)
    if scaled_values.ndim != 2:
        raise ValueError("scaled_values must be a 2-D array")
    n_rows, n_cols = scaled_values.shape
    if n_cols > total_features:
        raise ValueError("scaled_values has more columns than total_features")

    if col_indices is None:
        positions = list(range(n_cols))
    else:
        positions = list(col_indices)
        if len(positions) != n_cols:
            raise ValueError("col_indices must have one entry per column of scaled_values")

    # The zero padding only satisfies the scaler's expected width; the padded
    # columns are discarded after the inverse transform.
    padded = np.zeros((n_rows, total_features), dtype=float)
    padded[:, positions] = scaled_values
    return scaler.inverse_transform(padded)[:, positions]

# Bring validation targets and predictions back to original units.
# NOTE(review): called this way, robust_inverse unscales the values with the
# statistics of the FIRST columns of the scaler — verify that the two target
# columns really are the first two columns of `data_scaled`.
n_total_features = data_scaled.shape[1]
y_val_inv = robust_inverse(y_val, scaler, n_total_features)
y_pred_inv = robust_inverse(y_pred_scaled, scaler, n_total_features)


## PARTE 6 — DataFrames de validación y gráficos

In [None]:
# ==============================
# 1. One validation DataFrame per ticker (column 0 = BBVA, column 1 = SAN)
# ==============================
# Each frame holds the date, the real value and the prediction, plus a
# monthly Period column used for the monthly aggregation.
df_val_bbva, df_val_san = (
    pd.DataFrame({
        'date': dates_val,
        'y_real': y_val_inv[:, target_pos],
        'y_pred': y_pred_inv[:, target_pos],
    }).assign(month=lambda frame: frame['date'].dt.to_period('M'))
    for target_pos in (0, 1)
)

# ==============================
# Multi-step forecast: first 5 business days of November 2025
# ==============================
# `model` predicts a single step (one row of target values) per call, so its
# output cannot be reshaped into 5 steps. The 5-day path is built recursively:
# predict one step, append it to the window, drop the oldest row, repeat.
n_future = 5
target_cols = ['BBVA.MC_Close', 'SAN.MC_Close']
# Positions of the targets in the scaler's column order (assumes unique column names).
target_idx = [data_scaled.columns.get_loc(col) for col in target_cols]

# Start from the most recent `lookback` rows, i.e. the window that ends on the
# last available date (X_5[-1] ends 5 rows earlier).
window = data_scaled.iloc[-lookback:].to_numpy(dtype=float)
last_X_5 = window[np.newaxis, ...]  # (1, lookback, n_features)

scaled_steps = []
for _ in range(n_future):
    step_pred = model.predict(window[np.newaxis, ...], verbose=0)[0]
    scaled_steps.append(step_pred)
    # Future values of the non-target features are unknown: carry the last
    # known row forward and overwrite only the predicted targets.
    next_row = window[-1].copy()
    next_row[target_idx] = step_pred
    window = np.vstack([window[1:], next_row])
y_pred_5 = np.asarray(scaled_steps)  # (n_future, n_targets)

# Undo the scaling using the targets' own column positions.
padded_future = np.zeros((n_future, data_scaled.shape[1]))
padded_future[:, target_idx] = y_pred_5
future_preds_inv_5 = scaler.inverse_transform(padded_future)[:, target_idx]

future_dates_5 = pd.bdate_range(start='2025-11-01', periods=n_future)
df_future_bbva_5 = pd.DataFrame({'date': future_dates_5, 'pred': future_preds_inv_5[:, 0]})
df_future_san_5 = pd.DataFrame({'date': future_dates_5, 'pred': future_preds_inv_5[:, 1]})

In [None]:
# ==============================
# 2. Plots: full validation series, real vs prediction
# ==============================
def _plot_validation_series(df_val, ticker, real_color, pred_color):
    """Plot real vs predicted values of one ticker against the sample number."""
    # Order by date so the line follows time regardless of the row order
    # produced by the train/validation split.
    ordered = df_val.sort_values('date').reset_index(drop=True)

    plt.figure(figsize=(12, 5))
    plt.plot(ordered['y_real'], label=f'{ticker} Real', color=real_color)
    plt.plot(ordered['y_pred'], label=f'{ticker} Predicción', color=pred_color)
    plt.title(f'{ticker}: Comparación Real vs Predicción (validación)')
    plt.xlabel('Muestras')
    plt.ylabel('Valor (€)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


_plot_validation_series(df_val_bbva, 'BBVA', 'blue', 'red')
_plot_validation_series(df_val_san, 'SAN', 'green', 'orange')

# ==============================
# Plot October 2025 + the 5 forecast days of November
# ==============================
oct_start = pd.Timestamp('2025-10-01')
oct_end = pd.Timestamp('2025-10-31')
oct_days = pd.date_range(start=oct_start, end=oct_end, freq='B')


def _october_frame(df_val, business_days):
    """Align one validation frame to `business_days`, interpolating missing days."""
    # Only the numeric columns are interpolated; the 'month' Period column is
    # left out of interpolate() and rebuilt afterwards.
    frame = (
        df_val.set_index('date')[['y_real', 'y_pred']]
        .reindex(business_days)
        .interpolate()
        .rename_axis('date')
        .reset_index()
    )
    frame['month'] = frame['date'].dt.to_period('M')
    return frame


def _plot_october_with_forecast(df_oct, df_future, day_labels, ticker,
                                real_color, pred_color):
    """Plot October real/predicted values followed by the November forecast."""
    plt.figure(figsize=(12, 5))
    plt.plot(day_labels, df_oct['y_real'], label=f'{ticker} Real', color=real_color)
    plt.plot(day_labels, df_oct['y_pred'],
             label=f'{ticker} Predicción Octubre', color=pred_color)
    plt.plot(df_future['date'].dt.strftime('%d-%m'), df_future['pred'],
             label=f'{ticker} Predicción 5 días Nov', color='purple',
             marker='x', linestyle='--')
    plt.title(f'{ticker} Octubre 2025 + primeros 5 días Noviembre')
    plt.xlabel('Día-Mes')
    plt.ylabel('Valor (€)')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


df_oct_bbva = _october_frame(df_val_bbva, oct_days)
df_oct_san = _october_frame(df_val_san, oct_days)
# Day-month strings are used as x values, so the axis is categorical.
dates_plot = df_oct_bbva['date'].dt.strftime('%d-%m')

# --- BBVA ---
_plot_october_with_forecast(df_oct_bbva, df_future_bbva_5, dates_plot,
                            'BBVA', 'blue', 'red')

# --- SAN ---
_plot_october_with_forecast(df_oct_san, df_future_san_5, dates_plot,
                            'SAN', 'green', 'orange')

In [None]:
# ==============================
# 4. Monthly plots (mean of real vs predicted values per month)
# ==============================
def _plot_monthly(df_grouped, year_span, ticker, real_color, pred_color):
    """Plot the monthly means of one ticker against the month start date."""
    month_start = df_grouped['month'].dt.to_timestamp()

    plt.figure(figsize=(15, 5))
    plt.plot(month_start, df_grouped['y_real'], label=f'{ticker} Real', color=real_color)
    plt.plot(month_start, df_grouped['y_pred'], label=f'{ticker} Predicción', color=pred_color)
    plt.title(f'{ticker}: Mensual {year_span}')
    plt.xlabel('Fecha')
    plt.ylabel('Valor (€)')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()


df_bbva_monthly = df_val_bbva.copy()
df_san_monthly = df_val_san.copy()

df_bbva_monthly['month'] = df_bbva_monthly['date'].dt.to_period('M')
df_san_monthly['month'] = df_san_monthly['date'].dt.to_period('M')

# Non-numeric columns (e.g. 'date') are dropped by numeric_only=True.
df_bbva_monthly_grouped = df_bbva_monthly.groupby('month', as_index=False).mean(numeric_only=True)
df_san_monthly_grouped = df_san_monthly.groupby('month', as_index=False).mean(numeric_only=True)

# Year range taken from the plotted data, so the title always matches the
# period that is actually shown.
bbva_span = f"{df_bbva_monthly['date'].min().year}-{df_bbva_monthly['date'].max().year}"
san_span = f"{df_san_monthly['date'].min().year}-{df_san_monthly['date'].max().year}"

# --- BBVA monthly ---
_plot_monthly(df_bbva_monthly_grouped, bbva_span, 'BBVA', 'blue', 'red')

# --- SAN monthly ---
_plot_monthly(df_san_monthly_grouped, san_span, 'SAN', 'green', 'orange')