In [None]:
# --- Mount Google Drive ---
# Colab-only step: exposes the user's Drive under /content/drive, which is the
# location the data-loading cell reads the Excel workbook from.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# --- 1. Instalación e Importación de Librerías ---
!pip install yfinance optuna tensorflow scikit-learn matplotlib seaborn pandas numpy
!pip install optuna-integration[tfkeras]

import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
# NOTE(review): both switches below are TensorFlow debugging aids. They make
# tf.function code and tf.data pipelines run eagerly instead of as compiled
# graphs, which usually slows model.fit() down considerably. Presumably left
# over from a debugging session — confirm nothing relies on eager execution
# before removing them.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from optuna.integration import TFKerasPruningCallback
import optuna
from google.colab import files
import joblib
import sys

# Global plotting defaults: seaborn grid theme first, then wide figures and
# thicker lines applied on top of it.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (16, 5),
    'lines.linewidth': 2,
})


In [None]:
# --- 2. Load the dataset (Excel workbook) from Google Drive ---
# To load from a manual upload instead, use `files.upload()` and read the
# uploaded file name in place of the Drive path below.
file_path_in_drive = '/content/drive/MyDrive/MP_20251/Datos_MediaMovil.xlsx'

try:
    # Read the sheet as-is; no column is promoted to a date index here.
    df = pd.read_excel(file_path_in_drive)
except Exception as e:
    # Report the problem and re-raise: every later cell needs `df`, so carrying
    # on would only fail later with a NameError, or silently reuse a stale `df`
    # left in the kernel from a previous run.
    print(f"\n❌ Ocurrió un error al cargar el archivo: {e}")
    raise

# Summary of what was loaded.
print(f"\n✅ Archivo '{file_path_in_drive}' cargado exitosamente desde Google Drive.")
print("\n📌 Primeras 5 filas del DataFrame:")
print(df.head())
print("\n📌 Últimas 5 filas del DataFrame:")
print(df.tail())
print("\n📌 Información del DataFrame:")
df.info()

# Report missing values per column (only columns that actually have NaNs).
nan_counts = df.isna().sum()
if nan_counts.sum() == 0:
    print("\n✅ No se encontraron valores faltantes (NaN) en el DataFrame.")
else:
    print("\n⚠️ Valores faltantes detectados por columna:")
    print(nan_counts[nan_counts > 0])


In [None]:
# --- 4. Feature and target preparation ---

# Target column: the PM10_SA_IMP value of the following row (shift(-1) means
# one step ahead; switch to shift(-3) for a three-step horizon).
df['PM10_SA_IMP_hORA_Next'] = df['PM10_SA_IMP'].shift(-1)

# Calendar features, currently disabled (they read attributes of df.index):
#df['año'] = df.index.year
#df['mes'] = df.index.month
#df['dia_del_mes'] = df.index.day
#df['dia_de_la_semana'] = df.index.dayofweek
#df['hora'] = df.index.hour

# Model inputs: raw readings plus their 24h / 6h moving-average columns.
features = [
    'PM10_SA_IMP',
    'PM2_5_SA_IMP',
    'PM10_SJL_IMP',
    'MA_PM10_SA_IMP_24h',
    'MA_PM10_SA_IMP_6h',
    'MA_PM10_SJL_IMP_24h',
    'MA_PM10_SJL_IMP_6h',
    'MA_PM2_5_SA_IMP_24h',
    'MA_PM2_5_SA_IMP_6h',
]

# Stop right away when the workbook lacks any expected column.
missing_features = [name for name in features if name not in df.columns]
if len(missing_features) > 0:
    print(f"\nError: Las siguientes columnas de características no se encontraron en el archivo cargado: {missing_features}")
    print("Por favor, verifica que los nombres de las columnas en tu archivo coincidan con los esperados.")
    sys.exit("¡Error crítico: Faltan características necesarias en el DataFrame!")

X = df[features]
y = df['PM10_SA_IMP_hORA_Next']

# Put inputs and target side by side and drop every row with a NaN in either
# (this includes the last row, whose shifted target does not exist).
combined_df = pd.concat([X, y], axis=1)
combined_df = combined_df.dropna()
X_clean = combined_df[features].copy()
y_clean = combined_df['PM10_SA_IMP_hORA_Next'].copy()

print(f"\nDimensiones de X_clean después de la preparación final: {X_clean.shape}")
print(f"Dimensiones de y_clean después de la preparación final: {y_clean.shape}")


In [None]:

# --- 5. Chronological split (first 75% train, last 25% test) ---
train_size = int(len(X_clean) * 0.75)

X_train_df = X_clean.iloc[:train_size]
X_test_df = X_clean.iloc[train_size:]
y_train_df = y_clean.iloc[:train_size]
y_test_df = y_clean.iloc[train_size:]

print(f"\nDimensiones de X_train_df: {X_train_df.shape}, y_train_df: {y_train_df.shape}")
print(f"Dimensiones de X_test_df: {X_test_df.shape}, y_test_df: {y_test_df.shape}")

# --- 6. Min-max scaling ---
# Both scalers are fitted on the training partition only and then applied to
# the test partition, so test-set ranges never influence the scaling.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_scaled = scaler_X.fit_transform(X_train_df)
X_test_scaled = scaler_X.transform(X_test_df)

# The target is a Series; the scaler wants a 2-D column, hence reshape(-1, 1).
y_train_column = y_train_df.values.reshape(-1, 1)
y_test_column = y_test_df.values.reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_column)
y_test_scaled = scaler_y.transform(y_test_column)

print("\nDatos escalados exitosamente.")


In [None]:
import optuna
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam

# --- Parameterizable LSTM model builder ---
def build_lstm_model(input_shape, n_layers, n_neurons, dropout, learning_rate):
    """Build and compile a stacked LSTM regressor with one output unit.

    Args:
        input_shape: Shape of a single sample, passed to the first LSTM layer.
        n_layers: Number of LSTM layers; at least one layer is always created.
        n_neurons: Units in every LSTM layer.
        dropout: Rate of the Dropout layer placed after each LSTM layer.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model using mean absolute error as its loss.
    """
    model = Sequential()

    for layer_idx in range(max(1, n_layers)):
        # Every LSTM layer except the last must emit the full sequence so the
        # next LSTM layer receives one vector per time step.
        layer_kwargs = {'return_sequences': layer_idx < n_layers - 1}
        if layer_idx == 0:
            layer_kwargs['input_shape'] = input_shape
        model.add(LSTM(n_neurons, **layer_kwargs))
        model.add(Dropout(dropout))

    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_absolute_error')
    return model

# --- Build sliding-window sequences for the LSTM ---
LOOKBACK_WINDOW = 10  # Number of consecutive rows (time steps) per input sample

def create_lstm_sequences(X_data, y_data, lookback):
    """Turn row-aligned features and labels into (window, label) pairs.

    `y_data[t]` must be the label that belongs to row `t` of `X_data`. In this
    notebook the target column is already shifted one step into the future, so
    the label of a row is the value observed one step after that row.

    Each window `X_data[i : i + lookback]` is paired with the label of its
    LAST row, `y_data[i + lookback - 1]`. Pairing it with `y_data[i + lookback]`
    instead (as an unshifted target would require) would skip the most recent
    observation and forecast one step further ahead than intended.

    Args:
        X_data: 2-D array-like of shape (n_rows, n_features).
        y_data: Array-like with at least n_rows entries along its first axis.
        lookback: Number of rows per window; must be >= 1.

    Returns:
        Tuple `(X_seq, y_seq)` where `X_seq` has shape
        (n_rows - lookback + 1, lookback, n_features) and `y_seq` holds the
        matching labels. When there are fewer rows than `lookback`, both arrays
        are empty but keep their trailing dimensions.

    Raises:
        ValueError: If `lookback` < 1 or `y_data` is shorter than `X_data`.
    """
    X_data = np.asarray(X_data)
    y_data = np.asarray(y_data)

    if lookback < 1:
        raise ValueError("lookback must be >= 1")
    n_rows = len(X_data)
    if len(y_data) < n_rows:
        raise ValueError("y_data must have at least as many rows as X_data")

    if n_rows < lookback:
        # Not enough rows for a single window: return correctly shaped empties
        # instead of a shapeless (0,) array.
        return (np.empty((0, lookback) + X_data.shape[1:], dtype=X_data.dtype),
                np.empty((0,) + y_data.shape[1:], dtype=y_data.dtype))

    # `end` is the exclusive end row of each window.
    X_seq = np.stack([X_data[end - lookback:end, :] for end in range(lookback, n_rows + 1)])
    # Label of the last row of each window.
    y_seq = np.array(y_data[lookback - 1:n_rows])
    return X_seq, y_seq

# Window the scaled train and test partitions separately, so no input window
# mixes rows from both partitions.
X_train_seq, y_train_seq = create_lstm_sequences(
    X_data=X_train_scaled, y_data=y_train_scaled, lookback=LOOKBACK_WINDOW)
X_test_seq, y_test_seq = create_lstm_sequences(
    X_data=X_test_scaled, y_data=y_test_scaled, lookback=LOOKBACK_WINDOW)

# --- Optuna objective function ---
def objective_lstm(trial):
    """Train one candidate LSTM and return its validation MAE.

    Hyperparameters are sampled from `trial`. The model is fitted on the first
    80% of the training sequences and scored on the remaining 20%; the same
    explicit slice is used for early stopping and for the returned score.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean absolute error on the validation slice, in scaled target units.
    """
    # Drop Keras state left behind by earlier trials so memory does not keep
    # growing over the course of the study.
    tf.keras.backend.clear_session()

    n_layers = trial.suggest_int('n_layers', 1, 2)
    n_neurons = trial.suggest_int('n_neurons_per_layer', 32, 128, step=32)
    dropout = trial.suggest_float('dropout_rate', 0.1, 0.4, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 5e-4, 5e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128])
    epochs = 10

    # One chronological split, computed once, instead of relying on Keras'
    # internal validation_split and re-deriving the same slice by hand.
    split_at = int(len(X_train_seq) * 0.8)
    X_fit, y_fit = X_train_seq[:split_at], y_train_seq[:split_at]
    X_val, y_val = X_train_seq[split_at:], y_train_seq[split_at:]

    model = build_lstm_model(
        input_shape=X_train_seq.shape[1:],
        n_layers=n_layers,
        n_neurons=n_neurons,
        dropout=dropout,
        learning_rate=learning_rate
    )

    early_stopping = EarlyStopping(patience=5, restore_best_weights=True)

    model.fit(X_fit, y_fit,
              validation_data=(X_val, y_val),
              epochs=epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=[early_stopping])

    # verbose=0 keeps the per-trial output free of prediction progress bars.
    y_pred_val = model.predict(X_val, verbose=0).flatten()
    y_true_val = y_val.flatten()

    mae = mean_absolute_error(y_true_val, y_pred_val)
    return mae

# --- Run the Optuna search ---
# The sampler is seeded so the sequence of hyperparameter configurations tried
# is the same on every run (model weight initialisation remains random).
study_lstm = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Stops after 7 trials or one hour, whichever comes first; raise n_trials
# (e.g. 50+) for a more thorough search.
study_lstm.optimize(objective_lstm, n_trials=7, timeout=3600)

# --- Show the best hyperparameters ---
print("Mejores hiperparámetros encontrados:")
print(study_lstm.best_params)

In [None]:
# Keep the winning configuration under a short name and list it one
# hyperparameter per line.
print("\n✅ Mejores hiperparámetros encontrados:")
best_params_lstm = study_lstm.best_params
for key in best_params_lstm:
    value = best_params_lstm[key]
    print(f"{key}: {value}")


In [None]:
print("\n🚀 Entrenando el modelo LSTM final con los mejores hiperparámetros...")

# Build the final network with the same builder the Optuna search used, so the
# trained architecture cannot drift from the one that was tuned.
final_model_lstm = build_lstm_model(
    input_shape=X_train_seq.shape[1:],
    n_layers=best_params_lstm['n_layers'],
    n_neurons=best_params_lstm['n_neurons_per_layer'],
    dropout=best_params_lstm['dropout_rate'],
    learning_rate=best_params_lstm['learning_rate'],
)

early_stop = EarlyStopping(patience=10, restore_best_weights=True)

# Early stopping (and the best-weights restore) must not monitor the test set:
# choosing the stopping epoch on test data makes the reported test metrics
# optimistically biased. Validate on the last 20% of the training sequences
# instead (validation_split takes the tail of the data before shuffling), the
# same scheme used in the hyperparameter search.
history_final = final_model_lstm.fit(X_train_seq, y_train_seq,
                                     validation_split=0.2,
                                     epochs=50,
                                     batch_size=best_params_lstm['batch_size'],
                                     callbacks=[early_stop],
                                     verbose=1)


In [None]:

# --- 10. Evaluation metrics ---
def _regression_metrics(y_true, y_pred):
    """Return (MAE, MSE, RMSE, R2, MAPE in %) for two equally shaped arrays.

    MAPE is averaged only over samples whose actual value is non-zero, because
    dividing by a zero actual would turn the whole metric into inf/NaN. If
    every actual value is zero, MAPE is reported as NaN.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan
    return mae, mse, rmse, r2, mape


def _print_metrics(header, mae, mse, rmse, r2, mape):
    """Print one metrics report under the given header line."""
    print(header)
    print(f"MAE (Mean Absolute Error): {mae:.4f}")
    print(f"MSE (Mean Squared Error): {mse:.4f}")
    print(f"RMSE (Root Mean Squared Error): {rmse:.4f}")
    print(f"R2 Score: {r2:.4f}")
    print(f"MAPE (Mean Absolute Percentage Error): {mape:.4f}%")


# Predict on both partitions and map predictions and targets back to the
# original units with the target scaler.
y_train_pred_scaled = final_model_lstm.predict(X_train_seq).flatten()
y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
y_train_actual = scaler_y.inverse_transform(y_train_seq.reshape(-1, 1))

y_test_pred_scaled = final_model_lstm.predict(X_test_seq).flatten()
y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled.reshape(-1, 1))
y_test_actual = scaler_y.inverse_transform(y_test_seq.reshape(-1, 1))

mae_train, mse_train, rmse_train, r2_train, mape_train = _regression_metrics(
    y_train_actual, y_train_pred)
_print_metrics("\n--- Métricas de Evaluación en el Conjunto de Entrenamiento (Train) ---",
               mae_train, mse_train, rmse_train, r2_train, mape_train)

mae_test, mse_test, rmse_test, r2_test, mape_test = _regression_metrics(
    y_test_actual, y_test_pred)
_print_metrics("\n--- Métricas de Evaluación en el Conjunto de Prueba (Test) ---",
               mae_test, mse_test, rmse_test, r2_test, mape_test)



In [None]:
from tensorflow.keras.models import load_model
import joblib
from google.colab import files
import os

# --- Save the trained model ---
final_model_lstm.save('lstm2_PM10_model.h5')
print("✅ Modelo LSTM guardado como 'lstm2_PM10_model.h5'.")

# --- Save the scalers ---
# Both fitted scalers are stored so inputs can be scaled, and predictions
# un-scaled, in the same way when the model is reloaded.
for fitted_scaler, scaler_path in (
    (scaler_X, 'scaler_X_lstm2.joblib'),
    (scaler_y, 'scaler_y_lstm2.joblib'),
):
    joblib.dump(fitted_scaler, scaler_path)
print("✅ Scalers guardados como 'scaler_X_lstm2.joblib' y 'scaler_y_lstm2.joblib'.")

In [None]:
# --- Download the generated files ---
# Trigger one browser download per artifact, model first and then the scalers.
for artifact_name in (
    'lstm2_PM10_model.h5',
    'scaler_X_lstm2.joblib',
    'scaler_y_lstm2.joblib',
):
    files.download(artifact_name)

# If a prediction plot was generated and saved as an image:
# files.download('predicciones_LSTM2.png')

print("✅ Archivos de modelo y scalers descargados correctamente.")
