# 03 Forecast Train - Entrenamiento del Modelo Predictivo

Este notebook entrena el modelo LSTM para predecir el estado futuro del Fitotron.
1. Carga datos procesados.
2. Genera secuencias temporales (X, y) usando `FeatureBuilder`.
3. Entrena el modelo `ForecasterLSTM`.
4. Evalúa el rendimiento y guarda el modelo.

**Entradas:** `data/processed/processed_*.csv`
**Salidas:** `fitotron_ai/models/forecaster_lstm.keras`, `fitotron_ai/models/stats_forecast.json`, `results/forecast_metrics.csv`

In [1]:
import os
import sys
import glob
import json
import pandas as pd
import numpy as np

# Resolve the repository root as two levels above the notebook's working
# directory and make it importable, so the project package below can be found.
current_dir = os.getcwd()
repo_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Project imports: these must run after the sys.path adjustment above.
from fitotron_ai.features.feature_builder import FeatureBuilder
from fitotron_ai.models.forecaster_lstm import ForecasterLSTM

# Input and output directories, all anchored at the repository root.
PROCESSED_DIR = os.path.join(repo_root, "data", "processed")
MODELS_DIR = os.path.join(repo_root, "fitotron_ai", "models")
RESULTS_DIR = os.path.join(repo_root, "results")

# Output directories are created eagerly; the input directory is only read.
for output_dir in (MODELS_DIR, RESULTS_DIR):
    os.makedirs(output_dir, exist_ok=True)

print(f"Modelos: {MODELS_DIR}")

Modelos: C:\Users\willy\Documents\GitHub\fitotron_ai_control_system\fitotron_ai\models


In [2]:
# 1. Load processed data
# Candidate files are ordered newest-first by modification time; only the
# most recent one is loaded.
processed_files = sorted(
    glob.glob(os.path.join(PROCESSED_DIR, "processed_*.csv")),
    key=os.path.getmtime,
    reverse=True,
)

target_file = processed_files[0] if processed_files else None
df = None

if not target_file:
    print("No hay datos procesados.")
else:
    print(f"Cargando: {target_file}")
    df = pd.read_csv(target_file)
    if "timestamp" in df.columns:
        # "timestamp" is parsed as epoch seconds and becomes the "datetime" index.
        df = df.assign(
            datetime=pd.to_datetime(df["timestamp"], unit="s")
        ).set_index("datetime")

No hay datos procesados.


In [3]:
# 2. Build features (sequences)
#
# Produces X_norm / y_norm for the training cell and writes the
# normalization statistics to MODELS_DIR/stats_forecast.json.
# Both outputs stay None when there is no data or sequence generation fails.


def _json_default(value):
    """Convert NumPy arrays and scalars to plain Python objects for json.dump.

    Used as the ``default=`` hook so that arrays nested inside the stats
    structure and NumPy scalar types (e.g. np.float32) are serialized too,
    not only top-level ndarray values.

    Raises:
        TypeError: if ``value`` is not a NumPy array or scalar.
    """
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


# Reset the outputs up front so that re-running this cell never leaves stale
# sequences from a previous execution in the kernel.
X_norm, y_norm = None, None

if df is not None:
    # Configuration (values are sample counts; "1 minute" holds only if the
    # data is sampled at 1 Hz -- TODO confirm against the processing step).
    WINDOW_SIZE = 60  # history window
    HORIZON = 60      # forecast horizon
    VARIABLES = ["temp", "rh", "co2", "soil", "ph", "ec", "par"]

    # Keep only the variables actually present in the loaded data.
    available_vars = [v for v in VARIABLES if v in df.columns]
    print(f"Variables usadas: {available_vars}")

    if not available_vars:
        # Nothing to build sequences from; leave X_norm / y_norm as None.
        print("Error generando secuencias: ninguna variable esperada esta presente en los datos.")
    else:
        builder = FeatureBuilder(
            window_size=WINDOW_SIZE,
            horizon=HORIZON,
            variables=available_vars,
            db_path=""  # not used when the DataFrame is passed in directly
        )

        # Inject the data manually: the builder would normally load it itself,
        # but here make_sequences is called with the DataFrame.
        builder.data = df[available_vars].interpolate().dropna()

        try:
            X, y = builder.make_sequences(builder.data)
            # NOTE(review): presumably fit=True computes the normalization
            # stats from all sequences, including those the training cell
            # later holds out -- consider fitting on the training part only.
            X_norm, y_norm = builder.normalize(X, y, fit=True)

            print(f"Secuencias generadas: X={X_norm.shape}, y={y_norm.shape}")

            # Persist the normalization stats next to the model.
            stats_path = os.path.join(MODELS_DIR, "stats_forecast.json")
            with open(stats_path, "w", encoding="utf-8") as f:
                json.dump(builder.norm_stats, f, indent=4, default=_json_default)
            print(f"Stats guardados en: {stats_path}")

        except ValueError as e:
            print(f"Error generando secuencias: {e}")
            X_norm, y_norm = None, None

In [4]:
# 3. Train the model
# Training only runs when normalized sequences exist and there are more than
# 100 of them; X_norm may be undefined if the feature cell never assigned it.
_candidate_sequences = locals().get("X_norm")
has_enough_data = _candidate_sequences is not None and len(_candidate_sequences) > 100

if not has_enough_data:
    print("Datos insuficientes para entrenar.")
else:
    # Chronological 80/20 split: the first 80% of sequences train the model,
    # the remaining 20% are passed to train() as validation data.
    split_idx = int(len(X_norm) * 0.8)
    X_train, y_train = X_norm[:split_idx], y_norm[:split_idx]
    X_test, y_test = X_norm[split_idx:], y_norm[split_idx:]

    model_config = dict(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        output_dim=y_train.shape[1],
        hidden_units=64,
        dropout=0.2,
    )
    forecaster = ForecasterLSTM(**model_config)

    try:
        forecaster.build_model()

        training_config = dict(
            X_val=X_test,
            y_val=y_test,
            epochs=20,
            batch_size=32,
            patience=5,
        )
        history = forecaster.train(X_train, y_train, **training_config)

        # Save the trained model.
        model_path = os.path.join(MODELS_DIR, "forecaster_lstm.keras")
        forecaster.save(model_path)
        print(f"Modelo entrenado guardado en: {model_path}")

        # Save the per-epoch training history as CSV.
        metrics_df = pd.DataFrame(history.history)
        metrics_path = os.path.join(RESULTS_DIR, "forecast_metrics.csv")
        metrics_df.to_csv(metrics_path, index=False)
        print(f"Metricas guardadas en: {metrics_path}")

    except Exception as e:
        # Best-effort: report the failure and let the notebook keep running.
        print(f"Error en entrenamiento: {e}")

Datos insuficientes para entrenar.
