# Entrenamiento de Modelos — Cooperativa Nueva Esperanza

Guía para reentrenar los modelos con nuevos datos de la campaña.

**Requisitos**: 
- Datos históricos en `data/raw/` (CSV con columnas: `dias_ciclo, precip_30d, temp_max_prom, ndvi_60d, es_hibrido, prof_suelo_cm, ph_suelo, rendimiento_qq_ha`)
- Python 3.9+ con pandas, numpy, scikit-learn y joblib instalados

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
from pathlib import Path

In [None]:
# Load the historical data: every CSV found directly under data/raw/.
raw_dir = Path("data/raw/")
# Sort the paths: glob yields them in arbitrary order, and the row order of
# `data` decides which rows a seeded train/test split selects.
files = sorted(raw_dir.glob("*.csv"))
if not files:
    # Fail early with a clear message; pd.concat on an empty list would
    # otherwise raise an opaque "No objects to concatenate".
    raise FileNotFoundError(f"No se encontraron archivos CSV en {raw_dir}")

dfs = []
for f in files:
    df = pd.read_csv(f)
    # Keep only the rows recorded at day 60 of the cycle.
    df = df[df["dias_ciclo"] == 60].copy()
    dfs.append(df)

# Stack all files into one frame with a fresh 0..n-1 index.
data = pd.concat(dfs, ignore_index=True)
print(f"Cargados {len(data)} lotes históricos")
data.head()

In [None]:
# Prepare the feature matrix and the target.
# NOTE: the loading cell keeps only rows with dias_ciclo == 60, so that
# column is constant here and carries no signal; it is kept so the saved
# feature list stays unchanged for whatever consumes the artifact.
features = [
    "dias_ciclo", "precip_30d", "temp_max_prom", "ndvi_60d",
    "es_hibrido", "prof_suelo_cm", "ph_suelo"
]
X = data[features]
y = data["rendimiento_qq_ha"]

# Split first, so the held-out rows stay unseen by every fitted step.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2025
)

# Scale: fit the scaler on the training rows only, then apply that same
# transform to both partitions. Fitting on the full dataset would leak
# test-set statistics into training and bias the evaluation metrics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full dataset under the train-fitted scaling, kept for any later cell
# that still reads X_scaled.
X_scaled = scaler.transform(X)

In [None]:
# Train a ridge regression on the training partition.
model = Ridge(alpha=1.0)
model.fit(X_train, y_train)

# Evaluate on the held-out partition.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.2f} qq/ha")
print(f"R²: {r2:.2f}")

# Save the model together with the scaler, the feature list and the metrics
# in a single artifact.
model_path = Path("data/models/yield_predictor_v2.joblib")
# Create the output directory if needed; joblib.dump does not create it and
# would fail after training has already finished.
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump({
    "model": model,
    "scaler": scaler,
    "features": features,
    "metrics": {"mae": mae, "r2": r2}
}, model_path)

print("✅ Modelo actualizado en data/models/")