In [1]:

# ============================================================================
# 03_TRAIN_MODELS.IPYNB
# Entrenamiento y Evaluación de Modelos ML para Agricultura Vertical
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import cross_val_score, GridSearchCV
import xgboost as xgb
import lightgbm as lgb
import joblib
import json
import time
from datetime import datetime
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings from the
# modelling libraries imported above.
warnings.filterwarnings('ignore')

# Plot appearance: stock matplotlib style with seaborn's "husl" colour palette.
plt.style.use('default')
sns.set_palette("husl")

# Title banner: a 60-character rule above and below the notebook title.
banner_rule = "=" * 60
print(banner_rule)
print("ENTRENAMIENTO DE MODELOS - AGRICULTURA VERTICAL")
print(banner_rule)


ENTRENAMIENTO DE MODELOS - AGRICULTURA VERTICAL


1. CARGA DE DATOS FEATURES ENGINEERED

In [2]:
# ============================================================================
# 1. LOAD FEATURE-ENGINEERED DATA
# ============================================================================
# Reads the train / validation / test CSV splits and the feature-engineering
# metadata, then builds the feature matrices and target vectors for the two
# regression tasks handled in this notebook:
#   * eficiencia_fotosintetica_pct  -> X_train / X_val / X_test
#   * fotoluminiscencia_intensidad  -> X_*_foto (selected features plus the
#                                      efficiency column as an extra input)

print("\n1. CARGANDO DATOS CON FEATURES ENGINEERED...")

# Target column names, defined once so the nine selections below cannot drift.
TARGET_EFICIENCIA = 'eficiencia_fotosintetica_pct'
TARGET_FOTO = 'fotoluminiscencia_intensidad'

# Load the three splits.
train_data = pd.read_csv('../data/processed/train_featured.csv')
val_data = pd.read_csv('../data/processed/validation_featured.csv')
test_data = pd.read_csv('../data/processed/test_featured.csv')

# Load the metadata; only its 'selected_features' entry is used here.
with open('../data/processed/feature_engineering_metadata.json', 'r') as f:
    feature_metadata = json.load(f)

selected_features = feature_metadata['selected_features']

print(f"Train shape: {train_data.shape}")
print(f"Validation shape: {val_data.shape}")
print(f"Test shape: {test_data.shape}")
print(f"Features seleccionadas: {len(selected_features)}")

# --- Fail-fast validation ----------------------------------------------------
# A target listed among the selected features would let a model read its own
# label (and would duplicate the efficiency column in the photoluminescence
# matrix), so refuse to continue in that case.
_leaked_targets = [col for col in (TARGET_EFICIENCIA, TARGET_FOTO) if col in selected_features]
if _leaked_targets:
    raise ValueError(
        f"Fuga de datos: las columnas objetivo {_leaked_targets} aparecen en 'selected_features'"
    )

# Every split must provide all selected features and both targets. Checking up
# front reports which split is incomplete; KeyError is what the plain column
# selection would have raised anyway.
_required_columns = list(selected_features) + [TARGET_EFICIENCIA, TARGET_FOTO]
for _split_name, _split_df in (('train', train_data), ('validation', val_data), ('test', test_data)):
    _missing_columns = [col for col in _required_columns if col not in _split_df.columns]
    if _missing_columns:
        raise KeyError(f"Columnas ausentes en el split '{_split_name}': {_missing_columns}")

# --- Efficiency task -----------------------------------------------------------
X_train = train_data[selected_features]
X_val = val_data[selected_features]
X_test = test_data[selected_features]

y_train_eficiencia = train_data[TARGET_EFICIENCIA]
y_val_eficiencia = val_data[TARGET_EFICIENCIA]
y_test_eficiencia = test_data[TARGET_EFICIENCIA]

# --- Photoluminescence task ----------------------------------------------------
# The efficiency column is appended as one extra input feature. These matrices
# take it from the dataset column in all three splits (not from a model
# prediction).
foto_features = selected_features + [TARGET_EFICIENCIA]
X_train_foto = train_data[foto_features]
X_val_foto = val_data[foto_features]
X_test_foto = test_data[foto_features]

y_train_foto = train_data[TARGET_FOTO]
y_val_foto = val_data[TARGET_FOTO]
y_test_foto = test_data[TARGET_FOTO]

print(f"\nDatos separados correctamente:")
print(f"X_train eficiencia: {X_train.shape}")
print(f"X_train fotoluminiscencia: {X_train_foto.shape}")



1. CARGANDO DATOS CON FEATURES ENGINEERED...
Train shape: (30000, 35)
Validation shape: (10000, 35)
Test shape: (10000, 35)
Features seleccionadas: 33

Datos separados correctamente:
X_train eficiencia: (30000, 33)
X_train fotoluminiscencia: (30000, 34)
