In [5]:
# File: models/train_models.py

import pandas as pd
import numpy as np
import os
import json
from datetime import datetime, timezone   # UPDATED: timezone-aware datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import joblib

# ---------------------------
# Filesystem locations
# ---------------------------
# Per-crop CSV files are read from RAW_PROCESSED; trained models and their
# metadata files are written under MODELS_DIR.
RAW_PROCESSED = "data/processed"
MODELS_DIR = "models"

# Create the output directory up front so later saves cannot fail on a
# missing folder; exist_ok makes re-running this cell/script harmless.
os.makedirs(MODELS_DIR, exist_ok=True)

# ---------------------------
# Training configuration
# ---------------------------
# One model is trained per crop; the lower-cased crop name is used to build
# both the input CSV name and the output model name.
CROPS = [
    "Jowar",
    "Paddy",
    "Maize",
    "Cotton",
]

# Input columns fed to the regressor, in this exact order.  Kept as a list
# (not a tuple) because it is used directly as a DataFrame column selector.
FEATURES = [
    "fertilizer_kg_ha",
    "irrigation_m3_ha",
    "total_precip_mm",
    "avg_temp_max_C",
    "total_sunshine_h",
]

# Column the models learn to predict.
TARGET = "yield_kg_ha"

# ---------------------------
# Training Loop
# ---------------------------
# For every crop in CROPS: load its processed CSV, fit a random-forest
# regressor on FEATURES -> TARGET, report the MAE on a 20% hold-out split,
# then save the model with joblib plus a JSON metadata file next to it.
# A crop whose CSV is missing, has fewer than 10 rows, or lacks a required
# column is skipped (never fatal for the other crops) and is reported in the
# final summary instead of being hidden behind a blanket success message.
trained_crops = []   # crops whose model and metadata were written
skipped_crops = []   # crops that could not be trained in this run

for crop in CROPS:

    print("\n==============================")
    print(f"Training Model for: {crop}")
    print("==============================")

    csv_path = os.path.join(RAW_PROCESSED, f"{crop.lower()}_model_data.csv")

    # Check file exists
    if not os.path.exists(csv_path):
        print(f"‚ùå Skipping {crop}: {csv_path} not found")
        skipped_crops.append(crop)
        continue

    # Load crop dataset
    df = pd.read_csv(csv_path)

    # Check enough samples exist
    if len(df) < 10:
        print(f"‚ö†Ô∏è Skipping {crop}: Only {len(df)} samples available.")
        skipped_crops.append(crop)
        continue

    # Check every required column is present; without this a malformed CSV
    # raises KeyError below and aborts training for all remaining crops.
    missing_columns = [
        column for column in FEATURES + [TARGET] if column not in df.columns
    ]
    if missing_columns:
        print(f"Skipping {crop}: {csv_path} is missing columns {missing_columns}")
        skipped_crops.append(crop)
        continue

    # Prepare features
    X = df[FEATURES].values
    y = df[TARGET].values

    # Split data (fixed seed so the reported MAE is reproducible)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---------------------------
    # Model Training
    # ---------------------------
    model = RandomForestRegressor(
        n_estimators=100,
        max_depth=5,
        random_state=42,
        n_jobs=-1
    )

    model.fit(X_train, y_train)

    # Evaluate on the held-out 20% only
    preds = model.predict(X_test)
    mae = mean_absolute_error(y_test, preds)

    print(f"‚úÖ {crop} Model Trained | MAE = {mae:.2f} kg/ha")

    # ---------------------------
    # Save the Model
    # ---------------------------
    model_path = os.path.join(MODELS_DIR, f"{crop.lower()}_model.joblib")
    joblib.dump(model, model_path)
    print(f"üíæ Model saved to: {model_path}")

    # ---------------------------
    # Save Metadata
    # ---------------------------
    metadata = {
        "features": FEATURES,
        "units": {
            "fertilizer_kg_ha": "kg/ha",
            "irrigation_m3_ha": "m3/ha",
            "total_precip_mm": "mm",
            "avg_temp_max_C": "degC",
            "total_sunshine_h": "hours"
        },
        "trained_at": datetime.now(timezone.utc).isoformat(),   # timezone-aware UTC timestamp
        "mae_validation": float(mae)   # plain float: numpy scalars are not JSON serializable
    }

    meta_file = model_path + '.meta.json'

    # Explicit encoding so the file does not depend on the platform default.
    with open(meta_file, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=4)

    print(f"üìÑ Metadata saved to: {meta_file}")

    trained_crops.append(crop)

# ---------------------------
# Summary
# ---------------------------
# Only claim full success when no crop was skipped.
print("\n==============================")
if not skipped_crops:
    print("üéâ All crop models trained and saved successfully!")
else:
    print(
        f"Trained {len(trained_crops)} of {len(CROPS)} crop models: "
        f"{', '.join(trained_crops) if trained_crops else 'none'}"
    )
    print(f"Skipped: {', '.join(skipped_crops)}")
print("==============================")



Training Model for: Jowar
‚úÖ Jowar Model Trained | MAE = 307.53 kg/ha
üíæ Model saved to: models\jowar_model.joblib
üìÑ Metadata saved to: models\jowar_model.joblib.meta.json

Training Model for: Paddy
‚úÖ Paddy Model Trained | MAE = 549.45 kg/ha
üíæ Model saved to: models\paddy_model.joblib
üìÑ Metadata saved to: models\paddy_model.joblib.meta.json

Training Model for: Maize
‚úÖ Maize Model Trained | MAE = 478.35 kg/ha
üíæ Model saved to: models\maize_model.joblib
üìÑ Metadata saved to: models\maize_model.joblib.meta.json

Training Model for: Cotton
‚úÖ Cotton Model Trained | MAE = 255.30 kg/ha
üíæ Model saved to: models\cotton_model.joblib
üìÑ Metadata saved to: models\cotton_model.joblib.meta.json

üéâ All crop models trained and saved successfully!
