# 03 - Modélisation

**Étapes:**
1. Charger et préparer les données
2. Split train/val/test (60/20/20)
3. Normaliser (StandardScaler)
4. Entraîner Random Forest
5. Évaluer
6. Analyser l'importance des features

In [None]:
import sys
sys.path.append('..')

from src.paths import WATER_QUALITY_FILE, LANDSAT_FILE, TERRACLIMATE_FILE
from src.config import TARGETS, ALL_FEATURES
from src.data.load_data import load_all
from src.features import prepare_training, select_model_features
from src.models import split_data, normalize, train_models, evaluate, print_results, get_feature_importance
from src.visualization import plot_predictions, plot_importance

## 1. Charger et préparer les données

In [None]:
# Load the raw inputs. load_all returns four values; only the fourth is kept.
# fill_na=False presumably leaves missing values untouched at load time so
# that prepare_training can handle them -- confirm in src.data.load_data.
source_files = (WATER_QUALITY_FILE, LANDSAT_FILE, TERRACLIMATE_FILE)
_, _, _, df_raw = load_all(*map(str, source_files), features=ALL_FEATURES, fill_na=False)

# Prepare the training frame (per the original note: cleaning + feature
# building + encoding). The second return value, `medians`, is kept in the
# notebook namespace but is not used by the cells visible here.
df_train, medians = prepare_training(df_raw)

In [None]:
# Build the feature matrix X and the target frame y from the prepared data.
X = select_model_features(df_train)
y = df_train[TARGETS]

# Keep the column names; the feature-importance cells need them later.
feature_names = [*X.columns]

# One print call; sep="\n" puts each part on its own line.
print(f"X: {X.shape}", f"y: {y.shape}", f"\nFeatures: {feature_names}", sep="\n")

## 2. Split train/val/test

In [None]:
# Split features and targets into train / validation / test subsets.
# The notebook's step list states a 60/20/20 split; the actual ratios (and any
# shuffling or seed) are decided inside src.models.split_data, not shown here.
X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, y)

## 3. Normaliser

In [None]:
# Scale the three feature sets; the scaler object is returned as well.
# The notebook's step list names StandardScaler as the method.
# NOTE(review): confirm that normalize() fits the scaler on X_train only and
# merely transforms X_val / X_test, otherwise validation/test data leak in.
X_train_sc, X_val_sc, X_test_sc, scaler = normalize(X_train, X_val, X_test)

## 4. Entraîner

In [None]:
# Train on the scaled training features (Random Forest per the step list).
# n_estimators and max_depth are forwarded to train_models as given.
# NOTE(review): no seed is passed here -- confirm train_models fixes a
# random_state if reproducible results are expected.
models = train_models(X_train_sc, y_train, n_estimators=100, max_depth=10)

## 5. Évaluer

In [None]:
# Validation: score the trained models on the scaled validation split and
# print the results under the "VALIDATION" label.
val_results = evaluate(models, X_val_sc, y_val)
print_results(val_results, "VALIDATION")

# Test: same evaluation on the held-out test split. test_results is reused by
# the prediction plot cell below.
test_results = evaluate(models, X_test_sc, y_test)
print_results(test_results, "TEST")

In [None]:
# Plots of predictions vs. actual values on the test split; the test metrics
# computed above are passed along to the plotting helper.
plot_predictions(models, X_test_sc, y_test, test_results)

## 6. Importance des features

In [None]:
# Plot feature importances for the trained models, labelled with the feature
# column names captured when X was built.
plot_importance(models, feature_names)

In [None]:
# Feature importances per target. The original note says "top 10"; any such
# cut-off is applied inside get_feature_importance and is not visible here.
importances = get_feature_importance(models, feature_names)
for target, df in importances.items():
    # Blank line, "<target>:" header, then the table without its index column.
    print(f"\n{target}:", df.to_string(index=False), sep="\n")