# Projet : Système Intelligent de Sélection de Modèles via Meta-Learning

## 1. Objectif Général
Concevoir une architecture de meta-learning capable de sélectionner dynamiquement le modèle de classification optimal pour détecter l’occupation d’une salle à partir de mesures environnementales.

In [1]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Configuration: both locations are relative to the notebook's working directory.
MODELS_DIR = '../models'  # fitted scaler / classifiers / meta-model are written here
DATA_DIR = '../data'      # the occupancy text files are read from here

# Create the output folder up front so later joblib.dump calls cannot fail on a missing directory.
os.makedirs(name=MODELS_DIR, exist_ok=True)

## 2. Chargement des Données

In [2]:
def load_data(filename):
    """Read one delimited text file from ``DATA_DIR`` into a DataFrame.

    Args:
        filename: Name of the file, relative to ``DATA_DIR``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(os.path.join(DATA_DIR, filename))

print("Loading data...")
# Three files, one per role: fit base models / fit meta-model / final evaluation.
train_df, val_df, test_df = (
    load_data(source_file)
    for source_file in ('datatraining.txt', 'datatest.txt', 'datatest2.txt')
)

for split_label, split_frame in (("Train", train_df), ("Val", val_df), ("Test", test_df)):
    print(f"{split_label} shape: {split_frame.shape}")

Loading data...
Train shape: (8143, 7)
Val shape: (2665, 7)
Test shape: (9752, 7)


## 3. Prétraitement

In [3]:
feature_cols = ['Temperature', 'Humidity', 'Light', 'CO2', 'HumidityRatio']
target_col = 'Occupancy'

# Split every frame into its feature matrix and its label column.
X_train, y_train = train_df[feature_cols], train_df[target_col]
X_val, y_val = val_df[feature_cols], val_df[target_col]
X_test, y_test = test_df[feature_cols], test_df[target_col]

print("Preprocessing...")
# Standardisation statistics come from the training split only; the same
# fitted scaler is then applied unchanged to all three splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# Persist the fitted scaler next to the models (kept as the cell's last
# expression so the written path is still displayed).
joblib.dump(scaler, os.path.join(MODELS_DIR, 'scaler.joblib'))

Preprocessing...


['../models/scaler.joblib']

## 4. Entraînement des Classifieurs de Base (Niveau 1)

In [4]:
print("Training Base Classifiers...")
# Level-1 learners, keyed by the short name used for the saved file and in reports.
models = dict(
    DT=DecisionTreeClassifier(random_state=42),
    RF=RandomForestClassifier(random_state=42),
    SVM=SVC(probability=True, random_state=42),  # probability=True enables predict_proba
    NB=GaussianNB(),
)

for name in models:
    # fit() returns the estimator itself, so `model` is the fitted instance stored in `models`.
    model = models[name].fit(X_train_scaled, y_train)
    acc = model.score(X_val_scaled, y_val)
    joblib.dump(model, os.path.join(MODELS_DIR, f'{name}.joblib'))
    print(f"{name} Validation Accuracy: {acc:.4f}")

Training Base Classifiers...
DT Validation Accuracy: 0.9069
RF Validation Accuracy: 0.9535
SVM Validation Accuracy: 0.9700
NB Validation Accuracy: 0.9775


## 5. Meta-Learning (Niveau 2)
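Extraction des méta-features (pour chaque classifieur de base : confiance maximale et marge entre les deux probabilités de classe les plus élevées) et entraînement du méta-modèle KNN, qui prédit pour chaque échantillon quel classifieur de base utiliser.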

In [5]:
def extract_meta_features(X, y, models):
    """Build per-sample meta-features and, when labels are given, meta-targets.

    For every base model, taken in the iteration order of ``models``, two
    columns are produced per sample: the highest predicted class probability
    ("max confidence") and the gap between the two highest class
    probabilities ("margin"). With two classes the margin equals
    ``abs(p[0] - p[1])``.

    Args:
        X: Feature matrix passed as-is to every model; only ``X.shape[0]``
            is read directly.
        y: True labels as any 1-D array-like (pandas Series, ndarray,
            list, ...), compared positionally with the predictions; or
            ``None`` to skip meta-target computation.
        models: Mapping ``name -> fitted estimator`` exposing
            ``predict_proba`` and ``predict``.

    Returns:
        A tuple ``(meta_features, best_models)``:

        * ``meta_features``: array of shape ``(n_samples, 2 * n_models)``
          laid out as ``[conf_0, margin_0, conf_1, margin_1, ...]``.
        * ``best_models``: ``None`` when ``y`` is ``None``; otherwise an
          integer array holding, per sample, the position (in
          ``list(models)``) of the most confident model among those that
          predict the sample correctly. If no model is correct, the most
          confident model overall is used. Ties go to the earliest model.

    Raises:
        ValueError: If ``y`` is given but its length differs from
            ``X.shape[0]``.
    """
    model_names = list(models.keys())
    n_samples = X.shape[0]

    if not model_names:
        # No base model: no feature columns, and -1 as the "no model" target.
        no_features = np.empty((n_samples, 0))
        return no_features, (np.full(n_samples, -1) if y is not None else None)

    confidences = []      # one (n_samples,) array per model
    feature_columns = []  # interleaved conf / margin columns
    for name in model_names:
        # Sort class probabilities per row so the top two sit in the last columns.
        ranked = np.sort(np.asarray(models[name].predict_proba(X)), axis=1)
        conf_max = ranked[:, -1]
        if ranked.shape[1] > 1:
            margin = conf_max - ranked[:, -2]
        else:
            # Single known class: there is no runner-up, treat its probability as 0.
            margin = conf_max
        confidences.append(conf_max)
        feature_columns.extend([conf_max, margin])

    meta_features = np.column_stack(feature_columns)

    if y is None:
        return meta_features, None

    y_true = np.asarray(y).ravel()
    if y_true.shape[0] != n_samples:
        raise ValueError(
            f"y has {y_true.shape[0]} labels but X has {n_samples} samples"
        )

    confidence = np.column_stack(confidences)  # (n_samples, n_models)
    correct = np.column_stack(
        [np.asarray(models[name].predict(X)).ravel() == y_true for name in model_names]
    )

    # Wrong models get -inf so argmax only ever lands on a correct one;
    # argmax returns the first maximum, so ties go to the earliest model.
    best_among_correct = np.where(correct, confidence, -np.inf).argmax(axis=1)
    # Fallback for samples every model gets wrong: most confident model overall.
    most_confident = confidence.argmax(axis=1)
    best_models = np.where(correct.any(axis=1), best_among_correct, most_confident)

    return meta_features, best_models

print("Extracting Meta-Features for Validation Set...")
# The validation split is the training set of the meta-model:
# inputs are the base models' confidence features, targets the index of the model to pick.
X_meta_train, y_meta_train = extract_meta_features(X_val_scaled, y_val, models)

print("Training Meta-Model (KNN)...")
meta_model = KNeighborsClassifier(n_neighbors=5).fit(X_meta_train, y_meta_train)

# Last expression of the cell, so the saved path is displayed.
joblib.dump(meta_model, os.path.join(MODELS_DIR, 'meta_model.joblib'))

Extracting Meta-Features for Validation Set...
Training Meta-Model (KNN)...


['../models/meta_model.joblib']

## 6. Évaluation Finale

In [6]:
print("Evaluating on Test Set...")
X_meta_test, _ = extract_meta_features(X_test_scaled, None, models)

# The meta-model returns, per test sample, the position in `model_names`
# of the base classifier whose prediction should be used.
selected_model_indices = meta_model.predict(X_meta_test)

model_names = list(models.keys())

# Run every base model exactly once on the test set; these cached predictions
# feed both the routed prediction and the per-model comparison below.
test_preds_all = {name: model.predict(X_test_scaled) for name, model in models.items()}

# Column j holds the predictions of model_names[j]; pick, for each row,
# the column chosen by the meta-model.
stacked_preds = np.column_stack([test_preds_all[name] for name in model_names])
row_ids = np.arange(stacked_preds.shape[0])
final_predictions = stacked_preds[row_ids, selected_model_indices]

# Metrics
print("\n--- Final Evaluation ---")
for metric_label, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_label}: {metric_fn(y_test, final_predictions):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, final_predictions))

# Compare with individual models (reusing the cached predictions)
print("\n--- Individual Models on Test Set ---")
for name, y_pred in test_preds_all.items():
    acc = accuracy_score(y_test, y_pred)
    print(f"{name}: {acc:.4f}")

Evaluating on Test Set...

--- Final Evaluation ---
Accuracy: 0.9537
Precision: 0.8570
Recall: 0.9356
F1 Score: 0.8945
Confusion Matrix:
[[7383  320]
 [ 132 1917]]

--- Individual Models on Test Set ---
DT: 0.9475
RF: 0.9722
SVM: 0.9531
NB: 0.9869
