# In [1]:
"""
fuzzy_risk_complete_code.py

Script complet pour :
- définir un contrôleur flou (Mamdani) selon la spécification
- générer 2000 échantillons, calculer le risque défuzzifié
- entraîner deux modèles ML (LinearRegression, RandomForest)
- comparer les performances, sauvegarder dataset et modèles

Dépendances : numpy, pandas, scikit-learn, matplotlib, joblib
Exécution : python fuzzy_risk_complete_code.py
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import joblib
import os

# ------------------------
# Fonctions utilitaires
# ------------------------

def tri(x, a, b, c):
    """Evaluate a triangular membership function on a scalar or an array.

    The membership rises linearly from 0 at ``a`` to 1 at ``b`` and falls
    linearly back to 0 at ``c``. When ``a == b`` or ``b == c`` the
    corresponding edge has zero width and the shared point simply gets
    membership 1, which is how the one-sided "shoulder" sets are expressed.
    Points outside ``[a, c]`` get membership 0.

    Args:
        x: Scalar or array-like of crisp values (converted to float).
        a: Left foot of the triangle.
        b: Peak of the triangle.
        c: Right foot of the triangle.

    Returns:
        A float ndarray with the same shape as ``x`` (0-d for a scalar),
        with every value clipped to ``[0, 1]``.
    """
    values = np.array(x, dtype=float)
    membership = np.zeros_like(values)

    # Rising edge on [a, b]; a zero-width edge counts as full membership.
    rising = (a <= values) & (values <= b)
    membership[rising] = 1.0 if a == b else (values[rising] - a) / (b - a)

    # Falling edge on [b, c]; assigned second, so it decides the value at x == b.
    falling = (b <= values) & (values <= c)
    membership[falling] = 1.0 if b == c else (c - values[falling]) / (c - b)

    return np.clip(membership, 0, 1)

# ------------------------
# Définition des MFs
# ------------------------

def fuzzify_temperature(T):
    """Fuzzify a temperature value into its three linguistic sets.

    Args:
        T: Scalar or array-like temperature; the sets span 0 to 100.

    Returns:
        Dict mapping 'basse', 'normale' and 'elevee' to the membership
        degree(s) of ``T`` as returned by ``tri``.
    """
    # (foot, peak, foot) of each set; a repeated value marks a shoulder.
    breakpoints = {
        'basse': (0, 0, 40),
        'normale': (30, 50, 70),
        'elevee': (60, 100, 100),
    }
    return {label: tri(T, *abc) for label, abc in breakpoints.items()}

def fuzzify_vibration(V):
    """Fuzzify a vibration value into its three linguistic sets.

    Args:
        V: Scalar or array-like vibration level; the sets span 0 to 10.

    Returns:
        Dict mapping 'faible', 'moyenne' and 'forte' to the membership
        degree(s) of ``V`` as returned by ``tri``.
    """
    # (foot, peak, foot) of each set; a repeated value marks a shoulder.
    breakpoints = {
        'faible': (0, 0, 4),
        'moyenne': (2, 5, 8),
        'forte': (6, 10, 10),
    }
    return {label: tri(V, *abc) for label, abc in breakpoints.items()}

def fuzzify_age(A):
    """Fuzzify an equipment age into its three linguistic sets.

    Args:
        A: Scalar or array-like age; the sets span 0 to 20.

    Returns:
        Dict mapping 'neuf', 'moyen' and 'ancien' to the membership
        degree(s) of ``A`` as returned by ``tri``.
    """
    # (foot, peak, foot) of each set; a repeated value marks a shoulder.
    breakpoints = {
        'neuf': (0, 0, 7),
        'moyen': (5, 10, 15),
        'ancien': (12, 20, 20),
    }
    return {label: tri(A, *abc) for label, abc in breakpoints.items()}

# Output domain (risk): 401 evenly spaced sample points covering [0, 10].
risk_x = np.linspace(0, 10, num=401)
# Output membership functions, each sampled once on risk_x so that inference
# only has to clip and combine precomputed arrays.
risk_fuzzy = {
    label: tri(risk_x, *abc)
    for label, abc in (
        ('faible', (0, 0, 4)),
        ('moyen', (2, 5, 8)),
        ('eleve', (6, 10, 10)),
    )
}

# ------------------------
# Règles et inférence
# ------------------------

def infer_risk(T, V, A):
    """Return the defuzzified risk score for a single (T, V, A) reading.

    Mamdani inference: AND is ``min``, OR is ``max``; each rule clips its
    output set at the rule's firing strength, the clipped sets are merged with
    a pointwise max, and the result is defuzzified by its centre of gravity
    over ``risk_x``.

    Rule base:
        1. temperature 'elevee' OR vibration 'forte'      -> risk 'eleve'
        2. age 'ancien' AND vibration 'moyenne'           -> risk 'moyen'
        3. temperature 'basse' AND vibration 'faible'
           AND age 'neuf'                                 -> risk 'faible'
        4. temperature 'normale' AND age 'moyen'          -> risk 'moyen'

    Args:
        T: Temperature of the reading (one scalar value).
        V: Vibration level of the reading (one scalar value).
        A: Age of the reading (one scalar value).

    Returns:
        The centroid of the aggregated output set as a Python float, or 0.0
        when no rule fires (the aggregated set is empty).
    """
    temp = fuzzify_temperature(T)
    vib = fuzzify_vibration(V)
    age = fuzzify_age(A)

    # (firing strength, consequent label) for each rule, in rule order.
    fired = (
        (max(temp['elevee'], vib['forte']), 'eleve'),                 # OR
        (min(age['ancien'], vib['moyenne']), 'moyen'),                # AND
        (min(temp['basse'], vib['faible'], age['neuf']), 'faible'),   # AND
        (min(temp['normale'], age['moyen']), 'moyen'),                # AND
    )

    # Clip each consequent at its rule strength and merge with a pointwise max.
    aggregated = np.zeros_like(risk_x)
    for strength, label in fired:
        if strength > 0:
            clipped = np.minimum(strength, risk_fuzzy[label])
            aggregated = np.maximum(aggregated, clipped)

    # Centre of gravity on the sampled domain; guard against an empty set.
    mass = aggregated.sum()
    if mass == 0:
        return 0.0
    return float((risk_x * aggregated).sum() / mass)

# ------------------------
# Génération du dataset
# ------------------------

def generate_dataset(N=2000, seed=42):
    """Generate a synthetic dataset labelled by the fuzzy controller.

    Draws ``N`` independent readings uniformly at random (temperature in
    [0, 100), vibration in [0, 10), age in [0, 20)) and labels each one with
    ``infer_risk``.

    A private ``RandomState`` is used instead of ``np.random.seed`` so that
    calling this function no longer resets the process-wide NumPy generator.
    For a given ``seed`` the drawn values are the same as the ones obtained
    by seeding the global generator.

    Args:
        N: Number of samples to generate.
        seed: Seed for the private random generator.

    Returns:
        A DataFrame with columns 'temperature', 'vibration', 'age' and
        'risk_fuzzy' and ``N`` rows.
    """
    rng = np.random.RandomState(seed)
    # Draw order (temperature, vibration, age) determines the sample values.
    temps = rng.uniform(0, 100, N)
    vibs = rng.uniform(0, 10, N)
    ages = rng.uniform(0, 20, N)

    # infer_risk works on one reading at a time, so label sample by sample.
    risks = np.array(
        [infer_risk(t, v, a) for t, v, a in zip(temps, vibs, ages)],
        dtype=float,
    )
    return pd.DataFrame({
        'temperature': temps,
        'vibration': vibs,
        'age': ages,
        'risk_fuzzy': risks,
    })

# ------------------------
# Entraînement des modèles
# ------------------------

def train_models(df, test_size=0.2, random_state=1):
    """Fit a linear regression and a random forest to the fuzzy risk labels.

    The frame is split into train and test parts, both models are fitted on
    the training part and scored on the held-out part.

    Args:
        df: DataFrame with columns 'temperature', 'vibration', 'age' and
            'risk_fuzzy'.
        test_size: Passed to ``train_test_split`` as the test share.
        random_state: Seed used for the split and for the random forest.

    Returns:
        Dict with keys:
            'models': fitted estimators under 'linear_regression' and
                'random_forest';
            'metrics': per-model dict of 'MSE', 'RMSE' and 'R2' on the test
                part;
            'feature_importances': feature names and the forest's importances;
            'X_test', 'y_test', 'y_pred_lr', 'y_pred_rf': held-out data and
                the two models' predictions on it.
    """
    feature_names = ['temperature', 'vibration', 'age']
    X = df[feature_names].values
    y = df['risk_fuzzy'].values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    def metrics(y_true, y_pred):
        # RMSE is derived from the MSE that was just computed.
        mse = mean_squared_error(y_true, y_pred)
        return {
            'MSE': mse,
            'RMSE': np.sqrt(mse),
            'R2': r2_score(y_true, y_pred),
        }

    # Linear baseline.
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred_lr = lr.predict(X_test)

    # Non-linear model.
    rf = RandomForestRegressor(n_estimators=200, random_state=random_state)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)

    return {
        'models': {
            'linear_regression': lr,
            'random_forest': rf,
        },
        'metrics': {
            'linear_regression': metrics(y_test, y_pred_lr),
            'random_forest': metrics(y_test, y_pred_rf),
        },
        'feature_importances': {
            'features': list(feature_names),
            'importance': rf.feature_importances_.tolist(),
        },
        'X_test': X_test,
        'y_test': y_test,
        'y_pred_lr': y_pred_lr,
        'y_pred_rf': y_pred_rf,
    }

# ------------------------
# Visualisations & sauvegarde
# ------------------------

def plot_true_vs_pred(y_true, y_pred, title="True vs Predicted", filename=None):
    """Scatter predicted risk against the fuzzy reference risk.

    A dashed diagonal from (0, 0) to (10, 10) marks perfect agreement.

    Args:
        y_true: Reference risk values (x axis).
        y_pred: Predicted risk values (y axis).
        title: Title of the figure.
        filename: When truthy, the figure is saved to this path; otherwise it
            is shown with ``plt.show()``.

    The figure is closed in a ``finally`` clause, so it does not stay
    registered with pyplot if plotting or saving raises.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.scatter(y_true, y_pred, alpha=0.4)
        ax.plot([0, 10], [0, 10], linestyle='--')  # perfect-prediction line
        ax.set_xlabel("Risk (fuzzy, true)")
        ax.set_ylabel("Risk (predicted)")
        ax.set_title(title)
        ax.grid(True)
        if filename:
            fig.savefig(filename, bbox_inches='tight')
        else:
            plt.show()
    finally:
        # Close this specific figure on both the success and the error path.
        plt.close(fig)

# ------------------------
# Main
# ------------------------

def main():
    """Run the whole pipeline and write every artefact under ``output/``.

    Steps: generate the 2000-sample dataset and save it as CSV, train both
    models, print their metrics and the forest's feature importances, save
    the fitted models with joblib, save the metrics table, save a small
    fuzzy-vs-model comparison on hand-picked examples, and save the
    true-vs-predicted scatter plot for the random forest.
    """
    out_dir = 'output'
    os.makedirs(out_dir, exist_ok=True)

    def out_path(name):
        # Every artefact goes into the same output directory.
        return os.path.join(out_dir, name)

    # Dataset
    print("Génération du dataset (2000 échantillons)…")
    dataset = generate_dataset(N=2000, seed=42)
    csv_path = out_path('fuzzy_dataset_2000.csv')
    dataset.to_csv(csv_path, index=False)
    print(f"Dataset sauvegardé -> {csv_path}")

    # Training
    print("\nEntraînement des modèles ML…")
    results = train_models(dataset)
    models = results['models']
    scores = results['metrics']

    # Report metrics
    print("\nMétriques :")
    for model_name, score in scores.items():
        print(f"- {model_name}: RMSE={score['RMSE']:.4f}, R2={score['R2']:.4f}")

    # Report feature importances
    print("\nImportances (Random Forest) :")
    importances = results['feature_importances']
    for feature, weight in zip(importances['features'], importances['importance']):
        print(f"  {feature}: {weight:.4f}")

    # Persist the fitted models
    lr_path = out_path('linear_regression.joblib')
    rf_path = out_path('random_forest.joblib')
    joblib.dump(models['linear_regression'], lr_path)
    joblib.dump(models['random_forest'], rf_path)
    print(f"\nModèles sauvegardés -> {lr_path}, {rf_path}")

    # Persist the metrics table
    perf_rows = [
        {'model': 'LinearRegression', **scores['linear_regression']},
        {'model': 'RandomForest', **scores['random_forest']},
    ]
    perf_path = out_path('model_performance.csv')
    pd.DataFrame(perf_rows).to_csv(perf_path, index=False)
    print(f"Performances sauvegardées -> {perf_path}")

    # Hand-picked readings: compare the fuzzy controller with both models
    examples = np.array([
        [85, 8.5, 15],
        [25, 1.0, 1],
        [45, 5.0, 10],
        [70, 3.0, 18],
        [55, 6.5, 3],
        [30, 2.0, 12],
    ])
    examples_df = pd.DataFrame(examples, columns=['temperature', 'vibration', 'age'])
    examples_df['risk_fuzzy'] = [infer_risk(t, v, a) for t, v, a in examples]
    examples_df['risk_lr'] = models['linear_regression'].predict(examples)
    examples_df['risk_rf'] = models['random_forest'].predict(examples)
    examples_path = out_path('examples_comparison.csv')
    examples_df.to_csv(examples_path, index=False)
    print(f"Exemples de comparaison sauvegardés -> {examples_path}")

    # Plot
    png_path = out_path('true_vs_pred_rf.png')
    plot_true_vs_pred(
        results['y_test'],
        results['y_pred_rf'],
        title='True vs Predicted (Random Forest)',
        filename=png_path,
    )
    print(f"Graphique True vs Pred sauvegardé -> {png_path}")

    print('\nTerminé.')

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


# Captured output of the run above:
#
# Génération du dataset (2000 échantillons)…
# Dataset sauvegardé -> output\fuzzy_dataset_2000.csv
#
# Entraînement des modèles ML…
#
# Métriques :
# - linear_regression: RMSE=2.0269, R2=0.5042
# - random_forest: RMSE=0.3909, R2=0.9816
#
# Importances (Random Forest) :
#   temperature: 0.4227
#   vibration: 0.4362
#   age: 0.1412
#
# Modèles sauvegardés -> output\linear_regression.joblib, output\random_forest.joblib
# Performances sauvegardées -> output\model_performance.csv
# Exemples de comparaison sauvegardés -> output\examples_comparison.csv
# Graphique True vs Pred sauvegardé -> output\true_vs_pred_rf.png
#
# Terminé.
