<a href="https://colab.research.google.com/github/be-next/TP-IA/blob/main/TP_IA_06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MLP avec SciKit-Learn

  - Utilisation d'un ``pipeline``.
  - Utilisation d'une ``grid`` (recherche d'hyperparamètres par grille avec ``GridSearchCV``).

In [None]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Location of the CSV dataset used throughout the notebook.
url = "https://raw.githubusercontent.com/be-next/TP-IA/main/datasets/segmentation_test.csv"
# Download and parse the CSV into a DataFrame.
D = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Features: every column except the target. Dropping 'CLASS' by name (rather
# than slicing off the first column by position) keeps the feature/target
# split correct even if the column order of the CSV changes.
X = D.drop(columns=['CLASS'])
# Target: the 'CLASS' column.
y = D['CLASS']

In [None]:
# One-hot encoding of the target: one indicator column per distinct class.
# NOTE(review): y_encoded is not used by any cell visible in this section;
# the models are fitted on the raw labels in `y` — confirm it is still needed.
y_encoded = pd.get_dummies(data=y)

In [None]:
# Build the train and test sets: 1310 samples go to training, the remainder
# to testing. The fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=12345,
    train_size=1310,
)

In [None]:
# Declare the model as a pipeline: features are standardized first, then fed
# to a multi-layer perceptron with a single hidden layer of 10 units.
model_p1 = make_pipeline(
    StandardScaler(),
    MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(10,)),
)

In [None]:
# Train the pipeline (scaler + classifier) on the training split.
model_p1.fit(X=X_train, y=y_train)



In [None]:
# Evaluate the trained pipeline on the test split; the score is shown as the
# cell output.
model_p1.score(X=X_test, y=y_test)

0.9329113924050633

In [None]:
# classification_report and confusion_matrix come from the notebook's top
# import cell, so every dependency is declared in one place.

# Predict the class of every test sample with the first pipeline.
predictions = model_p1.predict(X_test)

# Model evaluation: confusion matrix, then per-class precision/recall/F1.
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[109   0   0   0   0   0   1]
 [  1  93   2   0   1   0   6]
 [  0   2 102   0   0   0  10]
 [  0   0   0 115   1   0   0]
 [  0   0   0   0 115   0   0]
 [  0   0   0   0   0 112   0]
 [  0   7  22   0   0   0  91]]
              precision    recall  f1-score   support

   BRICKFACE       0.99      0.99      0.99       110
      CEMENT       0.91      0.90      0.91       103
     FOLIAGE       0.81      0.89      0.85       114
       GRASS       1.00      0.99      1.00       116
        PATH       0.98      1.00      0.99       115
         SKY       1.00      1.00      1.00       112
      WINDOW       0.84      0.76      0.80       120

    accuracy                           0.93       790
   macro avg       0.93      0.93      0.93       790
weighted avg       0.93      0.93      0.93       790



# Grid Search CV

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Base pipeline for the grid search: standardization followed by an MLP.
# hidden_layer_sizes and solver set here are only starting values; both are
# overridden by the parameter grid defined further down.
# NOTE(review): per the scikit-learn docs, learning_rate_init is only used by
# the 'sgd' and 'adam' solvers — confirm 0.01 is the intended value for them.
model_p2 = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        solver='lbfgs',
        activation='relu',
        hidden_layer_sizes=(10,),
        learning_rate_init=0.01,
        max_iter=500,
        random_state=42,
    ),
)

In [None]:
# Print the pipeline's text representation to inspect its steps and the
# parameters passed to each of them.
print(model_p2)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpclassifier',
                 MLPClassifier(batch_size=20, hidden_layer_sizes=(10,),
                               learning_rate_init=0.01, max_iter=500,
                               random_state=42, solver='lbfgs'))])


In [None]:
# Hyper-parameter grid explored by the search. Keys follow the
# '<pipeline step name>__<parameter>' convention so they reach the MLP step.
# Architectures: 1 to 3 hidden layers, each of width 10 or 50.
params = {
    'mlpclassifier__hidden_layer_sizes': [
        (width,) * depth for width in (10, 50) for depth in (1, 2, 3)
    ],
    'mlpclassifier__solver': ['lbfgs', 'sgd', 'adam'],
}

In [None]:
# Exhaustive search over `params`, scoring each combination with 4-fold
# cross-validation of the model_p2 pipeline.
grid = GridSearchCV(
    estimator=model_p2,
    param_grid=params,
    cv=4,
)

In [None]:
# Run the grid search on the training split only; the test split stays unseen.
grid.fit(X=X_train, y=y_train)

In [None]:
# Display the best parameter combination and its cross-validation score.
for label, value in (
    ("Meilleur paramètre : ", grid.best_params_),
    ("Meilleur score : ", grid.best_score_),
):
    print(label, value)

Meilleur paramètre :  {'mlpclassifier__hidden_layer_sizes': (10, 10), 'mlpclassifier__solver': 'adam'}
Meilleur score :  0.9595477176102036


In [None]:
# Predict the test samples with the best pipeline found by the search.
# With the default refit=True, GridSearchCV.predict delegates to best_estimator_.
predictions = grid.predict(X_test)

In [None]:
# classification_report and confusion_matrix are already imported earlier in
# the notebook; the duplicate import has been removed from this cell.

# Evaluation of the best model found by the grid search: confusion matrix,
# then per-class precision/recall/F1.
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[109   1   0   0   0   0   0]
 [  1  95   2   0   1   0   4]
 [  0   2 110   0   0   1   1]
 [  0   0   0 116   0   0   0]
 [  0   0   0   0 115   0   0]
 [  0   1   0   0   0 111   0]
 [  1   4  15   0   0   0 100]]
              precision    recall  f1-score   support

   BRICKFACE       0.98      0.99      0.99       110
      CEMENT       0.92      0.92      0.92       103
     FOLIAGE       0.87      0.96      0.91       114
       GRASS       1.00      1.00      1.00       116
        PATH       0.99      1.00      1.00       115
         SKY       0.99      0.99      0.99       112
      WINDOW       0.95      0.83      0.89       120

    accuracy                           0.96       790
   macro avg       0.96      0.96      0.96       790
weighted avg       0.96      0.96      0.96       790



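Meilleure configuration trouvée avec les paramètres suivants (ces valeurs ne font pas partie de la grille explorée ci-dessus, dont le meilleur résultat est ``(10, 10)`` avec ``adam``) :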

        hidden_layer_sizes=(90,90),
        solver='sgd',  # Correction ici
        learning_rate_init=0.05,
        max_iter=500,