In [None]:
import numpy as np
np.set_printoptions(threshold=10000,suppress=True)
from IPython.display import display
import pandas as pd
import warnings
import matplotlib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
warnings.filterwarnings('ignore')

# 1. Chargement de la base de données `Iris.txt`

In [None]:
irisDataframe = pd.read_csv("iris.txt", sep="\t", header=None)
display(irisDataframe)
X = irisDataframe.iloc[:, :4]
print(X)
y = irisDataframe.iloc[:, 4:5]
print(y)

# 2. Découpage de la base en Apprentissage/Test

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# 3. Implémentation d’un Perceptron Multi-classe

In [8]:
class PerceptronMultiClasse():

  def __init__(self, X, y, step=0.1, max_iter=1000):
    print("Setup Multi-class Perceptron")
    
    self.X = X
    self.y = y
    self.classes = np.unique(y)
    self.step = step
    self.max_iter = max_iter
    self.W = np.random.rand(len(self.classes), len(self.X.axes[1]))

    print(f"Input : \n{self.X}")
    print(f"Classes : {self.classes}")
    
    print(f"Initial weights : {self.W} \n")

  def train(self):
    print(f"Start training")
    print(f"y : {self.y}")

    stable = False
    currentStep = 0
    while not stable and currentStep < self.max_iter:
      stable = True
      Cj = 0
      for i in range(len(self.X)):
        # print(f"i : {i} \n")
        x = self.X.values[i]
        # print(f"x : {x} \n")
        Ci = (self.y.values - 1)[i]
        y_training = []
        for k in range(len(self.classes)):
          y_training.append(np.dot(self.W[k], x))
        Cj = np.argmax(y_training)
        # print(f"Ci : {Ci} - Cj : {Cj}")
        if Ci != Cj:
          self.W[Ci] = self.W[Ci] + (self.step) * x
          self.W[Cj] = self.W[Cj] - (self.step) * x
          stable = False
      currentStep+=1

    print(f"Trained weigths : {self.W}")

  def prediction(self, x):
    # print(x)
    y_pred = []
    
    for i in range(len(self.classes)):
      y_pred.append(np.dot(self.W[i], x))
      
    # print(y_pred)
    return np.argmax(y_pred) + 1

  def evaluate(self, X_test, y_test):
    print(f"Start evaluation")

    print(f"X test : {np.shape(X_test)}")
    
    y_pred = pd.concat([pd.DataFrame([self.prediction(X_test.values[i])]) for i in range(len(X_test.axes[0]))], ignore_index=True)
    result_conf_matrix = confusion_matrix(y_test, y_pred)
    score = f1_score(y_test, y_pred, average=None)
    precision = precision_score(y_test, y_pred, average=None)
    recall = recall_score(y_test, y_pred, average=None)

    print(f"y pred : {np.shape(y_pred)}, {np.unique(y_pred)}")
    print(f"y test : {np.shape(y_test)}, {np.unique(y_test)}")
    print(result_conf_matrix)

    print(f"F1 score : {score}")
    print(f"Precision : {precision}")
    print(f"Rappel : {recall}")


# 4. Évaluation des performances du modèle

In [9]:
multi_classe = PerceptronMultiClasse(X_train, y_train, 0.1)
multi_classe.train()
multi_classe.evaluate(X_test, y_test)

Setup Multi-class Perceptron
Input : 
       0    1    2    3
96   5.7  2.9  4.2  1.3
105  7.6  3.0  6.6  2.1
66   5.6  3.0  4.5  1.5
0    5.1  3.5  1.4  0.2
122  7.7  2.8  6.7  2.0
..   ...  ...  ...  ...
71   6.1  2.8  4.0  1.3
106  4.9  2.5  4.5  1.7
14   5.8  4.0  1.2  0.2
92   5.8  2.6  4.0  1.2
102  7.1  3.0  5.9  2.1

[100 rows x 4 columns]
Classes : [1 2 3]
Initial weights : [[0.49195772 0.00134452 0.6121464  0.35942495]
 [0.78077341 0.43711902 0.96833044 0.9039306 ]
 [0.26468469 0.85151942 0.06652709 0.51756701]] 

Start training
y :      4
96   2
105  3
66   2
0    1
122  3
..  ..
71   2
106  3
14   1
92   2
102  3

[100 rows x 1 columns]
Trained weigths : [[  8.64195772  14.75134452 -16.8578536   -8.04057505]
 [  6.60077341  10.24711902  -7.56166956 -14.0760694 ]
 [-13.70531531 -23.70848058  26.06652709  23.89756701]]
Start evaluation
X test : (50, 4)
y pred : (50, 1), [1 2 3]
y test : (50, 1), [1 2 3]
[[19  0  0]
 [ 0 14  1]
 [ 0  0 16]]
F1 score : [1.         0.96551724 0.

# 5. Implémentation d’un Percepron Multi-couches (MLP)

In [29]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

def evaluate(y_pred, y_test):
    print(f"Start evaluation")

    result_conf_matrix = confusion_matrix(y_test, y_pred)
    score = f1_score(y_test, y_pred, average=None)
    precision = precision_score(y_test, y_pred, average=None)
    recall = recall_score(y_test, y_pred, average=None)

    print(f"General Accuracy {accuracy_score(y_true=y_test, y_pred=y_pred)*100}% ")

    print(f"y pred : {np.shape(y_pred)}, {np.unique(y_pred)}")
    print(f"y test : {np.shape(y_test)}, {np.unique(y_test)}")
    print(result_conf_matrix)

    print(f"F1 score : {score}")
    print(f"Precision : {precision}")
    print(f"Rappel : {recall}")

def analyse(filename="iris.txt") :
  dataframe = pd.read_csv(f"./{filename}", delim_whitespace=True, header=None)

  X = dataframe.iloc[:,:-1].values
  Y = dataframe.iloc[:, -1].values

  nbClasses = len(np.unique(Y))
  input_size = len(X[0])
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 1/3, random_state = 1)
  scaler = StandardScaler()
  scaler.fit(X_train)
  X_train_norm = scaler.transform(X_train)
  X_test_norm = scaler.transform(X_test)

  print(f"\n\n{filename} MLPClassifier")
  MLP = MLPClassifier(random_state=1)
  MLP.fit(X_train, Y_train)
  y_MLP = MLP.predict(X_test)
  evaluate(y_MLP, Y_test)
  print(f"\n\n{filename} MLPClassifier Normalized")
  MLP = MLPClassifier(random_state=1)
  MLP.fit(X_train_norm, Y_train)
  y_MLP = MLP.predict(X_test_norm)
  evaluate(y_MLP, Y_test)

# 6. Évaluation des performances du modèle MLP

## → Établir les prédictions sur les données de la base de test T.

## → Écrire un programme pour calculer la matrice de confusion, l’Accuracy globale, la précision et le rappel pour chaque classe.

## → Les réseaux de neurones apprennent mieux si les données sont préalablement normalisées, c’est à dire si on a pris soin que la variance des valeurs soit la même pour tous les descripteurs. Il faut bien sûr veiller à ce que cette même normalisation soit appliquée aux données de test.

## → Une comparaison entre normalisation/sans normalisation serait la bienvenue !

In [30]:
analyse()



iris.txt MLPClassifier
Start evaluation
General Accuracy 92.0% 
y pred : (50,), [1 2 3]
y test : (50,), [1 2 3]
[[17  0  0]
 [ 0 15  4]
 [ 0  0 14]]
F1 score : [1.         0.88235294 0.875     ]
Precision : [1.         1.         0.77777778]
Rappel : [1.         0.78947368 1.        ]


iris.txt MLPClassifier Normalized
Start evaluation
General Accuracy 96.0% 
y pred : (50,), [1 2 3]
y test : (50,), [1 2 3]
[[17  0  0]
 [ 0 18  1]
 [ 0  1 13]]
F1 score : [1.         0.94736842 0.92857143]
Precision : [1.         0.94736842 0.92857143]
Rappel : [1.         0.94736842 0.92857143]


## → Ré-appliquer toute la chaine de traitement sur les bases : (glass.txt, breast-cancer-wisconsin.txt, Lsun.txt et Wave) disponibles sur Claroline.

## glass.txt

In [31]:
analyse("glass.txt")



glass.txt MLPClassifier
Start evaluation
General Accuracy 30.555555555555557% 
y pred : (72,), [2]
y test : (72,), [1 2 3 5 6 7]
[[ 0 28  0  0  0  0]
 [ 0 22  0  0  0  0]
 [ 0  7  0  0  0  0]
 [ 0  3  0  0  0  0]
 [ 0  2  0  0  0  0]
 [ 0 10  0  0  0  0]]
F1 score : [0.         0.46808511 0.         0.         0.         0.        ]
Precision : [0.         0.30555556 0.         0.         0.         0.        ]
Rappel : [0. 1. 0. 0. 0. 0.]


glass.txt MLPClassifier Normalized
Start evaluation
General Accuracy 72.22222222222221% 
y pred : (72,), [1 2 5 6 7]
y test : (72,), [1 2 3 5 6 7]
[[23  5  0  0  0  0]
 [ 2 18  0  1  1  0]
 [ 2  5  0  0  0  0]
 [ 0  2  0  1  0  0]
 [ 0  0  0  0  2  0]
 [ 1  1  0  0  0  8]]
F1 score : [0.82142857 0.67924528 0.         0.4        0.8        0.88888889]
Precision : [0.82142857 0.58064516 0.         0.5        0.66666667 1.        ]
Rappel : [0.82142857 0.81818182 0.         0.33333333 1.         0.8       ]


## breast-cancer-wisconsin.txt

In [32]:
analyse("breast-cancer-wisconsin.txt")



breast-cancer-wisconsin.txt MLPClassifier
Start evaluation
General Accuracy 96.13733905579399% 
y pred : (233,), [1 2]
y test : (233,), [1 2]
[[153   2]
 [  7  71]]
F1 score : [0.97142857 0.94039735]
Precision : [0.95625    0.97260274]
Rappel : [0.98709677 0.91025641]


breast-cancer-wisconsin.txt MLPClassifier Normalized
Start evaluation
General Accuracy 98.71244635193133% 
y pred : (233,), [1 2]
y test : (233,), [1 2]
[[153   2]
 [  1  77]]
F1 score : [0.99029126 0.98089172]
Precision : [0.99350649 0.97468354]
Rappel : [0.98709677 0.98717949]


## Lsun.txt

In [33]:
analyse("Lsun.txt")



Lsun.txt MLPClassifier
Start evaluation
General Accuracy 100.0% 
y pred : (134,), [1 2 3]
y test : (134,), [1 2 3]
[[57  0  0]
 [ 0 38  0]
 [ 0  0 39]]
F1 score : [1. 1. 1.]
Precision : [1. 1. 1.]
Rappel : [1. 1. 1.]


Lsun.txt MLPClassifier Normalized
Start evaluation
General Accuracy 100.0% 
y pred : (134,), [1 2 3]
y test : (134,), [1 2 3]
[[57  0  0]
 [ 0 38  0]
 [ 0  0 39]]
F1 score : [1. 1. 1.]
Precision : [1. 1. 1.]
Rappel : [1. 1. 1.]


## Wave.txt

In [34]:
analyse("Wave.txt")



Wave.txt MLPClassifier
Start evaluation
General Accuracy 83.0233953209358% 
y pred : (1667,), [0 1 2]
y test : (1667,), [0 1 2]
[[456  37  45]
 [ 61 463  34]
 [ 66  40 465]]
F1 score : [0.81355932 0.84335155 0.83408072]
Precision : [0.78216123 0.85740741 0.85477941]
Rappel : [0.84758364 0.8297491  0.81436077]


Wave.txt MLPClassifier Normalized
Start evaluation
General Accuracy 83.62327534493102% 
y pred : (1667,), [0 1 2]
y test : (1667,), [0 1 2]
[[449  46  43]
 [ 57 463  38]
 [ 57  32 482]]
F1 score : [0.81562216 0.84258417 0.85008818]
Precision : [0.79751332 0.85582255 0.85612789]
Rappel : [0.83457249 0.8297491  0.8441331 ]


# 7. Bagging de réseaux de neurones

In [142]:
class Bagging():
  def __init__(self, X : pd.DataFrame, Y : pd.DataFrame, nb_sample : int = 10, clasifiers = [MLPClassifier(random_state=1)], normalized=True):
    self.X = X
    self.Y = Y
    self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(X, Y, test_size = 1/3, random_state = 1)
    self.X_samples = []
    self.Y_samples = []
    self.Y_predicts = []
    self.Y_aggregated_predicts = []

    self.X_train_norm = None
    self.X_test_norm = None
    self.normalized = normalized

    if normalized:
      scaler = StandardScaler()
      scaler.fit(self.x_train)
      self.X_train_norm = scaler.transform(self.x_train)
      self.X_test_norm = scaler.transform(self.x_test)

    for i in range(nb_sample):
      self.add_sample()
    self.clasifiers = clasifiers
    self.trained_clasifiers = []

  def add_sample(self):
    new_x_sample = []
    new_y_sample = []
    for i in range(len(self.x_train)):
      rand_index = np.random.randint(len(self.x_train))
      if self.normalized:
        new_x_sample.append(self.x_train.values[rand_index])
        new_y_sample.append(self.y_train.values[rand_index])
      else:
        new_x_sample.append(self.x_train.values[rand_index])
        new_y_sample.append(self.y_train.values[rand_index])

    self.X_samples.append(new_x_sample)
    self.Y_samples.append(new_y_sample)

  def display(self):
    for i in range(len(self.X_samples)):
      print(f"Sample {i} : {self.X_samples[i]}")

  def apply_clasifiers(self):
    print("Apply clasifiers")
    for i in range(len(self.X_samples)):
      current_clasifier = self.clasifiers[np.random.randint(len(self.clasifiers))]
      current_clasifier.fit(self.X_samples[i], self.Y_samples[i])
      
      self.trained_clasifiers.append(current_clasifier)
      self.trained_clasifiers[i].fit(self.X_samples[i], self.Y_samples[i])
      
      if self.normalized:
        self.Y_predicts.append(self.trained_clasifiers[i].predict(self.X_test_norm))
      else:
        self.Y_predicts.append(self.trained_clasifiers[i].predict(self.x_test))

  def aggregating(self):
    print("Aggregating")
    
    self.Y_aggregated_predicts = np.apply_along_axis(lambda x: np.bincount(x).argmax(), 0, self.Y_predicts)
    print(np.shape(self.Y_aggregated_predicts))

  def train(self):
    self.apply_clasifiers()
    self.aggregating()

  def predict(self, x_test):
    print(f"Predict {x_test}")

  def evaluate(self):
    print("Evaluation")
    evaluate(self.Y_aggregated_predicts, self.y_test)

In [144]:
bags = Bagging(X, y, nb_sample=15)
# bags.display()
bags.train()
bags.evaluate()


Apply clasifiers
Aggregating
(50,)
Evaluation
Start evaluation
General Accuracy 80.0% 
y pred : (50,), [1 2 3]
y test : (50, 1), [1 2 3]
[[17  0  0]
 [ 2 12  5]
 [ 1  2 11]]
F1 score : [0.91891892 0.72727273 0.73333333]
Precision : [0.85       0.85714286 0.6875    ]
Rappel : [1.         0.63157895 0.78571429]
