In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
import time
import pandas as pd

#def one_hot(x, k, dtype = np.float32):
#    """Create a one-hot encoding of x of size k"""
#    return np.array(x[:, None] == np.arange(k), dtype)


## Dataset Preparation

In this project, we use the Synthetic dataset. It contains 65000 instances of different attributes and classes, 80% for training (of whice 15% will be used for validation) and 20% for testing, each 1 column.



In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load Data
df = pd.read_csv('synthetic.csv')
x = df.iloc[:, :-1].values  # Features
y = df.iloc[:, -1].values   # Labels

# Normalize
x /= x.max()

# One-hot encode labels
num_labels = 4

#onehot_encoder = OneHotEncoder(sparse_output=False, categories='auto')

#y_new = one_hot(y.astype('int32'), 4)

y = pd.get_dummies(y)
y_new = y.values

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y_new, test_size=0.2, random_state=42)

# Further split training data for validation
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.15, random_state=42)


# Verify shapes
print("Shapes:")
print("data: ", df.shape)
print("x_train:", x_train.shape)
print("y_train:", y_train.shape)
print("x_val:", x_val.shape)
print("y_val:", y_val.shape)
print("x_test:", x_test.shape)
print("y_test:", y_test.shape)
y_test

Shapes:
data:  (2298, 15)
x_train: (1562, 14)
y_train: (1562, 4)
x_val: (276, 14)
y_val: (276, 4)
x_test: (460, 14)
y_test: (460, 4)


array([[False, False, False,  True],
       [False,  True, False, False],
       [False, False,  True, False],
       ...,
       [ True, False, False, False],
       [False,  True, False, False],
       [ True, False, False, False]])

In [4]:
print("Training data: {} {}".format(x_train.shape, y_train.shape))
print("Test data: {} {}".format(x_test.shape, y_test.shape))

Training data: (1562, 14) (1562, 4)
Test data: (460, 14) (460, 4)


## Initialization
The specific problem that arises when trying to implement this, is that we are trying to transform from 14 nodes to 4 nodes, when instantiationg the DeepNeuralNetwork class, we pass in an array of sizes that defines the number of actiations per layer


In [5]:
from scipy.special import softmax # use built-in function to avoid numerical instability
from itertools import chain

def matmul(a, b):
    """chunked matmul which converts datatypes and filters values too large to 127"""
    c = np.empty((a.shape[0], b.shape[1]), dtype = np.int8) # output
    for i in range(a.shape[0]): # iterate over rows in a 
        aa = a[i].astype(np.int32) # convert one row to extended datatype
        cc = aa @ b # broadcasting means cc is the dtype of aa 
        cc[cc > 127] = 127 # set all future overflows to 127
        # print(cc)
        c[i] = cc.astype(np.int32) # convert dtype back 
    return c

class DeepNeuralNetwork():
    def __init__(self, sizes, activation = 'tanh'):
        self.sizes = sizes
        self.length = len(sizes)
        self.best_loss = float('inf')
        self.patience = 4
        self.best_params = None
        self.test_size = 460
        # Training
        #self.f1_score = 0
        self.acc = 0
        #self.recall = 0
        #self.Precision = 0
        
        # Activation Function
        if activation == 'relu':
            self.activation = self.relu
        elif activation == 'tanh':
            self.activation = self.tanh
        else:
            raise ValueError("Activation function is not supported, please use 'relu' or 'tanh'")
        
        # Save all weights
        self.params = self.initialize()
        # Save all intermediate values i.e. activations
        self.cache = {}
        
    def relu(self, x, derivative=False):
        '''
            Derivative of ReLU is a bit more complicated since it is not differentiable at x = 0
        
            Forward path:
            relu(x) = max(0, x)
            In other word,
            relu(x) = 0, if x < 0
                    = x, if x >= 0

            Backward path:
            ∇relu(x) = 0, if x < 0
                     = 1, if x >=0
        '''
        if derivative:
            return np.where(x < 0, 0, 1)
        return np.maximum(0, x)
    
    def tanh(self, x, derivative=False):
        """
        Z : non activated outputs
        Returns (A : 2d ndarray of activated outputs, df: derivative component wise)
        """
        A = np.empty(x.shape)
        A = 2.0/(1 + np.exp(-2.0*x)) - 1 
        if derivative:
            return 1 - np.square(A)
        return A
    
    
    def softmax(self, Z):
        return softmax(Z, axis=0)
    
    def initialize(self):
        # Number of nodes per layer
        # verify if layers length is 3 or 2
        if(self.length == 2):
            input_layer = 14
            hidden_layer_1 = self.sizes[0]
            hidden_layer_2 = self.sizes[1]
            output_layer = 4
            
            params = {
                "W1": np.random.randn(hidden_layer_1, input_layer) * np.sqrt(1./input_layer),
                "b1": np.zeros((hidden_layer_1, 1)) * np.sqrt(1./input_layer),
                "W2": np.random.randn(hidden_layer_2, hidden_layer_1) * np.sqrt(1./hidden_layer_1),
                "b2": np.zeros((hidden_layer_2, 1)) * np.sqrt(1./hidden_layer_1),
                "W3": np.random.randn(output_layer, hidden_layer_2) * np.sqrt(hidden_layer_2),
                "b3": np.zeros((output_layer, 1)) * np.sqrt(hidden_layer_2),
                
            }
            
        elif(self.length == 3):
            input_layer = 14
            hidden_layer_1 = self.sizes[0]
            hidden_layer_2 = self.sizes[1]
            hidden_layer_3 = self.sizes[2]
            output_layer = 4
            
            params = {
                "W1": np.random.randn(hidden_layer_1, input_layer) * np.sqrt(1./input_layer),
                "b1": np.zeros((hidden_layer_1, 1)) * np.sqrt(1./input_layer),
                "W2": np.random.randn(hidden_layer_2, hidden_layer_1) * np.sqrt(1./hidden_layer_1),
                "b2": np.zeros((hidden_layer_2, 1)) * np.sqrt(1./hidden_layer_1),
                "W3": np.random.randn(hidden_layer_3, hidden_layer_2) * np.sqrt(hidden_layer_2),
                "b3": np.zeros((hidden_layer_3, 1)) * np.sqrt(hidden_layer_2),
                "W4": np.random.randn(output_layer, hidden_layer_3) * np.sqrt(hidden_layer_3),
                "b4": np.zeros((output_layer, 1)) * np.sqrt(hidden_layer_3),
            }
        else:
            raise ValueError("Neuron Structure not supported, please use a length of either 3 or 2")
            
        return params
    
    def feed_forward(self, x):
        """
        y = tanh(wX + b)
        """
        
        if(self.length == 2):
            self.cache["X"] = x
           
            self.cache["Z1"] = np.dot(self.params["W1"], self.cache["X"].T) + self.params["b1"]
            self.cache["A1"] = self.activation(self.cache["Z1"])
            #if(activation == 'relu'):
            #    print("Z1 avant activation ReLu: ", self.cache["Z1"])
       
            #if(activation == 'relu'):
            #    print("A1 apres activation ReLu: ", self.cache["A1"])
            self.cache["Z2"] = np.dot(self.params["W2"], self.cache["A1"]) + self.params["b2"]

            self.cache["A2"] = self.activation(self.cache["Z2"])
            self.cache["Z3"] = np.dot(self.params["W3"], self.cache["A2"]) + self.params["b3"]

            self.cache["A3"] = self.softmax(self.cache["Z3"])
            
            return self.cache["A3"]
            
        elif(self.length == 3):
            self.cache["X"] = x
            
            self.cache["Z1"] = np.dot(self.params["W1"], self.cache["X"].T) + self.params["b1"]

            self.cache["A1"] = self.activation(self.cache["Z1"])
            self.cache["Z2"] = np.dot(self.params["W2"], self.cache["A1"]) + self.params["b2"]

            self.cache["A2"] = self.activation(self.cache["Z2"])
            self.cache["Z3"] = np.dot(self.params["W3"], self.cache["A2"]) + self.params["b3"]
            self.cache["A3"] = self.activation(self.cache["Z3"])
            self.cache["Z4"] = np.dot(self.params["W4"], self.cache["A3"]) + self.params["b4"]
            self.cache["A4"] = self.softmax(self.cache["Z4"])
            
            return self.cache["A4"]
        
        else:
            raise ValueError("Neuron Structure not supported, please use a length of either 3 or 2")
        
    
    
    def back_propagate(self, y, output):
        current_batch_size = y.shape[0]
        
        if(self.length == 2):
            dZ3 = output - y.T
            dW3 = (1./current_batch_size) * matmul(dZ3, self.cache["A2"].T)
            db3 = (1./current_batch_size) * np.sum(dZ3, axis = 1, keepdims = True)
            
            # Second hidden layer gradients
            # dA2 = np.matmul(self.params["W3"].T, dZ3)
            dA2 = self.params["W3"].T @ dZ3
            dZ2 = dA2 * self.activation(self.cache["Z2"], derivative = True)
            dW2 = (1./current_batch_size) * matmul(dZ2, self.cache["A1"].T)
            db2 = (1./current_batch_size) * np.sum(dZ2, axis = 1, keepdims = True)
            
            # First Hidden Layer gradients
            # dA1 = np.matmul(self.params["W2"].T, dZ2)
            dA1 = self.params["W2"].T @ dZ2
            dZ1 = dA1 * self.activation(self.cache["Z1"], derivative = True)
            dW1 = (1./current_batch_size) * (dZ1 @ self.cache["X"])
            db1 = (1./current_batch_size) * np.sum(dZ1, axis = 1, keepdims = True)
            
            self.grads = {"W1": dW1, "b1": db1, "W2": dW2, "b2": db2, "W3": dW3, "b3": db3}

            return self.grads
            
        elif(self.length == 3):
            dZ4 = output - y.T
            dW4 = (1./current_batch_size) * (dZ4 @ self.cache["A3"].T)
            db4 = (1./current_batch_size) * np.sum(dZ4, axis = 1, keepdims = True)
            
            # Third hidden layer gradients
            dA3 = np.matmul(self.params["W4"].T, dZ4)
            dZ3 = dA3 * self.activation(self.cache["Z3"], derivative = True)
            ## Verificar utilización de A2
            dW3 = (1./current_batch_size) * (dZ3 @ self.cache["A2"].T)
            db3 = (1./current_batch_size) * np.sum(dZ3, axis = 1, keepdims = True)
            
            # Second hidden layer gradients
            dA2 = np.matmul(self.params["W3"].T, dZ3)
            dZ2 = dA2 * self.activation(self.cache["Z2"], derivative = True)
            dW2 = (1./current_batch_size) * (dZ2 @ self.cache["A1"].T)
            db2 = (1./current_batch_size) * np.sum(dZ2, axis = 1, keepdims = True)
            
            # First Hidden Layer gradients
            dA1 = np.matmul(self.params["W2"].T, dZ2)
            dZ1 = dA1 * self.activation(self.cache["Z1"], derivative = True)
            dW1 = (1./current_batch_size) * (dZ1 @ self.cache["X"])
            db1 = (1./current_batch_size) * np.sum(dZ1, axis = 1, keepdims = True)
            
            self.grads = {"W1": dW1, "b1": db1, "W2": dW2, "b2": db2, "W3": dW3, "b3": db3, "W4": dW4, "b4": db4}
            
            return self.grads

        else:
            raise ValueError("Neuron Structure not supported, please use a length of either 3 or 2")
            
        # return self.grads
    
    def cross_entropy_loss(self, y, output):
        '''
            L(y, ŷ) = −∑ylog(ŷ).
        '''
        #l_sum = np.sum(np.multiply(y.T, np.log(output)))
        #m = y.shape[0]
        #l = -(1./m) * l_sum
        #return l
        epsilon = 1e-10  # Small value to ensure numerical stability
        m = y.shape[0]
        l_sum = np.sum(np.multiply(y.T, np.log(output + epsilon)))  # Add epsilon to prevent log(0)
        loss = -(1. / m) * l_sum
        return loss
    
    def optimize(self, l_rate=0.1):
        """
        Stochatic Gradient Descent (SGD):
            θ^(t+1) <- θ^t - η∇L(y, ŷ)
        """
        for key in self.params:
            #print("Before: ", self.params[key])
            #np.add(self.params[key], (l_rate * self.grads[key]))
            self.params[key] = self.params[key] - (l_rate * self.grads[key])
            #print("After: ", self.params[key])
            
    def accuracy(self, y, output):
        y_pred = np.argmax(output.T, axis=-1)
        #print("Predicted values:", y_pred)
        y_true = np.argmax(y, axis=-1)
        #print("Actual values:", y_true)
        # Calculate True Positives, True Negatives, False Positives, False Negatives
        TP = np.sum((y_true == 1) & (y_pred == 1))
        TN = np.sum((y_true == 0) & (y_pred == 0))
        FP = np.sum((y_true == 0) & (y_pred == 1))
        FN = np.sum((y_true == 1) & (y_pred == 0))

        # Calculate accuracy
        if((TP + TN + FP + FN) == 0):
            accuracy = 0
        else:
            accuracy = (TP + TN) / (TP + TN + FP + FN)

        # Precision
        #if((TP + FP) == 0):
        #    precision = 0
        #else:
        #    precision = TP / (TP + FP)

        # Recall
        if((TP + FN)==0):
            recall = 0
        else:
            recall = TP / (TP + FN)

        # F1-score
        #if((precision + recall) == 0):
        #    f1_score = 0
        #else:
        #    f1_score = 2 * (precision * recall) / (precision + recall)
        
        return accuracy#, precision, recall, f1_score
    
    def precsision(self, y, output):
        y_pred = np.argmax(output.T, axis=-1)
        #print("Predicted values:", y_pred)
        y_true = np.argmax(y, axis=-1)
        #print("Actual values:", y_true)
        # Calculate True Positives, True Negatives, False Positives, False Negatives
        TP = np.sum((y_true == 1) & (y_pred == 1))
        TN = np.sum((y_true == 0) & (y_pred == 0))
        FP = np.sum((y_true == 0) & (y_pred == 1))
        FN = np.sum((y_true == 1) & (y_pred == 0))
        
        # Precision
        if((TP + FP) == 0):
            precision = 0
        else:
            precision = TP / (TP + FP)
        return precision
    
    def recall_class(self, y, output, value):
        y_pred = np.argmax(output.T, axis=-1)
        y_true = np.argmax(y, axis=-1)
        # Calculate True Positives, True Negatives, False Positives, False Negatives
        TP = np.sum((y_true == value) & (y_pred == value))
        TN = np.sum((y_true != value) & (y_pred != value))
        FP = np.sum((y_true != value) & (y_pred == value))
        FN = np.sum((y_true == value) & (y_pred != value))
        
        if((TP + FN)==0):
            recall = 0
        else:
            recall = TP / (TP + FN)
        
        

    def train(self, x_train, y_train, x_test, y_test,x_val, y_val, epochs=100, batch_size = 64, l_rate=0.1):
        
        self.epochs = epochs
        self.batch_size = batch_size
        num_batches = 390
        
        # Initialize optimizer
        #self.optimizer = optimizer
        
        start_time = time.time()
        template = "Epoch {}: {:.2f}s, train acc={:.2f}, train loss={:.2f}, test acc={:.2f}, test loss={:.2f}"
        #template2 = "train F1 Score = {:.2f}, train Precision={:.2f},  train Recall={:.2f}, test F1 Score = {:.2f}, test Precision={:.2f},  test Recall={:.2f},"
        
        best_epoch = 0
        patience_count = 0
        # Train
        for i in range(self.epochs):
            # Shuffle
            permutation = np.random.permutation(x_train.shape[0])
            x_train_shuffled = x_train[permutation]
            y_train_shuffled = y_train[permutation]
            
            
            for j in range(num_batches):
                # Batch
                begin = j* self.batch_size
                end = min(begin + self.batch_size, x_train.shape[0]-1)
                x = x_train_shuffled[begin:end]
                y = y_train_shuffled[begin:end]
                
                # Forward
                output = self.feed_forward(x)
                #print("Predicted: ", np.argmax(output, axis=-1))
                #print("Actual: ", np.argmax(y, axis=-1))
                # Backprop
                grad = self.back_propagate(y, output)
                # Optimize
                self.optimize(l_rate = l_rate)
            
            # At this point, the weights are supposed to be optimized and thus, we measure with the data
            
            # Evaluate performance
            # Training data
            train_loss = 0
            train_acc = 0
            test_acc = 0
            test_loss = 0
            print("After stochastic Gradient Descent:")
            for j in range(num_batches):
                # Batch
                begin = j* self.batch_size
                end = min(begin + self.batch_size, x_train.shape[0]-1)
                x = x_train_shuffled[begin:end]
                y = y_train_shuffled[begin:end]
                train_output = self.feed_forward(x)
                #print("Predicted: ", np.argmax(train_output, axis=-1))
                #print("Actual: ", np.argmax(y, axis=-1))
                train_acc += self.accuracy(y, train_output)
                train_loss += self.cross_entropy_loss(y, train_output)
            train_acc /= num_batches
            train_loss /= num_batches
            
            test_outputs = []
            actual_test_outpouts = []
            # 115 Batches of 4
            for j in range(115):
                # Batch
                begin = j * self.batch_size
                end = min(begin + self.batch_size, x_test.shape[0]-1)
                x = x_test[begin:end]
                #print(x.shape)
                y = y_test[begin:end]
                #print(y.shape)
                test_output = self.feed_forward(x)
                #test_outputs.append(test_output)
                #actual_test_outpouts.append(y)
                #print("Predicted (Test): ", np.argmax(test_output, axis=-1))
                #print("Actual (test): ", np.argmax(y, axis=-1))
                test_acc += self.accuracy(y, test_output)
                test_loss += self.cross_entropy_loss(y, test_output)
                test_output = np.argmax(test_output, axis=-1)
                test_outputs.append(test_output)
            test_acc /= 92
            test_loss /= 92

            #train_output = self.feed_forward(x_train)
            #print("Predicted (Training data): ", np.argmax(train_output.T, axis=-1))
            #print("Actual (Training data): ", np.argmax(y_train[0], axis=-1))
            #train_acc, train_prec, train_recall, train_f1score = self.accuracy(y_train, train_output)
            #train_acc = self.accuracy(y_train, train_output)
            #train_loss = self.cross_entropy_loss(y_train, train_output)
            
            # Validation
            val_output = self.feed_forward(x_val)
            val_loss = self.cross_entropy_loss(y_val, val_output)

            # Test data
            
            # accuracy, precision, recall, f1_score
            #test_output = self.feed_forward(x_test)
            # test_acc, test_prec, test_recall, test_f1score = self.accuracy(y_test, test_output)
            #test_acc = self.accuracy(y_test, test_output)
            #test_loss = self.cross_entropy_loss(y_test, test_output)
            
            print(template.format(i+1, time.time()-start_time, train_acc, train_loss, test_acc, test_loss))
            #print(template2.format(train_f1score, train_prec, train_recall, test_f1score, test_prec, test_recall))
            
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                self.best_params = self.params.copy
                best_epoch = i
                patience_count = 0
            else:
                patience_count += 1
            
            # Early stopping
            if patience_count >= self.patience:
                print("Early stopping at epoch:", i+1)
                print("Best validation loss:", self.best_loss)
                print("Best epoch:", best_epoch+1)
                self.params = self.best_params
                #self.f1_score = train_f1score
                self.acc = train_acc
                #self.recall = train_recall
                #self.Precision = train_prec
                
                break
                
        
        return test_outputs, actual_test_outpouts, val_loss

                
            

                

## Things to modify: Patience and right NN Structure

In [6]:
# Define architectures and activations
architectures = [(10, 8, 6), (10, 8, 4), (6, 4)]
#activations = ['tanh', 'relu']
activations = ['relu', 'tanh']

#best_models = {'tanh': [], 'relu': []}
best_models = {'relu': [], 'tanh': []}

# Train and evaluate models
for activation in activations:
    for architecture in architectures:
        
        print(f"Architecture: {architecture}, Activation: {activation}")
            
        # Create and train the model
        model = DeepNeuralNetwork(architecture, activation=activation)
        # Evaluate the model on the test set
        
        test_outputs, actual_test_outpouts, best_val_loss = model.train(x_train, y_train, x_test, y_test, x_val, y_val, epochs=100, batch_size=4, l_rate=0.1)
        # Keep track of the model's predictions and test loss
        best_models[activation].append((architecture, test_outputs, best_val_loss))
        
# TODO Sort the models by lowest val loss
# Show best Model by activation function


Architecture: (10, 8, 6), Activation: relu
After stochastic Gradient Descent:
Epoch 1: 0.13s, train acc=0.58, train loss=1.30, test acc=0.66, test loss=1.64
After stochastic Gradient Descent:
Epoch 2: 0.22s, train acc=0.59, train loss=1.30, test acc=0.66, test loss=1.62
After stochastic Gradient Descent:
Epoch 3: 0.32s, train acc=0.57, train loss=1.30, test acc=0.66, test loss=1.63
After stochastic Gradient Descent:
Epoch 4: 0.42s, train acc=0.58, train loss=1.30, test acc=0.66, test loss=1.61
After stochastic Gradient Descent:
Epoch 5: 0.52s, train acc=0.58, train loss=1.30, test acc=0.66, test loss=1.62
After stochastic Gradient Descent:
Epoch 6: 0.61s, train acc=0.59, train loss=1.30, test acc=0.66, test loss=1.62
After stochastic Gradient Descent:
Epoch 7: 0.70s, train acc=0.58, train loss=1.30, test acc=0.66, test loss=1.64
After stochastic Gradient Descent:
Epoch 8: 0.79s, train acc=0.59, train loss=1.30, test acc=0.66, test loss=1.62
Early stopping at epoch: 8
Best validation lo

In [7]:
def accuracy_class(y, output, value):
    TP = np.sum((y == value) & (output == value))
    TN = np.sum((y != value) & (output != value))
    FP = np.sum((y != value) & (output == value))
    FN = np.sum((y == value) & (output != value))
    print("Class ", value, " has: ", TP ," true positives, ", TN, " true negatives ", FP, " false positives and ", FN, " false negatives")
        
    if((TP + TN + FP + FN) == 0):
        accuracy = 0
    else:
        accuracy = (TP + TN) / (TP + TN + FP + FN)
    
    #print("Accuracy for class: ", value, " is: ", accuracy)
    
    return accuracy
            
def accuracy(y, output):
    accuracy = 0
    for i in range(4):
        accuracy += accuracy_class(y, output, i)
    accuracy /= 4
    return accuracy
    
def precision_class(y, output, value):
        TP = np.sum((y == value) & (output == value))
        TN = np.sum((y != value) & (output != value))
        FP = np.sum((y != value) & (output == value))
        FN = np.sum((y == value) & (output != value))
        #print("Class ", value, " has: ", TP ," true positives, ", TN, " true negatives ")
        #print("", FP, " false positives and ", FN, " false negatives")
        
        # Precision
        if((TP + FP) == 0):
            precision = 0
        else:
            precision = TP / (TP + FP)
        #print("Precision for class: ", value, " is: ", precision)
        return precision
    
def precision( y, output):
        precision = 0
        for i in range(4):
            precision += precision_class(y, output, i)
        precision /= 4
        return precision
    
def recall_class(y, output, value):
        # Calculate True Positives, True Negatives, False Positives, False Negatives
        TP = np.sum((y == value) & (output == value))
        TN = np.sum((y != value) & (output != value))
        FP = np.sum((y != value) & (output == value))
        FN = np.sum((y == value) & (output != value))
        #print("Class ", value, " has: ", TP ," true positives, ", TN, " true negatives ")
        #print("", FP, " false positives and ", FN, " false negatives")
        
        if((TP + FN)==0):
            recall = 0
        else:
            recall = TP / (TP + FN)
        #print("Recall for class: ", value, " is: ", recall)
        return recall
    
def recall(y, output):
        recall = 0
        for i in range(4):
            recall += recall_class(y, output, i)
        recall /= 4
        
        return recall
    
def f1_score_class(y, output, value):
        # F1-score
        precision = precision_class(y, output, value)
        recall = recall_class(y, output, value)
        if((precision + recall) == 0):
            f1_score = 0
        else:
            f1_score = 2 * (precision * recall) / (precision + recall)
        
        #print("F1 Score for class: ", value, " is: ", f1_score)
        
        return f1_score
    
def f1_score(y, output):
    f1_score = 0
    for i in range(4):
        f1_score += f1_score_class(y, output, i)
    f1_score /= 4
    return f1_score

def confusion_matrix(expected, actual, num_classes=4):
    # Initialize the confusion matrix with zeros
    matrix = np.zeros((num_classes, num_classes), dtype=int)
    
    # Fill the matrix
    for exp, act in zip(expected, actual):
        matrix[exp][act] += 1
    
    return matrix

def print_confusion_matrix(matrix):
    print("Confusion Matrix:")
    print("Expected \ Predicted ", end="")
    for i in range(matrix.shape[1]):
        print(f"| {i} ", end="")
    print("|")
    print("-" * (5 + 4 * matrix.shape[1]))
    for i in range(matrix.shape[0]):
        print(f"       {i}             |", end="")
        for j in range(matrix.shape[1]):
            print(f" {matrix[i][j]:2d} ", end="")
        print("|")


### We calculate the parameters for each model
For tanh with the structure (10, 8, 6), we convert the data in order to adapt it to our functions

In [8]:
## Trasnformation de y (on enlève le one-hot)       
y_test = np.argmax(y_test, axis = -1)
## NE PAS REPETER

In [9]:
best_models['tanh'][0][0]
# Transformation des outoutputs
test_outputs_tanh_10_8_6 = best_models['tanh'][0][1]
test_outputs_tanh_10_8_6 = list(chain.from_iterable(test_outputs_tanh_10_8_6))
test_outputs_tanh_10_8_6 = np.array(test_outputs_tanh_10_8_6)
# Confusion matrix

matrix = confusion_matrix(y_test, test_outputs_tanh_10_8_6)
print_confusion_matrix(matrix)

accuracy_tanh_10_8_6 = accuracy(y_test,test_outputs_tanh_10_8_6)
precision_tanh_10_8_6 = precision(y_test,test_outputs_tanh_10_8_6)
recall_tanh_10_8_6 = recall(y_test,test_outputs_tanh_10_8_6)
f1_score_tanh_10_8_6 = f1_score(y_test, test_outputs_tanh_10_8_6)
print("General model accuracy: ", accuracy_tanh_10_8_6)
print("General Model Precision:", precision_tanh_10_8_6)
print("General Model Recall:   ",recall_tanh_10_8_6)
print("General Model f1-score: ",f1_score_tanh_10_8_6)

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 35  39  31  51 |
       1             | 47  36  50  33 |
       2             | 24  22  26  17 |
       3             | 15  13  12   9 |
Class  0  has:  35  true positives,  218  true negatives  86  false positives and  121  false negatives
Class  1  has:  36  true positives,  220  true negatives  74  false positives and  130  false negatives
Class  2  has:  26  true positives,  278  true negatives  93  false positives and  63  false negatives
Class  3  has:  9  true positives,  310  true negatives  101  false positives and  40  false negatives
General model accuracy:  0.6152173913043478
General Model Precision: 0.22920862559899993
General Model Recall:    0.2292586862717304
General Model f1-score:  0.21919617340365985


### We calculate the parameters for each model
For tanh with the structure (10, 8, 4), we convert the data in order to adapt it to our functions

In [10]:
best_models['tanh'][1][0] # (10, 8, 4)
# Transformation des outoutputs
test_outputs_tanh_10_8_4 = best_models['tanh'][1][1]
test_outputs_tanh_10_8_4 = list(chain.from_iterable(test_outputs_tanh_10_8_4))
test_outputs_tanh_10_8_4 = np.array(test_outputs_tanh_10_8_4)

# Confusion Matrix
matrix = confusion_matrix(y_test, test_outputs_tanh_10_8_4)
print_confusion_matrix(matrix)

accuracy_tanh_10_8_4  = accuracy(y_test,test_outputs_tanh_10_8_4)
precision_tanh_10_8_4 = precision(y_test,test_outputs_tanh_10_8_4)
recall_tanh_10_8_4    = recall(y_test,test_outputs_tanh_10_8_4)
f1_score_tanh_10_8_4  = f1_score(y_test, test_outputs_tanh_10_8_4)
print("General model accuracy:  ", accuracy_tanh_10_8_4)
print("General model precision: ",precision_tanh_10_8_4)
print("General model recall:    ",recall_tanh_10_8_4)
print("General model f1-score:  ",f1_score_tanh_10_8_4)

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 40  40  47  29 |
       1             | 49  41  37  39 |
       2             | 13  25  26  25 |
       3             | 16  13  11   9 |
Class  0  has:  40  true positives,  226  true negatives  78  false positives and  116  false negatives
Class  1  has:  41  true positives,  216  true negatives  78  false positives and  125  false negatives
Class  2  has:  26  true positives,  276  true negatives  95  false positives and  63  false negatives
Class  3  has:  9  true positives,  318  true negatives  93  false positives and  40  false negatives
General model accuracy:   0.6260869565217391
General model precision:  0.2466580482872516
General model recall:     0.24480162726647864
General model f1-score:   0.2366286116994037


In [11]:
# tanh: (6 4) shape
# Transformation des outoutputs
test_outputs_tanh_6_4 = best_models['tanh'][2][1]
test_outputs_tanh_6_4 = list(chain.from_iterable(test_outputs_tanh_6_4))
test_outputs_tanh_6_4 = np.array(test_outputs_tanh_6_4)

# Confusion matrix 
matrix = confusion_matrix(y_test, test_outputs_tanh_6_4)
print_confusion_matrix(matrix)

accuracy_tanh_6_4  = accuracy(y_test,test_outputs_tanh_6_4)
precision_tanh_6_4 = precision(y_test,test_outputs_tanh_6_4)
recall_tanh_6_4    = recall(y_test,test_outputs_tanh_6_4)
f1_score_tanh_6_4  = f1_score(y_test, test_outputs_tanh_6_4)
print("General model accuracy:  ", accuracy_tanh_6_4)
print("General model precision: ",precision_tanh_6_4)
print("General model recall:    ",recall_tanh_6_4)
print("General model f1-score:  ",f1_score_tanh_6_4)

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 48  47  17  44 |
       1             | 38  36  46  46 |
       2             | 21  21  29  18 |
       3             |  9  13  17  10 |
Class  0  has:  48  true positives,  236  true negatives  68  false positives and  108  false negatives
Class  1  has:  36  true positives,  213  true negatives  81  false positives and  130  false negatives
Class  2  has:  29  true positives,  291  true negatives  80  false positives and  60  false negatives
Class  3  has:  10  true positives,  303  true negatives  108  false positives and  39  false negatives
General model accuracy:   0.633695652173913
General model precision:  0.2680715549310019
General model recall:     0.26362102671352516
General model f1-score:   0.25501197739313486


In [12]:
best_models['relu'][0][0]
# Transformation des outoutputs
test_outputs_relu_10_8_6 = best_models['relu'][0][1]
test_outputs_relu_10_8_6 = list(chain.from_iterable(test_outputs_relu_10_8_6))
test_outputs_relu_10_8_6 = np.array(test_outputs_relu_10_8_6)

matrix = confusion_matrix(y_test, test_outputs_relu_10_8_6)
print_confusion_matrix(matrix)

accuracy_relu_10_8_6 = accuracy(y_test,test_outputs_relu_10_8_6)
precision_relu_10_8_6 = precision(y_test,test_outputs_relu_10_8_6)
recall_relu_10_8_6 = recall(y_test,test_outputs_relu_10_8_6)
f1_score_relu_10_8_6 = f1_score(y_test, test_outputs_relu_10_8_6)

print("General model accuracy: ", accuracy_relu_10_8_6)
print("General model precision: ",precision_relu_10_8_6)
print("General model recall:    ",recall_relu_10_8_6)
print("General model f1-score:  ",f1_score_relu_10_8_6 )

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 156   0   0   0 |
       1             | 166   0   0   0 |
       2             | 89   0   0   0 |
       3             | 49   0   0   0 |
Class  0  has:  156  true positives,  0  true negatives  304  false positives and  0  false negatives
Class  1  has:  0  true positives,  294  true negatives  0  false positives and  166  false negatives
Class  2  has:  0  true positives,  371  true negatives  0  false positives and  89  false negatives
Class  3  has:  0  true positives,  411  true negatives  0  false positives and  49  false negatives
General model accuracy:  0.6695652173913043
General model precision:  0.08478260869565217
General model recall:     0.25
General model f1-score:   0.1266233766233766


In [13]:
best_models['relu'][1][0]
# Transformation des outoutputs
test_outputs_relu_10_8_4 = best_models['relu'][1][1]
test_outputs_relu_10_8_4 = list(chain.from_iterable(test_outputs_relu_10_8_4))
test_outputs_relu_10_8_4 = np.array(test_outputs_relu_10_8_4)

matrix = confusion_matrix(y_test, test_outputs_relu_10_8_4)
print_confusion_matrix(matrix)

accuracy_relu_10_8_4 = accuracy(y_test,test_outputs_relu_10_8_4)
precision_relu_10_8_4 = precision(y_test,test_outputs_relu_10_8_4)
recall_relu_10_8_4 = recall(y_test,test_outputs_relu_10_8_4)
f1_score_relu_10_8_4 = f1_score(y_test, test_outputs_relu_10_8_4)

print("General model accuracy:  ", accuracy_relu_10_8_4)
print("General model precision: ",precision_relu_10_8_4)
print("General model recall:    ",recall_relu_10_8_4)
print("General model f1-score:  ",f1_score_relu_10_8_4)

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 156   0   0   0 |
       1             | 166   0   0   0 |
       2             | 89   0   0   0 |
       3             | 49   0   0   0 |
Class  0  has:  156  true positives,  0  true negatives  304  false positives and  0  false negatives
Class  1  has:  0  true positives,  294  true negatives  0  false positives and  166  false negatives
Class  2  has:  0  true positives,  371  true negatives  0  false positives and  89  false negatives
Class  3  has:  0  true positives,  411  true negatives  0  false positives and  49  false negatives
General model accuracy:   0.6695652173913043
General model precision:  0.08478260869565217
General model recall:     0.25
General model f1-score:   0.1266233766233766


In [14]:
best_models['relu'][2][0]
# Transformation des outoutputs
test_outputs_relu_6_4 = best_models['relu'][2][1]
test_outputs_relu_6_4 = list(chain.from_iterable(test_outputs_relu_6_4))
test_outputs_relu_6_4 = np.array(test_outputs_relu_6_4)

matrix = confusion_matrix(y_test, test_outputs_relu_6_4)
print_confusion_matrix(matrix)

accuracy_relu_6_4 = accuracy(y_test,test_outputs_relu_6_4)
precision_relu_6_4 = precision(y_test,test_outputs_relu_6_4)
recall_relu_6_4 = recall(y_test,test_outputs_relu_6_4)
f1_score_relu_6_4 = f1_score(y_test, test_outputs_relu_6_4)

print("General model accuracy:  ", accuracy_relu_6_4)
print("General model precision: ",precision_relu_6_4)
print("General model recall:    ",recall_relu_6_4)
print("General model f1-score:  ",f1_score_relu_6_4)

Confusion Matrix:
Expected \ Predicted | 0 | 1 | 2 | 3 |
---------------------
       0             | 156   0   0   0 |
       1             | 166   0   0   0 |
       2             | 89   0   0   0 |
       3             | 49   0   0   0 |
Class  0  has:  156  true positives,  0  true negatives  304  false positives and  0  false negatives
Class  1  has:  0  true positives,  294  true negatives  0  false positives and  166  false negatives
Class  2  has:  0  true positives,  371  true negatives  0  false positives and  89  false negatives
Class  3  has:  0  true positives,  411  true negatives  0  false positives and  49  false negatives
General model accuracy:   0.6695652173913043
General model precision:  0.08478260869565217
General model recall:     0.25
General model f1-score:   0.1266233766233766


In [15]:
test_outputs_relu_6_4

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [34]:
import importlib
import tree as t

importlib.reload(tree)

Seuil: 3
Matrix de confusion
Class	0	1	2	3
0	7	52	65	0
1	0	53	135	0	True label
2	1	3	102	0
3	0	15	27	0
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	7	1	117	335
1	53	70	135	202
2	102	227	4	127
3	0	0	42	418

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.6761		[0.7435, 0.5543, 0.4978, 0.9087]
Precision:	0.404		[0.875, 0.4309, 0.31, 0.0]
Recall:		0.3252		[0.0565, 0.2819, 0.9623, 0.0]
F1Score:	0.229		[0.1061, 0.3408, 0.4689, 0.0]

Seuil: 4
Matrix de confusion
Class	0	1	2	3
0	7	52	65	0
1	0	53	135	0	True label
2	1	3	102	0
3	0	15	27	0
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	7	1	117	335
1	53	70	135	202
2	102	227	4	127
3	0	0	42	418

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.6761		[0.7435, 0.5543, 0.4978, 0.9087]
Precision:	0.404		[0.875, 0.4309, 0.31, 0.0]
Recall:		0.3252		[0.0565, 0.2819, 0.9623, 0.0]
F1Score:	0.229		[0.1061, 0.3408, 0.4689, 0.0]

Seuil: 5
Matrix de confusion
Class	0	1	2	3
0	21	47	52	4
1	6	52	129	1	True label
2	3	3	99	1
3	2	9	25	6
	Predicted label

<module 'tree' from '/home/henri/Desktop/S6/IA/projet-ia/tree.py'>

In [35]:
train_indices = df.index[:len(x_train)]
test_indices = df.index[-len(x_test):]

# Create `train_df` and `test_df` using the retrieved indices
train_df = df.iloc[train_indices]
test_df = df.iloc[test_indices]

In [36]:
arbre1,seuil1,arbre2,seuil2 = t.meilleur_seuil(train_df,test_df)

Seuil: 3
Matrix de confusion
Class	0	1	2	3
0	27	36	54	22
1	1	81	92	4	True label
2	0	4	89	5
3	1	10	18	16
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	27	2	112	319
1	81	50	97	232
2	89	164	9	198
3	16	31	29	384

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.7315		[0.7522, 0.6804, 0.6239, 0.8696]
Precision:	0.5604		[0.931, 0.6183, 0.3518, 0.3404]
Recall:		0.4783		[0.1942, 0.4551, 0.9082, 0.3556]
F1Score:	0.4252		[0.3214, 0.5243, 0.5072, 0.3478]

Seuil: 4
Matrix de confusion
Class	0	1	2	3
0	26	40	44	29
1	1	81	92	4	True label
2	0	6	87	5
3	2	9	18	16
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	26	3	113	318
1	81	55	97	227
2	87	154	11	208
3	16	38	29	377

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.7282		[0.7478, 0.6696, 0.6413, 0.8543]
Precision:	0.5374		[0.8966, 0.5956, 0.361, 0.2963]
Recall:		0.4714		[0.1871, 0.4551, 0.8878, 0.3556]
F1Score:	0.4156		[0.3096, 0.516, 0.5133, 0.3233]

Seuil: 5
Matrix de confusion
Class	0	1	2	3
0	29	45	46	19
1	1	103	70	4	True label
2	1	9	84

In [41]:
print(f"Seuil: {seuil1}")
t.evaluateModel(df=test_df,tree=arbre1)
print("\nArbre de décision")
t.print_tree(arbre1)

Seuil: 6
Matrix de confusion
Class	0	1	2	3
0	29	40	44	26
1	1	92	81	4	True label
2	1	6	86	5
3	1	8	20	16
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	29	3	110	318
1	92	54	86	228
2	86	145	12	217
3	16	35	29	380

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.7424		[0.7543, 0.6957, 0.6587, 0.8609]
Precision:	0.5556		[0.9062, 0.6301, 0.3723, 0.3137]
Recall:		0.4897		[0.2086, 0.5169, 0.8776, 0.3556]
F1Score:	0.4408		[0.3391, 0.5679, 0.5228, 0.3333]


Arbre de décision
 [Attribute: Attr_H Split value: 8.847652723860204]
 > Gauche
 -[Attribute: Attr_B Split value: 9.136747228147065]
 -> Gauche
 --[Attribute: Attr_I Split value: 8.432423617046338]
 --> Gauche
 ---[Attribute: Attr_C Split value: 925.1670044107232]
 ---> Gauche
 ----[Attribute: Attr_E Split value: 88.50585711127908]
 ----> Gauche
 -----[Attribute: Attr_A Split value: 9.285330362119794]
 -----> Gauche
 ------[Attribute: Attr_N Split value: 88.70853758327428]
 ------> Gauche
 ------- Class 1 Count: 83
 ------- Class 0 Coun

In [39]:
print(f"Seuil: {seuil2}")
t.evaluateModel(df=test_df,tree=arbre2)
print("\nArbre de décisiion")
t.print_tree(arbre2)

Seuil: 7
Matrix de confusion
Class	0	1	2	3
0	33	41	46	19
1	3	92	79	4	True label
2	1	6	86	5
3	1	14	19	11
	Predicted label

Resultats
Class	TP	FP	FN	TN
0	33	5	106	316
1	92	61	86	221
2	86	144	12	218
3	11	28	34	387

Metric		Average		For each class (0,1,2,3)
Accuracy:	0.7413		[0.7587, 0.6804, 0.6609, 0.8652]
Precision:	0.5314		[0.8684, 0.6013, 0.3739, 0.2821]
Recall:		0.4691		[0.2374, 0.5169, 0.8776, 0.2444]
F1Score:	0.4288		[0.3729, 0.5559, 0.5244, 0.2619]


Arbre de décisiion
 [Attribute: Attr_H Split value: 8.847652723860204]
 > Gauche
 -[Attribute: Attr_B Split value: 9.136747228147065]
 -> Gauche
 --[Attribute: Attr_I Split value: 8.432423617046338]
 --> Gauche
 ---[Attribute: Attr_C Split value: 925.1670044107232]
 ---> Gauche
 ----[Attribute: Attr_E Split value: 88.50585711127908]
 ----> Gauche
 -----[Attribute: Attr_A Split value: 9.285330362119794]
 -----> Gauche
 ------[Attribute: Attr_N Split value: 88.70853758327428]
 ------> Gauche
 -------[Attribute: Attr_F Split value: 971.23