In [16]:
import numpy as np
import pandas as pd

In [17]:
def load_dataset(file_path):
    """
    Load a CSV dataset.

    Parameters:
    - file_path (str): The path to the CSV file.

    Returns:
    - The loaded dataset.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return None

In [18]:
def two_fold_testing(mlp, train_data, test_data, num_cycles):
    """
    Perform two-fold testing: train on one dataset and test on the other, then swap.
    Parameters:
    - mlp: The MLP model to be tested.
    - train_data: The dataset to train on.
    - test_data: The dataset to test on.
    - num_cycles: The number of cycles for training.

    Returns:
    - test_accuracy: The testing accuracy of the model.
    """

    # Initialize confusion matrix for this fold
    conf_matrix = np.zeros((mlp.outputs, mlp.outputs), dtype=int)

    # Training phase
    print("Starting training phase...")
    for cycle in range(num_cycles):
        print(f"Cycle {cycle + 1}/{num_cycles}")
        accuracy = mlp.train(train_data, num_cycles)
        print(f"Cycle {cycle + 1} Training Accuracy: {accuracy * 100:.2f}%")

    # Testing phase
    print("\nTraining complete. Starting testing phase...")
    test_accuracy = mlp.test(test_data, conf_matrix)
    print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

    print("\nConfusion Matrix (Test):")
    print(conf_matrix)

    return test_accuracy

In [19]:
class MLP:
    def __init__(self, inputs, hidden_neurons, outputs, learning_rate, dropout_rate, lambda_reg, num_cycles):
        """
        Initialize the Multi-Layer Perceptron (MLP) with the given parameters.

        Parameters:
        - inputs: The number of input neurons.
        - hidden_neurons: The number of neurons in the hidden layer.
        - outputs: The number of output neurons (corresponding to the number of classes).
        - learning_rate: The learning rate for gradient descent.
        - dropout_rate: The dropout rate to apply during training for regularization.
        - lambda_reg: The regularization strength for weight decay.
        - num_cycles: The number of training cycles.
        """

        self.inputs = inputs
        self.hidden_neurons = hidden_neurons
        self.outputs = outputs
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.lambda_reg = lambda_reg
        self.num_cycles = num_cycles

        # Initialize weights for input-to-hidden and hidden-to-output layers using Xavier initialization
        self.input_to_hidden_weights = np.random.rand(self.hidden_neurons, self.inputs) * np.sqrt(2 / (self.inputs + self.hidden_neurons))
        self.hidden_to_output_weights = np.random.rand(self.outputs, self.hidden_neurons) * np.sqrt(2 / (self.hidden_neurons + self.outputs))

        # Placeholder for storing hidden layer outputs during feedforward
        self.hidden_layer_outputs = None

        # Mask for dropout layer, used during training to simulate dropout
        self.hidden_layer_dropout_mask = None

    # Activation functions and their derivatives
    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def relu_derivative(x):
        return np.where(x > 0, 1, 0)

    @staticmethod
    def softmax(x):
        exp_x = np.exp(x - np.max(x))
        return exp_x / np.sum(exp_x)

    def feed_forward(self, input_data, training = True):
        """
        Perform the feedforward computation.

        Parameters:
        - input_data: The input features for the network.
        - training: Flag to indicate whether the network is in training mode (for dropout).

        Returns:
        - predicted_outputs: The final predicted outputs from the network.
        """

        # Input to hidden layer (weighted sum of inputs)
        hidden_input = np.dot(self.input_to_hidden_weights, input_data)
        self.hidden_layer_outputs = self.relu(hidden_input)

        if training:
            # Apply dropout (randomly set some hidden neurons to 0)
            self.hidden_layer_dropout_mask = np.random.rand(*self.hidden_layer_outputs.shape) > self.dropout_rate
            self.hidden_layer_outputs *= self.hidden_layer_dropout_mask  # Drop neurons

        # Hidden to Output Layer (weighted sum of hidden layer outputs)
        output_input = np.dot(self.hidden_to_output_weights, self.hidden_layer_outputs)
        predicted_outputs  = self.softmax(output_input)

        return predicted_outputs
    
    def check_prediction_error(self, predicted_output, target_map):
        """
        Calculate if there is an error in the prediction.

        Parameters:
        - predicted_output: The output of the network.
        - target_map: The true labels, in one-hot encoded format.

        Returns:
        - bool: True if prediction is incorrect else False.
        """

        # Find the predicted class (index of max probability)
        predicted_class = np.argmax(predicted_output)

        # Find the true class (index of 1 in target_map)
        true_class = np.argmax(target_map)

        return predicted_class != true_class
    
    def backpropagate_and_update_weights(self, input_data, target_map, final_output):
        """
        Perform backpropagation to update weights.

            Parameters:
        - input_data: The input features for the network.
        - target_map: The true labels in one-hot encoded format.
        - final_output: The output of the network.
        """

        # Output layer errors (difference between target and predicted output)
        output_layer_error = target_map - final_output

        # Hidden layer errors
        hidden_layer_error = np.dot(self.hidden_to_output_weights.T, output_layer_error)
        hidden_layer_error *= self.relu_derivative(self.hidden_layer_outputs)

        # Apply dropout during backpropagation (only update weights for active neurons)
        hidden_layer_error *= self.hidden_layer_dropout_mask

        # Update weights using gradient descent (with L2 regularization)
        self.input_to_hidden_weights += self.learning_rate * (np.outer(hidden_layer_error, input_data) - self.lambda_reg * self.input_to_hidden_weights)
        self.hidden_to_output_weights += self.learning_rate * (np.outer(output_layer_error, self.hidden_layer_outputs) - self.lambda_reg * self.hidden_to_output_weights)

    def train(self, train_data, num_cycles):
        """
        Train the MLP model.

        Parameters:
        - train_data: The training data.
        - num_cycles: The number of cycles for training.

        Returns:
        - accuracy: The training accuracy after the specified number of cycles.
        """

        correct_predictions_count = 0
        total_rows = len(train_data)

        for cycle in range(num_cycles):
            # Reduce learning rate by 10% after every 100 cycles
            if cycle % 100 == 0 and cycle > 0:
                self.learning_rate *= 0.9

        # Iterate through the training data
        for _, row in train_data.iterrows():
            # Features (all columns except the last)
            input_data = row.iloc[:-1].to_numpy()
            # The label (last column)
            target_output = int(row.iloc[-1])

            # Create a one-hot encoded target map
            target_map = np.zeros(self.outputs)
            target_map[target_output] = 1

            # Perform feed forward
            final_outputs = self.feed_forward(input_data)

            # Check for errors and update weights if necessary
            if self.check_prediction_error(final_outputs, target_map):
                self.backpropagate_and_update_weights(input_data, target_map, final_outputs)
            else:
                correct_predictions_count += 1

        # Calculate accuracy
        accuracy = correct_predictions_count / total_rows if total_rows > 0 else 0

        return accuracy

    def test(self, test_data, conf_matrix):
        """
        Test the network using the testing dataset.

        Parameters:
        - test_data: The testing data.

        Returns:
        - accuracy: The accuracy of the model on the test dataset.
        """

        correct_predictions_count = 0
        total_rows = len(test_data)

        for _, row in test_data.iterrows():
            # Features
            input_data = row.iloc[:-1].to_numpy()
            # True label
            target_output = int(row.iloc[-1])

            # Create one-hot encoded target map
            target_map = np.zeros(self.outputs)
            target_map[target_output] = 1

            # Perform feedforward and get final output
            final_output = self.feed_forward(input_data)

            # Check if the prediction is correct
            if not self.check_prediction_error(final_output, target_map):
                correct_predictions_count += 1

            # Update confusion matrix
            predicted_class = np.argmax(final_output)
            conf_matrix[target_output, predicted_class] += 1

        accuracy = correct_predictions_count / total_rows if total_rows > 0 else 0

        return accuracy
        


In [20]:
def main():
    """
    Main function.

    - Loads datasets.
    - Initializes the MLP models.
    - Performs two-fold testing.
    """

    # Parameters for the MLP model
    num_cycles = 300
    inputs = 64
    hidden_neurons = 350
    outputs = 10
    learning_rate = 0.0083
    dropout_rate = 0.3
    lambda_reg = 0.08

    # Dataset file paths
    dataset1_file = 'cw2DataSet1.csv'
    dataset2_file = 'cw2DataSet2.csv'

    # Load datasets
    dataset1 = load_dataset(dataset1_file)
    dataset2 = load_dataset(dataset2_file)

    if dataset1 is None or dataset2 is None:
        print("Dataset is not found!")
        return

    # Initialize two MLP models for two-fold testing
    mlp1 = MLP(inputs, hidden_neurons, outputs, learning_rate, dropout_rate, lambda_reg, num_cycles)
    mlp2 = MLP(inputs, hidden_neurons, outputs, learning_rate, dropout_rate, lambda_reg, num_cycles)

    print("\nStarting Two-Fold Testing...")

    # Fold 1: Train on dataset1, Test on dataset2
    print("\nFold 1: Train on dataset1, Test on dataset2")
    fold1_test_accuracy  = two_fold_testing(mlp1, dataset1, dataset2, num_cycles)

    # Fold 2: Train on dataset2, Test on dataset1
    print("\nFold 2: Train on dataset2, Test on dataset1")
    fold2_test_accuracy  = two_fold_testing(mlp2, dataset2, dataset1, num_cycles)

    # Get the average accuracy of both folds
    average_accuracy = (fold1_test_accuracy  + fold2_test_accuracy ) / 2



    print(f"\n\nFold 1 Testing Accuracy: {fold1_test_accuracy * 100:.2f}%")
    print(f"\nFold 2 Testing Accuracy: {fold2_test_accuracy * 100:.2f}%")
    print(f"\nAverage Accuracy Across Two Folds: {average_accuracy * 100:.2f}%")

if __name__ == "__main__":
    main()


Starting Two-Fold Testing...

Fold 1: Train on dataset1, Test on dataset2
Starting training phase...
Cycle 1/300
Cycle 1 Training Accuracy: 81.17%
Cycle 2/300
Cycle 2 Training Accuracy: 90.39%
Cycle 3/300
Cycle 3 Training Accuracy: 94.30%
Cycle 4/300
Cycle 4 Training Accuracy: 94.34%
Cycle 5/300
Cycle 5 Training Accuracy: 95.83%
Cycle 6/300
Cycle 6 Training Accuracy: 96.30%
Cycle 7/300
Cycle 7 Training Accuracy: 97.12%
Cycle 8/300
Cycle 8 Training Accuracy: 97.72%
Cycle 9/300
Cycle 9 Training Accuracy: 97.79%
Cycle 10/300
Cycle 10 Training Accuracy: 98.18%
Cycle 11/300
Cycle 11 Training Accuracy: 98.40%
Cycle 12/300
Cycle 12 Training Accuracy: 98.40%
Cycle 13/300
Cycle 13 Training Accuracy: 98.43%
Cycle 14/300
Cycle 14 Training Accuracy: 98.47%
Cycle 15/300
Cycle 15 Training Accuracy: 98.50%
Cycle 16/300
Cycle 16 Training Accuracy: 98.40%
Cycle 17/300
Cycle 17 Training Accuracy: 98.47%
Cycle 18/300
Cycle 18 Training Accuracy: 98.83%
Cycle 19/300
Cycle 19 Training Accuracy: 98.86%
Cycl