<a href="https://colab.research.google.com/github/WinsalotNot/MultiClass-Perceptron---Assignment2/blob/main/Assignment_2_Multiclass_Perceptron_Andrew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **IMPORTS**

In [1]:
import numpy as np
import pandas as pd

# **DATA ORGANIZATION**

In [2]:
# Pinned gist revision of the Iris CSV used throughout this notebook
IRIS_CSV_URL = 'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'


def split_train_test(frame, train_fraction, seed):
    """Randomly split ``frame`` into complementary train/test DataFrames.

    Args:
        frame: DataFrame to split.
        train_fraction: Fraction of rows (0-1) sampled into the training part.
        seed: ``random_state`` passed to ``DataFrame.sample`` so the split is
            reproducible.

    Returns:
        ``(train_part, test_part)``; the test part is ``frame`` with the
        sampled index labels dropped.
    """
    train_part = frame.sample(frac=train_fraction, random_state=seed)
    # Everything that was not sampled for training becomes the test set
    test_part = frame.drop(train_part.index)
    return train_part, test_part


# read_csv consumes the first CSV line as column names, so data_file holds data rows only
data_file = pd.read_csv(IRIS_CSV_URL)
print(data_file)

# Get the unique values in 'variety' (the classes to predict)
unique_variety = data_file['variety'].unique()
print(unique_variety)

# 80% training / 20% testing
training_data_80, testing_data_20 = split_train_test(data_file, 0.8, 25)
print(f'Training Data 1: {(len(training_data_80)/len(data_file)*100)}% Testing Data 1: {(len(testing_data_20)/len(data_file)*100)}%')

# 70% training / 30% testing
training_data_70, testing_data_30 = split_train_test(data_file, 0.7, 24)
print(f'Training Data 2: {(len(training_data_70)/len(data_file)*100)}% Testing Data 2: {(len(testing_data_30)/len(data_file)*100)}%')

# 60% training / 40% testing
training_data_60, testing_data_40 = split_train_test(data_file, 0.6, 23)
print(f'Training Data 3: {(len(training_data_60)/len(data_file)*100)}% Testing Data 3: {(len(testing_data_40)/len(data_file)*100)}%')


     sepal.length  sepal.width  petal.length  petal.width    variety
0             5.1          3.5           1.4          0.2     Setosa
1             4.9          3.0           1.4          0.2     Setosa
2             4.7          3.2           1.3          0.2     Setosa
3             4.6          3.1           1.5          0.2     Setosa
4             5.0          3.6           1.4          0.2     Setosa
..            ...          ...           ...          ...        ...
145           6.7          3.0           5.2          2.3  Virginica
146           6.3          2.5           5.0          1.9  Virginica
147           6.5          3.0           5.2          2.0  Virginica
148           6.2          3.4           5.4          2.3  Virginica
149           5.9          3.0           5.1          1.8  Virginica

[150 rows x 5 columns]
['Setosa' 'Versicolor' 'Virginica']
Training Data 1: 80.0% Testing Data 1: 20.0%
Training Data 2: 70.0% Testing Data 2: 30.0%
Training Data 3: 60.0%

# **CONFUSION MATRIX**

In [6]:
def compute_confusion_matrix(actual_labels, predicted_labels, num_classes):
    """Count (actual, predicted) label pairs into a square matrix.

    Args:
        actual_labels: Iterable of integer class indexes (ground truth).
        predicted_labels: Iterable of integer class indexes (predictions),
            paired positionally with ``actual_labels``.
        num_classes: Side length of the resulting matrix.

    Returns:
        A list of ``num_classes`` lists of ``num_classes`` ints, where entry
        ``[a][p]`` is how many pairs had actual class ``a`` and predicted
        class ``p``.
    """
    # Build each row separately so the rows do not share one list object
    tally = []
    for _ in range(num_classes):
        tally.append([0] * num_classes)

    # Row index = actual class, column index = predicted class
    for true_idx, guessed_idx in zip(actual_labels, predicted_labels):
        row = tally[true_idx]
        row[guessed_idx] = row[guessed_idx] + 1

    return tally

# **MULTICLASS PERCEPTRON ALGORITHM**

In [7]:
def multiclass_perceptron(data, weight_2D_array, learning_rate, epoch, isTesting, class_labels=None):
  """Train or evaluate a linear multiclass perceptron.

  The last column of ``data`` is read as the class label and every other
  column as a feature. A sample is assigned to the class whose weight row has
  the largest dot product with the sample's feature vector.

  Training (``isTesting`` falsy) runs up to ``epoch`` passes. Each pass scores
  all samples with the weights as they stand at the start of the pass; then,
  for every misclassified sample, ``learning_rate * features`` is added to the
  true class's weight row and subtracted from the predicted class's row.
  Training stops early once a pass classifies every sample correctly.

  Args:
    data: DataFrame whose last column holds the labels.
    weight_2D_array: Initial weights, one row per class and one column per
      feature. It is copied (as float) and never modified.
    learning_rate: Step size for weight updates (unused when testing).
    epoch: Maximum number of training passes (unused when testing).
    isTesting: If truthy, only evaluate ``weight_2D_array`` on ``data``.
    class_labels: Optional ordered sequence of all class labels; position in
      the sequence is the class index. When omitted, the mapping is derived
      from the sorted unique labels found in ``data``.
      NOTE: with the default, a subset that lacks one of the classes gets a
      different label->index mapping than the full dataset; pass the same
      ``class_labels`` for training and testing to rule that out.

  Returns:
    When testing: ``(accuracy_percent, confusion_matrix)``.
    When training: ``(best_weights, best_accuracy_percent, best_epoch,
    confusion_matrix)`` describing the first pass that reached the highest
    accuracy. ``confusion_matrix`` is ``[]`` and ``best_epoch`` is 0 if no
    pass scored above 0%.

  Raises:
    ValueError: If ``data`` has no rows, or ``class_labels`` is given and a
      label in ``data`` is not part of it.
  """
  # Takes all rows and all columns except the last [:, :-1] as the features
  data_numpy = data.iloc[:, :-1].to_numpy()
  # Takes all rows and ONLY the last column [:, -1] as the labels
  result_each_indexes = data.iloc[:, -1].to_numpy()

  sample_count = len(data_numpy)
  if sample_count == 0:
    # Accuracy would otherwise be a silent 0/0
    raise ValueError('multiclass_perceptron requires at least one sample')

  if class_labels is None:
    # Sorted unique labels plus, for every sample, the index of its label in that array
    unique_labels, results_given_int = np.unique(result_each_indexes, return_inverse=True)
    label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
  else:
    # Caller-supplied ordering keeps indexes identical across different subsets
    unique_labels = list(class_labels)
    label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
    unknown = [label for label in result_each_indexes if label not in label_to_index]
    if unknown:
      raise ValueError(f'Label {unknown[0]!r} is not present in class_labels')
    results_given_int = np.array([label_to_index[label] for label in result_each_indexes])

  print("Label Mapping:", label_to_index)
  print("Converted Indexes:", results_given_int)

  # Float copy: leaves the caller's array untouched and lets integer initial
  # weights accept fractional updates
  updated_weights = np.array(weight_2D_array, dtype=float)

  # Predictions index weight rows, so the confusion matrix must cover every
  # weight row even when this subset of the data does not contain every class
  num_classes = max(len(unique_labels), updated_weights.shape[0])

  # In case of testing, evaluate once and return
  if isTesting:
    # Transposing the weights gives one score column per class for every sample
    weighted_sums_testing = np.dot(data_numpy, updated_weights.T)
    # Highest-scoring class per sample
    results_calculated_testing = np.argmax(weighted_sums_testing, axis=1)

    # Amount of correct predictions
    correct_predictions_testing = np.sum(results_calculated_testing == results_given_int)
    # Accuracy in percentage
    accuracy_testing = (correct_predictions_testing / sample_count * 100)
    confusion_matrix = compute_confusion_matrix(results_given_int, results_calculated_testing, num_classes)

    print(f'Testing Accuracy: {accuracy_testing}%')
    print(f'Testing Results: {results_calculated_testing}')
    print(f'Compared To: {results_given_int}')

    return accuracy_testing, confusion_matrix

  # Best-so-far bookkeeping
  best_weights = updated_weights.copy()
  best_accuracy = 0.0
  best_epoch = 0
  confusion_matrix = []

  iteration = 0
  success = False
  while (iteration < epoch):
    # Score every sample with the weights as they are at the start of this pass
    weighted_sums = np.dot(data_numpy, updated_weights.T)
    results_calculated = np.argmax(weighted_sums, axis=1)

    # Amount of correct predictions and accuracy as a fraction
    correct_predictions = np.sum(results_calculated == results_given_int)
    accuracy = correct_predictions / sample_count

    # Strict '>' keeps the earliest pass that reached the best accuracy
    if accuracy > best_accuracy:
      best_accuracy = accuracy
      best_weights = updated_weights.copy()
      best_epoch = iteration + 1
      confusion_matrix = compute_confusion_matrix(results_given_int, results_calculated, num_classes)

    # Show comparison between correct predictions and total data
    print(f'Epoch {iteration + 1}: {correct_predictions}/{sample_count} samples correctly classified.')

    # If 100% correct, terminate early
    if correct_predictions == sample_count:
      print(f'Converged at epoch {iteration + 1}')
      success = True
      break

    # Visit only the misclassified samples, in their original order
    for index in np.flatnonzero(results_calculated != results_given_int):
      step = learning_rate * data_numpy[index]
      # Increases weights of the expected (true) class
      updated_weights[results_given_int[index]] += step
      # Decreases weights of the wrongly predicted class
      updated_weights[results_calculated[index]] -= step

    iteration += 1

  # Show that it did not converge within the given number of epochs
  if not success:
    print(f'Given {epoch} epoch, weights did not converge: {updated_weights}')

  # Show the best accuracy in percentage, weights, epoch and return them
  print(f'Best accuracy: {best_accuracy * 100:.2f}%')
  print(f'Best weights:\n{best_weights}')
  print(f'Best weight achieved at epoch {best_epoch}')
  return best_weights, (best_accuracy * 100), best_epoch, confusion_matrix


# **DISPLAY METHODS**

In [8]:
def _confusion_frame(matrix):
    """Wrap a list-of-lists confusion matrix in a DataFrame labelled 'Actual i' / 'Pred i'."""
    return pd.DataFrame(matrix,
                        index=[f"Actual {i}" for i in range(len(matrix))],
                        columns=[f"Pred {i}" for i in range(len(matrix[0]))])


def display_confusion_matrix(conf_matrices):
    """Print training and testing confusion matrices side by side, per run.

    Args:
        conf_matrices: Iterable of 5-tuples
            ``(split_name, weight_name, learning_rate, matrix_type, matrix)``
            where ``matrix_type`` is ``"Training"`` or ``"Testing"``. Entries
            with any other type are ignored; if a key and type repeat, the
            last entry wins.

    Splits are printed in sorted order. Within a split, runs are printed in
    the order their (weight_name, learning_rate) pair first appears in
    ``conf_matrices``, so the output is the same on every execution. A run is
    printed only when both its training and its testing matrix are present.
    """
    # split -> (weight, lr) -> {matrix_type: matrix}; dicts keep insertion order
    grouped = {}
    for split_name, weight_name, lr, m_type, matrix in conf_matrices:
        runs = grouped.setdefault(split_name, {})
        runs.setdefault((weight_name, lr), {})[m_type] = matrix

    for split_name in sorted(grouped):
        # Display header for each split
        print(f"\n{'='*80}\nSPLIT: {split_name}\n{'='*80}")

        for (weight_name, lr), matrices in grouped[split_name].items():
            train_matrix = matrices.get("Training")
            test_matrix = matrices.get("Testing")

            # Skip runs that are missing either half of the comparison
            if train_matrix is None or test_matrix is None:
                continue

            # Concatenate training and testing matrices side by side
            combined_df = pd.concat([_confusion_frame(train_matrix), _confusion_frame(test_matrix)],
                                    axis=1, keys=["Training", "Testing"])

            # Display results in a structured format
            print(f"\nInit Weights: {weight_name}, Learning Rate: {lr}")
            print('-----------------------------------------------------------------------')
            print(combined_df.to_string())  # Convert dataframe to string for display


def display_results(results):
    """Print the per-run accuracy summary as a plain-text table.

    Args:
        results: Sequence of rows, each holding six values in the order
            split, initial-weight name, learning rate, training accuracy,
            epochs, testing accuracy.
    """
    header = ["Split", "Init Weights", "Learning Rate", "Train Accuracy", "Epochs", "Test Accuracy"]
    summary_table = pd.DataFrame(data=results, columns=header)
    # index=False leaves out the row-number column
    rendered = summary_table.to_string(index=False)
    print(rendered)

def display_weights(results):
    """Print the stored best weights of every run as a plain-text table.

    Args:
        results: Sequence of rows, each holding four values in the order
            split, initial-weight name, learning rate, best weights.
    """
    header = ["Split", "Init Weights", "Learning Rate", "Best_Weights"]
    weights_table = pd.DataFrame(data=results, columns=header)
    # index=False leaves out the row-number column
    rendered = weights_table.to_string(index=False)
    print(rendered)

# **EXECUTION**

In [10]:
# Seed NumPy's global RNG so the random initial weights are the same on every run
np.random.seed(42)

# Problem dimensions
num_features = 4  # input features per sample
num_classes = 3   # possible output classes

# Both initial weight matrices have one row per class and one column per feature
weight_shape = (num_classes, num_features)
# All-zero starting weights
weight_2D_zeros = np.zeros(weight_shape)
# Starting weights drawn uniformly from [-0.5, 0.5)
weight_2D_range = np.random.uniform(low=-0.5, high=0.5, size=weight_shape)

# Accumulators filled by the experiment loop below:
#   experiment_results -> accuracy / epoch summary rows
#   confusion_matrices -> training and testing confusion matrices
#   best_weights       -> best trained weights per run
experiment_results, confusion_matrices, best_weights = [], [], []

# (name, training set, matching testing set) for every train/test split
splits = [
    ("80/20", training_data_80, testing_data_20),
    ("70/30", training_data_70, testing_data_30),
    ("60/40", training_data_60, testing_data_40),
]

# Learning rates to compare
learning_rates = [0.1, 0.01]

# Named starting points: "Zeros" = all-zero weights, "Range" = the random weights above
initial_weights = [("Zeros", weight_2D_zeros), ("Range", weight_2D_range)]

# One run per (split, initial weights, learning rate) combination
for split_name, train_data, test_data in splits:
    for weight_name, weight_matrix in initial_weights:
        for lr in learning_rates:
            # Train for at most 1000 epochs
            trained_weights, training_accuracy, epochs, confusion_matrix_training = multiclass_perceptron(
                data=train_data, weight_2D_array=weight_matrix, learning_rate=lr, epoch=1000, isTesting=False)

            # Evaluate the trained weights on the held-out data (no learning rate / epochs needed)
            testing_accuracy, confusion_matrix_testing = multiclass_perceptron(
                data=test_data, weight_2D_array=trained_weights, learning_rate=None, epoch=None, isTesting=True)

            # Every stored record starts with the same three identifying fields
            run_key = (split_name, weight_name, lr)

            best_weights.append((*run_key, trained_weights))
            experiment_results.append([*run_key, training_accuracy, epochs, testing_accuracy])
            confusion_matrices.extend([
                (*run_key, "Training", confusion_matrix_training),
                (*run_key, "Testing", confusion_matrix_testing),
            ])

# Accuracy / epoch summary for all runs
display_results(experiment_results)

# Best trained weights for each configuration
display_weights(best_weights)

# Training and testing confusion matrices for each configuration
print("\nConfusion Matrices:")
display_confusion_matrix(confusion_matrices)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 238: 103/105 samples correctly classified.
Epoch 239: 103/105 samples correctly classified.
Epoch 240: 103/105 samples correctly classified.
Epoch 241: 103/105 samples correctly classified.
Epoch 242: 103/105 samples correctly classified.
Epoch 243: 102/105 samples correctly classified.
Epoch 244: 98/105 samples correctly classified.
Epoch 245: 100/105 samples correctly classified.
Epoch 246: 103/105 samples correctly classified.
Epoch 247: 103/105 samples correctly classified.
Epoch 248: 103/105 samples correctly classified.
Epoch 249: 103/105 samples correctly classified.
Epoch 250: 103/105 samples correctly classified.
Epoch 251: 103/105 samples correctly classified.
Epoch 252: 103/105 samples correctly classified.
Epoch 253: 103/105 samples correctly classified.
Epoch 254: 103/105 samples correctly classified.
Epoch 255: 102/105 samples correctly classified.
Epoch 256: 98/105 samples correctly classified.
Epoch 