# Import Packages

In [24]:
# Introduction to Neural Networks (CSE 40868/60868)
# University of Notre Dame, Spring 2023
# Final Project Portion 3: Multi Layer Perceptron (MLP) for Genre Classification
# Based upon MLP used in Practical 1 (Thomas Summe, Zheng Ning, Adam Czajka, February 2023)
# _________________________________________________________________________
# Christine Van Kirk, Mia Manabat, Camille Knott (April 2023)

import torch
import pandas as pd
import torch.nn as nn
import numpy as np
import argparse
import math

from torch.utils.data import Dataset, random_split, DataLoader
from sklearn.preprocessing import LabelEncoder


# Step 1: Build PyTorch Dataset for Genre Data

In [25]:
# dataset class
class GenreData(Dataset):
    
    # constructor for Pytorch dataset class
    def __init__(self, path):

        # read dataset from path
        data = pd.read_csv(path, header=0)
        
        # instantiate label encoder
        le = LabelEncoder()

        # numberize the attributed features
        for col in data.columns:
            data[col] = le.fit(data[col]).transform(data[col])
        self.data = data

    # returns the number of samples in our dataset
    def __len__(self):
        return len(self.data)

    # loads and returns a sample from the dataset at the given index "item"
    def __getitem__(self, item):
        x = self.data.drop(['genre'], axis=1).values
        x = torch.tensor(x).float()

        # normalize the data
        feat = (x/torch.max(x))[item, :]
        y = self.data['genre'].values
        label = torch.tensor(y).float().unsqueeze(1)[item, :]

        return feat, label

# Step 2: Specify Network-Related Hyper-Parameters

In [38]:
data_path = "ER_EchoNest_AudioFeatures.csv" # specify location of Genre.csv
input_dim = 8               # equal to number of features describing each Genre
hidden_dim = 90             # number of hidden neurons
output_dim = 1              # number of output neurons
device = 'cpu'              # we will be using CPU in this practical
batch_size = 200            # specify batch size

# Step 3: Define Model Evaluation Function

In [39]:
# evaluates the trained model
def evaluate(model, loader):

    # we need to switch the model into the evaluation mode
    model.eval()

    # create a list to store the prediction results
    res_store = []
    for batch in loader:
        x, y = batch
        
        # make a prediction for a data sample "x"
        pred = model(x)
        pred = (pred > 0.5).float().squeeze(1)
        y = y.squeeze(1)

        # if the prediction is correct, append True; else append False
        res_store += (pred == y).tolist()

    # return the classification accuracy
    acc = sum(res_store)/len(res_store)
    return acc

# Step 4: Building The Multi-Layer Perceptron By Hand

In [40]:
# Multi-layer perceptron (MLP) model class
class MLP(nn.Module):

    # constructor for the MLP model
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()

        # Define the hidden layer with "hidden_dim" linear neurons 
        # and accepting inputs of size "input_dim"
        # (Use "torch.randn()" Pytorch function to create a matrix of size input_dim x hidden_dim)
        self.W1 = torch.randn(input_dim, hidden_dim)

        # Define the ReLU activation function (we will use it in the hidden layer)
        # (Define ReLU with torch.max and derivative of ReLU using torch.gt and keep constants in a tensor)
        self.ReLU = lambda x : torch.max(x,torch.tensor(0))
        ## self.ReLU = lambda x : torch.max(x)
        self.ReLU_prime = lambda x : torch.gt(x, 0).float()
      
        # Define the output layer with "output_dim" linear neurons
        # and accepting inputs of size "hidden_dim"
        self.W2 = torch.randn(hidden_dim, output_dim)

        # Finally , define the sigmoid activation function 
        # (Define sigmoid with torch.exp and derivative of sigmoid)
        self.sigmoid = lambda x : 1 / (1 + torch.exp(-x))
        self.sigmoid_prime = lambda x : self.sigmoid(x) * (1 - self.sigmoid(x))

    # define the forward procedure for the network
    def forward(self, x):
      
        # Pass the input to the first layer
        self.z1 = torch.matmul(x, self.W1)
        
        # Apply the activation function in this first layer
        self.y1 = self.ReLU(self.z1)
        
        # Pass the output of the first layer to the next (output) layer
        self.z2 = torch.matmul(self.y1, self.W2)

        # Apply the activation function in the output layer
        y_hat = self.sigmoid(self.z2)

        return y_hat

    # define the backward procedure for the network
    def backward(self, X, d_cost_d_y, y_hat):
        
        # d_cost_d_z2 = ... # requires sigmoid prime
        d_cost_d_z2 = d_cost_d_y * self.sigmoid_prime(self.z2)

        # d_cost_d_y1 = ... # requires torch.matmul
        d_cost_d_y1 = torch.matmul(d_cost_d_z2, torch.t(self.W2))

        # d_cost_d_z1 = ... # requires ReLU prime
        d_cost_d_z1 = d_cost_d_y1 * self.ReLU_prime(self.z1)
        
        # d_cost_d_W1 = ... # requires torch.matmul
        d_cost_d_W1 = torch.matmul(torch.t(d_cost_d_z1), X)

        # d_cost_d_W2 = ... # requires torch.matmul
        d_cost_d_W2 = torch.matmul(torch.t(self.y1), d_cost_d_z2)

        self.W1 -= torch.t(d_cost_d_W1)
        self.W2 -= d_cost_d_W2


## Instantiate model and dataset

In [41]:
# Seed the random number generator for all devices (both CPU and CUDA)
torch.manual_seed(0)

# Instantiate the dataset
data = GenreData(data_path)

# Instantiate the MLP model: 22 features (input size), 90 neurons in the hidden layer, and 1 output neuron
# (you may experiment with these numbers to see what happens!)
mlp = MLP(input_dim, hidden_dim, output_dim)

# Here we use torch random_split() function to split the data into training set, validation set and test set 
# e.g., with the following proportions: 0.6 : 0.2 : 0.2; hint: len(data) will give you number of samples in our dataset
# (see https://pytorch.org/docs/stable/data.html?highlight=random_split#torch.utils.data.random_split)
train_set_size = 0.6
val_set_size = 0.2
test_set_size = 0.2
train_set, val_set, test_set = torch.utils.data.random_split(data,[train_set_size,val_set_size,test_set_size])

# Wrap the dataset into Pytorch dataloader to pass samples in "minibatches"
train_dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=False)
val_dataloader = DataLoader(val_set, batch_size=batch_size, shuffle=True, drop_last=False)
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, drop_last=False)


## Run Randomly-Initialized Network

In [42]:
acc = evaluate(mlp, test_dataloader)
print(f"Test accuracy = {acc}")

Test accuracy = 0.48375


# Step 5: Training The Multi-Layer Perceptron

## Specify Training-Related Hyper-Parameters

In [44]:
save = "best_model"
epochs = 20
learning_rate = 0.01

## Define Loss Function And Its Derivative

In [45]:
def MSE(y_hat, y_star):
    return torch.mean(torch.square(torch.sub(y_hat,y_star)))

def MSE_prime(y_hat, y_star):
    return 2 * torch.sub(y_hat, y_star)

## Run Training Loop

In [46]:
acc_best = 0.0
for epoch in range(epochs):
    mlp.train()
    print(f"epoch:{epoch}")

    # iterate batches in dataloader
    for batch in train_dataloader:

        x, y_star = batch

        y = mlp(x)
        cost = MSE(y,y_star) # Calculate cost
        d_cost_d_y = MSE_prime(y,y_star)*learning_rate # calculate output gradient and multiply by learning rate

        # performs a single optimization step (weights update)
        mlp.backward(x,d_cost_d_y,y)

    # evaluate the model 
    acc = evaluate(mlp, val_dataloader)

    if acc > acc_best and save:
        torch.save(mlp.W1, save + "_W1")
        torch.save(mlp.W2, save + "_W2")
    
    # if (epoch+1) % 5 == 0: <- use this if you want to print the validation accuracy every 5 epochs
    print(f"Epoch: #{epoch+1}: validation accuracy = {acc*100:.2f}%; loss={cost}")

epoch:0
Epoch: #1: validation accuracy = 49.88%; loss=0.5042392611503601
epoch:1
Epoch: #2: validation accuracy = 54.50%; loss=0.47956496477127075
epoch:2
Epoch: #3: validation accuracy = 76.12%; loss=0.23532545566558838
epoch:3
Epoch: #4: validation accuracy = 76.12%; loss=0.2285148948431015
epoch:4
Epoch: #5: validation accuracy = 73.88%; loss=0.17348459362983704
epoch:5
Epoch: #6: validation accuracy = 77.25%; loss=0.17446598410606384
epoch:6
Epoch: #7: validation accuracy = 76.50%; loss=0.19375532865524292
epoch:7
Epoch: #8: validation accuracy = 77.00%; loss=0.28695279359817505
epoch:8
Epoch: #9: validation accuracy = 77.12%; loss=0.237042635679245
epoch:9
Epoch: #10: validation accuracy = 77.12%; loss=0.1875089406967163
epoch:10
Epoch: #11: validation accuracy = 72.25%; loss=0.23906998336315155
epoch:11
Epoch: #12: validation accuracy = 76.75%; loss=0.24193742871284485
epoch:12
Epoch: #13: validation accuracy = 76.75%; loss=0.2220839560031891
epoch:13
Epoch: #14: validation accur

## Test The Model (with unknown test data)

In [48]:
# Load the model’s weights
mlp.W1 = torch.load(save + "_W1")
mlp.W2 = torch.load(save + "_W2")
acc = evaluate(mlp, test_dataloader)
print(f"Test accuracy = {acc}")

Test accuracy = 0.7575
