# Import Packages

In [1]:
# Introduction to Neural Networks (CSE 40868/60868)
# University of Notre Dame, Spring 2023
# Final Project Portion 3: Multi Layer Perceptron (MLP) for Genre Classification
# Based upon MLP used in Practical 1 (Thomas Summe, Zheng Ning, Adam Czajka, February 2023)
# _________________________________________________________________________
# Christine Van Kirk, Mia Manabat, Camille Knott (April 2023)

import torch
import pandas as pd
import torch.nn as nn
import numpy as np
import argparse
import math

from torch.utils.data import Dataset, random_split, DataLoader
from sklearn.preprocessing import LabelEncoder


# Step 1: Build PyTorch Dataset for Genre Data

In [2]:
# dataset class
class GenreData(Dataset):
    
    # constructor for Pytorch dataset class
    def __init__(self, path):

        # read dataset from path
        data = pd.read_csv(path, header=0)
        
        # instantiate label encoder
        le = LabelEncoder()

        # numberize the attributed features
        for col in data.columns:
            data[col] = le.fit(data[col]).transform(data[col])
        self.data = data

    # returns the number of samples in our dataset
    def __len__(self):
        return len(self.data)

    # loads and returns a sample from the dataset at the given index "item"
    def __getitem__(self, item):
        x = self.data.drop(['genre'], axis=1).values
        x = torch.tensor(x).float()

        # normalize the data
        feat = (x/torch.max(x))[item, :]
        y = self.data['genre'].values
        label = torch.tensor(y).float().unsqueeze(1)[item, :]

        return feat, label

# Step 2: Specify Network-Related Hyper-Parameters

In [3]:
data_path = "ER_EchoNest_AudioFeatures.csv" # specify location of Genre.csv
input_dim = 8               # equal to number of features describing each Genre
hidden_dim = 90             # number of hidden neurons
output_dim = 1              # number of output neurons
device = 'cpu'              # we will be using CPU in this practical
batch_size = 200            # specify batch size

# Step 3: Define Model Evaluation Function

In [4]:
# evaluates the trained model
def evaluate(model, loader):

    # we need to switch the model into the evaluation mode
    model.eval()

    # create a list to store the prediction results
    res_store = []
    for batch in loader:
        x, y = batch
        
        # make a prediction for a data sample "x"
        pred = model(x)
        pred = (pred > 0.5).float().squeeze(1)
        y = y.squeeze(1)

        # if the prediction is correct, append True; else append False
        res_store += (pred == y).tolist()

    # return the classification accuracy
    acc = sum(res_store)/len(res_store)
    return acc

# Step 4: Building The Multi-Layer Perceptron By Hand

In [5]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

 

## Instantiate model and dataset

In [44]:
# Seed the random number generator for all devices (both CPU and CUDA)
torch.manual_seed(0)

# Instantiate the dataset
data = GenreData(data_path)

# Instantiate the MLP model: 22 features (input size), 90 neurons in the hidden layer, and 1 output neuron
# (you may experiment with these numbers to see what happens!)
mlp = MLP(input_dim, hidden_dim, output_dim)

# Here we use torch random_split() function to split the data into training set, validation set and test set 
# e.g., with the following proportions: 0.6 : 0.2 : 0.2; hint: len(data) will give you number of samples in our dataset
# (see https://pytorch.org/docs/stable/data.html?highlight=random_split#torch.utils.data.random_split)
train_set_size = 0.6
val_set_size = 0.2
test_set_size = 0.2
train_set, val_set, test_set = torch.utils.data.random_split(data,[train_set_size,val_set_size,test_set_size])

# Wrap the dataset into Pytorch dataloader to pass samples in "minibatches"
train_dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=False)
val_dataloader = DataLoader(val_set, batch_size=batch_size, shuffle=True, drop_last=False)
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, drop_last=False)


## Run Randomly-Initialized Network

In [45]:
acc = evaluate(mlp, test_dataloader)
print(f"Test accuracy = {acc}")

Test accuracy = 0.57125


# Step 5: Training The Multi-Layer Perceptron

## Specify Training-Related Hyper-Parameters

In [46]:
save = "best_model"
epochs = 20
learning_rate = 0.01

## Run Training Loop

In [47]:
import torch.optim as optim

# define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.SGD(mlp.parameters(), lr=0.01)

# loop over the dataset for num_epochs epochs
acc_best = 0.0
for epoch in range(epochs):
    print(f"epoch:{epoch}")

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs and labels
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        outputs = mlp(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # evaluate the model 
    acc = evaluate(mlp, val_dataloader)

    if acc > acc_best and save:
        torch.save(mlp.state_dict(), save + "mlp")

    print(f"Epoch: #{epoch+1}: validation accuracy = {acc*100:.2f}%; loss={loss}")


epoch:0
Epoch: #1: validation accuracy = 56.62%; loss=0.685326099395752
epoch:1
Epoch: #2: validation accuracy = 59.38%; loss=0.6928468346595764
epoch:2
Epoch: #3: validation accuracy = 63.00%; loss=0.6834285259246826
epoch:3
Epoch: #4: validation accuracy = 65.12%; loss=0.6811364889144897
epoch:4
Epoch: #5: validation accuracy = 66.38%; loss=0.6798668503761292
epoch:5
Epoch: #6: validation accuracy = 67.50%; loss=0.6778132915496826
epoch:6
Epoch: #7: validation accuracy = 68.88%; loss=0.6723321676254272
epoch:7
Epoch: #8: validation accuracy = 69.50%; loss=0.6729267835617065
epoch:8
Epoch: #9: validation accuracy = 70.50%; loss=0.6712745428085327
epoch:9
Epoch: #10: validation accuracy = 70.38%; loss=0.674710750579834
epoch:10
Epoch: #11: validation accuracy = 70.50%; loss=0.6611984968185425
epoch:11
Epoch: #12: validation accuracy = 70.38%; loss=0.6659801006317139
epoch:12
Epoch: #13: validation accuracy = 71.12%; loss=0.6688480973243713
epoch:13
Epoch: #14: validation accuracy = 71.

## Test The Model (with unknown test data)

In [49]:
# Load the model’s weights
mlp.load_state_dict(torch.load(save + "mlp"))
acc = evaluate(mlp, test_dataloader)
print(f"Test accuracy = {acc}")

Test accuracy = 0.715
