<a href="https://colab.research.google.com/github/dagnybrand/music-genre-network-project/blob/main/Semester_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# import libraries
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100, CIFAR10
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.transforms.functional import resize
from torchvision.transforms import CenterCrop
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torchsummary import summary
from tqdm import tqdm
import numpy as np
import pandas as pd

import cv2
from sklearn import preprocessing

In [3]:
# Mount Google Drive at /content/drive/ so that later cells can read the
# annotation CSVs / images and write model checkpoints under
# /content/drive/MyDrive/. Only works inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [64]:
# design network
# based on the CNN explainer https://poloclub.github.io/cnn-explainer/
class CNN(nn.Module):
    """Small convolutional image classifier.

    Architecture (see ``forward``):
        conv1 (7x7, stride 6) -> ReLU
        conv2 (3x3)           -> ReLU -> 2x2 max-pool
        conv4 (3x3)           -> ReLU -> 2x2 max-pool
        flatten -> fc1 (10240 -> 1024) -> ReLU -> fc2 (1024 -> numClasses)

    ``fc1`` has a fixed input size of 10240 features, which corresponds to
    64 channels x 10 x 16 after the last pooling layer for a 3 x 288 x 432
    input (the size used for the model summary in this notebook). Other
    input resolutions will fail at ``fc1`` with a shape mismatch.

    Args:
        numChannels: number of channels of the input images.
        numClasses: number of output classes (size of the logits vector).
    """

    def __init__(self, numChannels = 3, numClasses = 10):
        super(CNN, self).__init__()
        self.classes = numClasses

        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(in_channels=numChannels, out_channels=64, kernel_size=(7, 7), stride=(6, 6))
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1))
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=(3, 3), stride=(1, 1))

        # Stateless layers, shared between stages in forward()
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

        # Classifier head. The output size follows numClasses (it used to be
        # hard-coded to 10, silently ignoring the constructor argument).
        self.fc1 = nn.Linear(in_features=10240, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=numClasses)

    # this evaluate function was taken from our Class Practicals
    def evaluate(self, model, dataloader, classes):
        """Compute the overall accuracy of ``model`` on ``dataloader``.

        Note that the network being evaluated is ``model``, not ``self``;
        callers in this notebook pass the same object for both.

        Args:
            model: the ``nn.Module`` to evaluate. It is switched to eval mode
                and is left in eval mode when this method returns.
            dataloader: iterable yielding ``(images, labels)`` batches, where
                ``labels`` holds integer class indices.
            classes: sequence of class names, indexed by label.

        Returns:
            Fraction of correctly classified samples as a float in [0, 1];
            ``0.0`` if the dataloader yields no samples.
        """
        # We need to switch the model into the evaluation mode
        model.eval()

        # Prepare to count predictions for each class
        correct_pred = {classname: 0 for classname in classes}
        total_pred = {classname: 0 for classname in classes}

        # Run inputs on whatever device the model's weights live on
        # (a no-op for the CPU-only setup used in this notebook).
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

        # No gradients are needed for evaluation; skipping them saves memory
        # and time.
        with torch.no_grad():
            for images, labels in dataloader:
                if device is not None:
                    images = images.to(device)
                outputs = model(images)
                _, predictions = torch.max(outputs, 1)

                # Plain Python ints make dictionary/list indexing cheap and
                # device-independent.
                label_ids = torch.as_tensor(labels).tolist()
                predicted_ids = predictions.tolist()

                # Count the correct predictions for each class
                for label, prediction in zip(label_ids, predicted_ids):
                    class_name = classes[label]
                    if label == prediction:
                        correct_pred[class_name] += 1
                    total_pred[class_name] += 1

        # Calculate the overall accuracy; guard against an empty dataloader
        total = sum(total_pred.values())
        if total == 0:
            return 0.0
        return sum(correct_pred.values()) / total

    def forward(self, x):
        """Map a batch of images to a batch of unnormalised class scores (logits)."""
        x = self.conv1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.conv4(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Keep the batch dimension, flatten everything else for the linear head
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)

        return x

In [25]:
class CustomImageDataset(Dataset):
    """Dataset of genre-labelled images described by an annotations CSV.

    After the optional 'Unnamed: 0' column is dropped, the first CSV column is
    used as the image file name and the second one ('Genre') as the label.
    Genre names are encoded to integers with a ``LabelEncoder`` fitted on the
    ten genre names listed in ``__init__``. Images are looked up at
    ``<img_dir>/<genre lower-cased, spaces removed>/<file name>``.

    Args:
        annotations_file: path of the CSV file listing file names and genres.
        img_dir: root directory holding one sub-folder per genre.
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the integer label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        # errors='ignore' lets CSVs that were saved without an index column
        # load as well, instead of raising a KeyError.
        self.img_labels = pd.read_csv(annotations_file).drop(columns='Unnamed: 0', errors='ignore')
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

        self.le = preprocessing.LabelEncoder()
        self.le.fit(['Blues', 'Classical', 'Country', 'Disco', 'Hip Hop', 'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock'])

        # Replace genre names by their integer codes
        self.img_labels['Genre'] = self.le.transform(self.img_labels['Genre'])

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return ``(image, label)``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded (``cv2.imread`` signals this by returning ``None``).
        """
        file_name = self.img_labels.iloc[idx, 0]
        label = self.img_labels.iloc[idx, 1]

        # Folder names are the genre names lower-cased and without spaces,
        # e.g. 'Hip Hop' -> 'hiphop'.
        genre_folder = self.le.inverse_transform([label])[0].lower().replace(" ", "")
        img_path = os.path.join(self.img_dir, genre_folder, file_name)

        # NOTE(review): cv2.imread returns channels in BGR order (OpenCV
        # convention). Left unchanged so that existing checkpoints keep
        # seeing the same input; confirm whether RGB was intended.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [61]:
# Training hyper-parameters
epochs = 50
batch_size = 80

# Root folder with one sub-folder of images per genre, plus the CSV files
# listing the training and validation samples
img_dir = '/content/drive/MyDrive/Neural Networks/Semester Project/Project Data/'
train_path = '/content/drive/MyDrive/Neural Networks/Semester Project/Project Data/train_data.csv'
validate_path = '/content/drive/MyDrive/Neural Networks/Semester Project/Project Data/validate_data.csv'

# One dataset per split; every loaded image goes through ToTensor
train_data, validate_data = (
    CustomImageDataset(csv_path, img_dir, transform = ToTensor())
    for csv_path in (train_path, validate_path)
)

# Both splits are batched with identical loader settings
loader_options = dict(batch_size=batch_size, shuffle=True, drop_last=False, num_workers=2)
train_loader = DataLoader(train_data, **loader_options)
val_loader = DataLoader(validate_data, **loader_options)

In [None]:
my_starting_weights = "/content/drive/MyDrive/Neural Networks/Semester Project/first_model.pth"
# Set to True to warm-start training from `my_starting_weights`
# (off by default: training starts from randomly initialised weights).
resume_from_starting_weights = False

if __name__ == '__main__':
    # 'train': fit the model and keep the best checkpoint;
    # 'eval' : load the best checkpoint and report its accuracy.
    mode = 'train'

    # Path where you plan to save the best model during training
    my_best_model = "/content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth"
    classes = ['Blues', 'Classical', 'Country', 'Disco', 'Hip Hop', 'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock']

    # Initialize the model and print out its configuration
    model = CNN(numChannels = 3, numClasses = 10)

    print("\n\nModel summary:\n\n")
    summary(model, input_size=(3, 288, 432))

    if mode == "train":

        print("\n\nTraining starts!\n\n")

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        if resume_from_starting_weights:
            print(f"Loading the weights from {my_starting_weights} ...")
            model.load_state_dict(torch.load(my_starting_weights))
            print("Successfully loaded the model checkpoint!")

        best_acc = .0
        for epoch in range(epochs):
            print(f"Starting epoch {epoch + 1}")

            # evaluate() leaves the model in eval mode, so training mode has
            # to be re-enabled at the start of every epoch.
            model.train()

            running_loss = .0
            for inputs, labels in tqdm(train_loader, total=len(train_loader)):
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # .item() stores a plain float rather than a tensor that is
                # still attached to the autograd graph.
                running_loss += loss.item()

            print(f"Mean training loss: {running_loss / max(len(train_loader), 1):.4f}")

            # Evaluate the accuracy after each epoch
            acc = model.evaluate(model, val_loader, classes)
            if acc > best_acc:
                print(f"Better validation accuracy achieved: {acc * 100:.2f}%")
                best_acc = acc
                print(f"Saving this model as: {my_best_model}")
                torch.save(model.state_dict(), my_best_model)

    # And here we evaluate the best saved model
    elif mode == "eval":

        print("\n\nValidating the trained model:")
        print(f"Loading checkpoint from {my_best_model}")
        model.load_state_dict(torch.load(my_best_model))
        # NOTE(review): no test split is loaded anywhere in this notebook
        # (the former `test_loader` and `device` names were undefined and
        # evaluate() takes three arguments), so the validation loader is used.
        # Swap in a test loader once a test split is available.
        acc = model.evaluate(model, val_loader, classes)
        print(f"Accuracy on the validation data: {acc * 100:.2f}%")

    else:
        print("'mode' argument should either be 'train' or 'eval'")



Model summary:


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 47, 71]           9,472
              ReLU-2           [-1, 64, 47, 71]               0
            Conv2d-3          [-1, 128, 45, 69]          73,856
              ReLU-4          [-1, 128, 45, 69]               0
         MaxPool2d-5          [-1, 128, 22, 34]               0
            Conv2d-6           [-1, 64, 20, 32]          73,792
              ReLU-7           [-1, 64, 20, 32]               0
         MaxPool2d-8           [-1, 64, 10, 16]               0
            Linear-9                 [-1, 1024]      10,486,784
             ReLU-10                 [-1, 1024]               0
           Linear-11                   [-1, 10]          10,250
Total params: 10,654,154
Trainable params: 10,654,154
Non-trainable params: 0
----------------------------------------------------------------
Input

100%|██████████| 9/9 [00:44<00:00,  4.95s/it]


Better validation accuracy achieved: 16.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 2


100%|██████████| 9/9 [00:44<00:00,  4.95s/it]


Better validation accuracy achieved: 16.67%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 3


100%|██████████| 9/9 [00:44<00:00,  4.89s/it]


Better validation accuracy achieved: 17.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 4


100%|██████████| 9/9 [00:42<00:00,  4.77s/it]


Better validation accuracy achieved: 25.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 5


100%|██████████| 9/9 [00:43<00:00,  4.80s/it]


Better validation accuracy achieved: 29.67%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 6


100%|██████████| 9/9 [00:42<00:00,  4.77s/it]


Starting epoch 7


100%|██████████| 9/9 [00:44<00:00,  4.89s/it]


Better validation accuracy achieved: 32.33%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 8


100%|██████████| 9/9 [00:43<00:00,  4.88s/it]


Better validation accuracy achieved: 41.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 9


100%|██████████| 9/9 [00:43<00:00,  4.82s/it]


Starting epoch 10


100%|██████████| 9/9 [00:42<00:00,  4.74s/it]


Better validation accuracy achieved: 41.67%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 11


100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


Better validation accuracy achieved: 44.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 12


100%|██████████| 9/9 [00:43<00:00,  4.82s/it]


Better validation accuracy achieved: 46.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 13


100%|██████████| 9/9 [00:43<00:00,  4.84s/it]


Starting epoch 14


100%|██████████| 9/9 [00:43<00:00,  4.83s/it]


Better validation accuracy achieved: 48.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 15


100%|██████████| 9/9 [00:44<00:00,  4.93s/it]


Better validation accuracy achieved: 53.33%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 16


100%|██████████| 9/9 [00:43<00:00,  4.85s/it]


Better validation accuracy achieved: 55.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 17


100%|██████████| 9/9 [00:43<00:00,  4.83s/it]


Starting epoch 18


100%|██████████| 9/9 [00:42<00:00,  4.69s/it]


Better validation accuracy achieved: 59.00%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 19


100%|██████████| 9/9 [00:42<00:00,  4.76s/it]


Starting epoch 20


100%|██████████| 9/9 [00:42<00:00,  4.70s/it]


Starting epoch 21


100%|██████████| 9/9 [00:43<00:00,  4.81s/it]


Starting epoch 22


100%|██████████| 9/9 [00:45<00:00,  5.01s/it]


Starting epoch 23


100%|██████████| 9/9 [00:43<00:00,  4.80s/it]


Starting epoch 24


100%|██████████| 9/9 [00:42<00:00,  4.76s/it]


Starting epoch 25


100%|██████████| 9/9 [00:43<00:00,  4.81s/it]


Starting epoch 26


100%|██████████| 9/9 [00:42<00:00,  4.76s/it]


Starting epoch 27


100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


Starting epoch 28


100%|██████████| 9/9 [00:43<00:00,  4.89s/it]


Better validation accuracy achieved: 59.67%
Saving this model as: /content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth
Starting epoch 29


100%|██████████| 9/9 [00:44<00:00,  4.91s/it]


Starting epoch 30


100%|██████████| 9/9 [00:43<00:00,  4.82s/it]


Starting epoch 31


100%|██████████| 9/9 [00:42<00:00,  4.77s/it]


Starting epoch 32


100%|██████████| 9/9 [00:43<00:00,  4.80s/it]


Starting epoch 33


100%|██████████| 9/9 [00:43<00:00,  4.78s/it]


Starting epoch 34


100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


Starting epoch 35


100%|██████████| 9/9 [00:43<00:00,  4.83s/it]


Starting epoch 36


100%|██████████| 9/9 [00:43<00:00,  4.87s/it]


Starting epoch 37


100%|██████████| 9/9 [00:44<00:00,  4.97s/it]


Starting epoch 38


100%|██████████| 9/9 [00:44<00:00,  4.92s/it]


Starting epoch 39


100%|██████████| 9/9 [00:43<00:00,  4.86s/it]


Starting epoch 40


100%|██████████| 9/9 [00:43<00:00,  4.80s/it]


Starting epoch 41


100%|██████████| 9/9 [00:43<00:00,  4.84s/it]


Starting epoch 42


100%|██████████| 9/9 [00:43<00:00,  4.78s/it]


Starting epoch 43


100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


Starting epoch 44


100%|██████████| 9/9 [00:43<00:00,  4.79s/it]


Starting epoch 45


100%|██████████| 9/9 [00:43<00:00,  4.88s/it]


Starting epoch 46


100%|██████████| 9/9 [00:43<00:00,  4.82s/it]


Starting epoch 47


100%|██████████| 9/9 [00:43<00:00,  4.80s/it]


Starting epoch 48


100%|██████████| 9/9 [00:43<00:00,  4.85s/it]


Starting epoch 49


100%|██████████| 9/9 [00:43<00:00,  4.86s/it]


Starting epoch 50


100%|██████████| 9/9 [00:43<00:00,  4.83s/it]


In [65]:
# Checkpoint written by the training cell whenever validation accuracy improved
my_best_model = "/content/drive/MyDrive/Neural Networks/Semester Project/first_model_new.pth"
best_model = my_best_model
classes = ['Blues', 'Classical', 'Country', 'Disco', 'Hip Hop', 'Jazz', 'Metal', 'Pop', 'Reggae', 'Rock']

# Rebuild the network and restore the saved weights
model = CNN(numChannels = 3, numClasses = 10)
state_dict = torch.load(best_model)
model.load_state_dict(state_dict)

# Accuracy on the training split first, then on the validation split
train_acc, val_acc = (
    model.evaluate(model, loader, classes)
    for loader in (train_loader, val_loader)
)

print(f"Accuracy when testing on training data: {train_acc*100}%")
print(f"Accuracy when testing on validation data: {val_acc*100}%")

Accuracy when testing on training data: 85.12160228898426%
Accuracy when testing on validation data: 59.66666666666667%
