### Date Preproscessing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import librosa
import librosa.display
import IPython.display as ipd

In [2]:
metadata = pd.read_csv('musicnet_metadata.csv')
train_data_files = glob('musicnet/musicnet/train_data/*.wav')
test_data_files = glob('musicnet/musicnet/test_data/*.wav')

In [3]:
def wav_to_mel_spec(path):
  y, sr = librosa.load(path)
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=512)
  return np.abs(librosa.amplitude_to_db(S, ref=np.max))

In [4]:
train_data = [wav_to_mel_spec(path) for path in train_data_files]
test_data = [wav_to_mel_spec(path) for path in test_data_files]

In [6]:
train_data_ids = [int(path[-8:-4]) for path in train_data_files]
test_data_ids = [int(path[-8:-4]) for path in test_data_files]

train_labels = [metadata[metadata['id'] == i]['ensemble'].values[0] for i in train_data_ids]
test_labels = [metadata[metadata['id'] == i]['ensemble'].values[0] for i in test_data_ids]

In [7]:
labels_to_nums = {label: i for i, label in enumerate(set(train_labels))}
nums_to_labels = {i: label for label, i in labels_to_nums.items()}

### Model

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [62]:
x_train = np.array([x[:, :1024].reshape(1, 512, 1024) for x in train_data])
y_train = np.array([labels_to_nums[label] for label in train_labels])

x_test = np.array([x[:, :1024].reshape(1, 512, 1024) for x in test_data])
y_test = np.array([labels_to_nums[label] for label in test_labels])

x_train_tensor = torch.from_numpy(x_train).float()
y_train_tensor = torch.from_numpy(y_train).long()

x_test_tensor = torch.from_numpy(x_test).float()
y_test_tensor = torch.from_numpy(y_test).long()

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

In [63]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [64]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, 3)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 16, 3)
    self.conv3 = nn.Conv2d(16, 8, 3)
    self.fc1 = nn.Linear(62496, 128)
    self.fc2 = nn.Linear(128, 21)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool(F.relu(self.conv3(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return x

In [65]:
net = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [66]:
def compute_metrics(data_loader):
  loss_sum = 0.0
  num_correct = 0
  num_total = 0
  
  net.eval()
  with torch.no_grad():
    for inputs, labels in data_loader:
      outputs = net(inputs)
      loss = loss_fn(outputs, labels)
      loss_sum += loss.item()
      _, predicted = torch.max(outputs, 1)
      num_correct += (predicted == labels).sum().item()
      num_total += labels.size(0)

  return loss_sum / len(data_loader), num_correct / num_total

In [67]:
num_epochs = 20

for epoch in range(num_epochs):
  net.train()
  for inputs, labels in train_loader:
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()

  net.eval()
  with torch.no_grad():
    train_loss = 0.0
    correct_train = 0
    total_train = 0
    for inputs, labels in train_loader:
      outputs = net(inputs)
      loss = loss_fn(outputs, labels)
      train_loss += loss.item()
      _, predicted = torch.max(outputs, 1)
      total_train += labels.size(0)
      correct_train += (predicted == labels).sum().item()

    test_loss = 0.0
    correct_test = 0
    total_test = 0
    for inputs, labels in test_loader:
      outputs = net(inputs)
      loss = loss_fn(outputs, labels)
      test_loss += loss.item()
      _, predicted = torch.max(outputs, 1)
      total_test += labels.size(0)
      correct_test += (predicted == labels).sum().item()

    avg_train_loss = train_loss / len(train_loader)
    train_acc = correct_train / total_train
    print('epoch:', epoch + 1, 'train loss: {:.4f}, train accuracy: {:.4f}'.format(avg_train_loss, train_acc))

    avg_test_loss = test_loss / len(test_loader)
    test_acc = correct_test / total_test
    print('epoch:', epoch + 1, 'test loss: {:.4f}, test accuracy: {:.4f}'.format(avg_test_loss, test_acc))

epoch: 1 train loss: 2.9938, train accuracy: 0.4031
epoch: 1 test loss: 3.0313, test accuracy: 0.0000
epoch: 2 train loss: 2.7395, train accuracy: 0.5250
epoch: 2 test loss: 2.8064, test accuracy: 0.3000
epoch: 3 train loss: 1.9712, train accuracy: 0.5469
epoch: 3 test loss: 2.4754, test accuracy: 0.4000
epoch: 4 train loss: 1.9434, train accuracy: 0.6375
epoch: 4 test loss: 2.3217, test accuracy: 0.4000
epoch: 5 train loss: 2.9351, train accuracy: 0.4781
epoch: 5 test loss: 2.9800, test accuracy: 0.3000
epoch: 6 train loss: 2.9437, train accuracy: 0.4781
epoch: 6 test loss: 2.9742, test accuracy: 0.3000
epoch: 7 train loss: 2.3648, train accuracy: 0.4781
epoch: 7 test loss: 2.7332, test accuracy: 0.3000
epoch: 8 train loss: 1.7140, train accuracy: 0.6344
epoch: 8 test loss: 2.4362, test accuracy: 0.4000
epoch: 9 train loss: 1.4123, train accuracy: 0.6719
epoch: 9 test loss: 1.8260, test accuracy: 0.6000
epoch: 10 train loss: 1.0764, train accuracy: 0.7406
epoch: 10 test loss: 2.3628, 