In [1]:
import os
import warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd

import torch
import torchaudio
import torchvision
import torch.nn as nn

from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn import metrics

from matplotlib import pyplot as plt
import seaborn as sns

import pickle


# Load Data

In [2]:
class AlbumGenreDataset(Dataset):
    def __init__(self, data, group='debug'):
        """
        data (Pandas.DataFrame): ['amazon_id', 'title', 'artist', 'img_path', 'genre']
        group (String): split group e.g. train, valid
        """
        self.data = data
        self.group = group

    @classmethod
    def available_groups(cls):
        return ['train', 'valid', 'test', 'debug']

    def __getitem__(self, index):
        data = self.data.iloc[index]
        
        # 공사중~
        

    def __len__(self):
        return len(self.data)


In [3]:
from torchvision import transforms

In [30]:
trans = transforms.Compose([transforms.Resize((224, 224)),
                            transforms.ToTensor(),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
dataset = torchvision.datasets.ImageFolder(root = './data/MuMu_dataset/album_imgs/', transform = trans)

data_num = len(dataset)
train_valid_num = int(data_num*0.6) + int(data_num*0.2)
train_data, valid_data, test_data = torch.utils.data.random_split(dataset, [int(data_num*0.6), int(data_num*0.2), data_num - train_valid_num])

batch_size = 16
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=True)

In [44]:
class TestModel(nn.Module):
  def __init__(self, n_class=16):
    super(TestModel, self).__init__()
    self.pre_model = torchvision.models.resnet101(pretrained=True)
    self.linear = nn.Linear(1000, n_class)

  def forward(self, x):
    out = self.pre_model(x)
    out = self.linear(x.squeeze(-1))
    x = nn.Softmax()(x)
    return out

In [45]:
class Runner(object):
  def __init__(self, model, lr, weight_decay, sr, tags):
    """
    Args:
      model (nn.Module): pytorch model
      lr (float): learning rate
      weight_decay (float): weight_decay
      sr (float): stopping rate
      tags (list): tags with index
    """
    self.optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=0.2, patience=5, verbose=True)
    self.learning_rate = lr
    self.stopping_rate = sr
    self.device = torch.device('cpu')
    self.model = model.to(self.device)
    self.criterion = torch.nn.CrossEntropyLoss().to(self.device)
    self.tags = tags

  # Running model for train, test and validation. mode: 'train' for training, 'eval' for validation and test
  def run(self, dataloader, epoch, mode='TRAIN'):
    self.model.train() if mode is 'TRAIN' else self.model.eval()

    epoch_loss = 0
    pbar = tqdm(dataloader, desc=f'{mode} Epoch {epoch:02}')  # progress bar
    for x, y in pbar:
      # Move mini-batch to the desired device.
      x = x.to(self.device)
      y = y.to(self.device)
      # Feed forward the model.
      prediction = self.model(x)
      # Compute the loss.
      loss = self.criterion(prediction, y)
      if mode is 'TRAIN':
        # Perform backward propagation to compute gradients.
        loss.backward()
        # Update the parameters.
        self.optimizer.step()
        # Reset the computed gradients.
        self.optimizer.zero_grad()

      batch_size = len(x)
      epoch_loss += batch_size * loss.item()
    epoch_loss = epoch_loss / len(dataloader.dataset)
    return epoch_loss

  def test(self, dataloader):
    self.model.eval()
    epoch_loss = 0
    predictions = []
    labels = []
    pbar = tqdm(dataloader, desc=f'TEST')  # progress bar
    for x, y in pbar:
      x = x.to(self.device)
      y = y.to(self.device)
      x = x.transpose(1,0) # pre-batch in audio loader (chunk, 1, waveform)
      prediction = self.model(x)
      prediction = prediction.mean(dim=0, keepdim=True) # average chunk audio
      loss = self.criterion(prediction, y) 
      batch_size = len(x)
      epoch_loss += batch_size * loss.item()
      predictions.extend(prediction.detach().cpu().numpy())
      labels.extend(y.detach().cpu().numpy())
    epoch_loss = epoch_loss / len(test_loader.dataset)
    roc_aucs, tag_wise_rocaucs = self.get_auc(predictions, labels)
    return roc_aucs, epoch_loss, tag_wise_rocaucs

  # Early stopping function for given validation loss, you can use this part!
  def early_stop(self, loss, epoch):
    self.scheduler.step(loss, epoch)
    self.learning_rate = self.optimizer.param_groups[0]['lr']
    stop = self.learning_rate < self.stopping_rate
    return stop

  def get_auc(self, predictions, labels):
    roc_aucs  = metrics.roc_auc_score(labels, predictions, average='macro')
    tag_wise_predictions = np.stack(predictions).T
    tag_wise_labels = np.stack(labels).T
    tag_wise_rocaucs = {}
    for tag, logit, label in zip(self.tags, tag_wise_predictions, tag_wise_labels):
      tag_wise_rocaucs[tag] = metrics.roc_auc_score(label, logit)
    return roc_aucs, tag_wise_rocaucs

In [46]:
# Training setup.
LR = 1e-3  # learning rate
SR = 1e-5  # stopping rate
NUM_EPOCHS = 10
WEIGHT_DECAY = 1e-5  # L2 regularization weight

In [47]:
# Iterate over epochs.
TAGS =  ['Blues', 'Jazz', 'Rock', 'R&B', 'Alternative Rock', 'Latin Music', 'Country',
 'Rap & Hip-Hop', 'Dance & Electronic', 'Reggae', 'Classical', 'Metal', 'Pop',
 'New Age', 'Folk', 'Gospel']
model = TestModel()
runner = Runner(model=model, lr = LR, weight_decay = WEIGHT_DECAY, sr = SR, tags=TAGS)
for epoch in range(NUM_EPOCHS):
  train_loss = runner.run(train_loader, epoch, 'TRAIN')
  valid_loss = runner.run(valid_loader, epoch, 'VALID')
  print("[Epoch %d/%d] [Train Loss: %.4f] [Valid Loss: %.4f]" %
        (epoch + 1, NUM_EPOCHS, train_loss, valid_loss))
  if runner.early_stop(valid_loss, epoch + 1):
    break

TRAIN Epoch 00:   0%|          | 0/84 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (12288x256 and 1000x16)

In [None]:
torch.cuda.empty_cache()