In [1]:
!nvidia-smi

Sun Apr 24 14:18:15 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!unzip /content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/mel_spec_44100/mel_spec_test.zip > /dev/null

In [4]:
!pip install -q pytorch_lightning timm torchtoolbox wandb

[?25l[K     |▋                               | 10 kB 31.0 MB/s eta 0:00:01[K     |█▏                              | 20 kB 24.2 MB/s eta 0:00:01[K     |█▊                              | 30 kB 17.4 MB/s eta 0:00:01[K     |██▎                             | 40 kB 15.2 MB/s eta 0:00:01[K     |██▉                             | 51 kB 6.4 MB/s eta 0:00:01[K     |███▍                            | 61 kB 7.6 MB/s eta 0:00:01[K     |████                            | 71 kB 8.0 MB/s eta 0:00:01[K     |████▌                           | 81 kB 8.4 MB/s eta 0:00:01[K     |█████                           | 92 kB 9.3 MB/s eta 0:00:01[K     |█████▋                          | 102 kB 7.7 MB/s eta 0:00:01[K     |██████▏                         | 112 kB 7.7 MB/s eta 0:00:01[K     |██████▊                         | 122 kB 7.7 MB/s eta 0:00:01[K     |███████▎                        | 133 kB 7.7 MB/s eta 0:00:01[K     |███████▉                        | 143 kB 7.7 MB/s eta 0:00:01[K 

In [5]:
!pip install -q albumentations==1.1.0

[K     |████████████████████████████████| 102 kB 10.4 MB/s 
[K     |████████████████████████████████| 47.8 MB 1.7 MB/s 
[?25h

In [6]:
!pip uninstall -y opencv-python-headless==4.5.5.62

Found existing installation: opencv-python-headless 4.5.5.64
Uninstalling opencv-python-headless-4.5.5.64:
  Successfully uninstalled opencv-python-headless-4.5.5.64


In [7]:
!pip install -q opencv-python-headless==4.5.2.52

[K     |████████████████████████████████| 38.2 MB 1.2 MB/s 
[?25h

In [46]:
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import tqdm
import albumentations as alb
from albumentations.pytorch import ToTensorV2
import sys
import torch.multiprocessing as mp
import os
from tqdm.notebook import tqdm
from IPython.display import display

In [47]:
class TransformationType:
    TORCHVISION = "torchvision"
    ALB = "albumentations"

class Models:
    RESNET34 = "resnet34"
    RESNET50 = "resnet50"
    RESNEXT50 = "resnext50_32x4d"
    EFFNET_B0 = "tf_efficientnet_b0_ns"
    EFFNET_B4 = "tf_efficientnet_b4_ns"

class ImgStats:
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD = [0.229, 0.224, 0.225]    

class WandbConfig:
    WANDB_KEY = "c5e2877bf080e6b62fcc57231c91e3a1455f97d0"
    WANDB_RUN_NAME = "melspec_resnext50_run5"
    WANDB_PROJECT = "Pog_MusicClf_Train"
    USE_WANDB = True    

class SchedulerConfig:
    SCHEDULER_PATIENCE = 4
    SCHEDULER = 'LinearWithWarmup'
    T_0 = 10 # for CosineAnnealingWarmRestarts
    MIN_LR = 5e-7 # for CosineAnnealingWarmRestarts
    MAX_LR = 1e-2
    STEPS_PER_EPOCH = 0

# CONSTANTS
class Config:
    # whether to use mel spectrograms generated using audio augmentations ( multiple mel spec for one audio)
    USE_MEL_SPEC_AUG = False
    RUNTIME = "COLAB"
    RESUME_FROM_CHKPT = None
    NUM_CLASSES = 19
    BATCH_SIZE = 128
    NUM_FOLDS = 5
    UNFREEZE_EPOCH_NO = 1
    NUM_EPOCHS = 25
    NUM_WORKERS = mp.cpu_count()
    INPUT_IMAGE_SIZE = (224,224)
    IMG_MEAN = ImgStats.IMAGENET_MEAN
    IMG_STD = ImgStats.IMAGENET_STD
    FAST_DEV_RUN = False
    PRECISION = 16    
    PATIENCE = 7    
    SUBSET_ROWS_FRAC = 0.05
    TRAIN_ON_SUBSET = False
    RANDOM_SEED = 42
    MODEL_TO_USE = Models.EFFNET_B4
    PRETRAINED = False    
    FIND_LR = False
    WEIGHT_DECAY = 1e-6
    USE_MIXUP = False
    # Parameter used to sample lambda values from the beta distribution. Recommended value 0.2   
    MIXUP_ALPHA = 0.2    
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')    
    # model hyperparameters
    MODEL_PARAMS = {    
        "drop_out": 0.25,
        # learning rate discovered using trainer.tune
        "lr": 0.001,
        "warmup_prop": 0.05
    }

In [48]:
if Config.RUNTIME == "COLAB":
    Config.DATA_ROOT_FOLDER = "/content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/"
    Config.IMG_ROOT_FOLDER = "/content/kaggle/processed_test/mel_spec/"
    Config.MODEL_PATH_BASE = Config.DATA_ROOT_FOLDER + "model/CV/EFFNET_B4_44100/"
elif Config.RUNTIME == "KAGGLE":
    Config.DATA_ROOT_FOLDER = "/content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/"
    Config.IMG_ROOT_FOLDER = "/content/kaggle/processed_test/mel_spec/"
else:
    Config.DATA_ROOT_FOLDER = "./data/"
    Config.IMG_ROOT_FOLDER = "./data/processed_train/mel_spec/"

In [49]:
# FOLD_BEST_MODELS = [
#     "fold0_resnext50_32x4d_best_model_epoch=10_val_loss=1.4678.ckpt",
#     "fold1_resnext50_32x4d_best_model_epoch=10_val_loss=1.4466.ckpt",
#     "fold2_resnext50_32x4d_best_model_epoch=9_val_loss=1.4566.ckpt",
#     "fold3_resnext50_32x4d_best_model_epoch=13_val_loss=1.4518.ckpt",
#     "fold4_resnext50_32x4d_best_model_epoch=12_val_loss=1.4903.ckpt"
# ]

In [50]:
FOLD_BEST_MODELS = [
    "fold0_tf_efficientnet_b4_ns_best_model_epoch=7_val_loss=1.4609.ckpt",
    "fold1_tf_efficientnet_b4_ns_best_model_epoch=11_val_loss=1.4520.ckpt",
    "fold2_tf_efficientnet_b4_ns_best_model_epoch=9_val_loss=1.4423.ckpt",
    "fold3_tf_efficientnet_b4_ns_best_model_epoch=10_val_loss=1.3980.ckpt",
    "fold4_tf_efficientnet_b4_ns_best_model_epoch=10_val_loss=1.4489.ckpt"
]

In [51]:
class AudioMelSpecImgTestDataset(Dataset):
    def __init__(self, df, file_name_col, img_root_folder, transform=None):
        self.df = df
        self.file_name_col = file_name_col        
        self.img_root_folder = img_root_folder
        self.transform = transform        

    def __getitem__(self, index):        
        file_name_noext = self.df.loc[index, self.file_name_col].split(".")[0]
        img_path = self.img_root_folder + "/" + file_name_noext + ".jpg"
        img = Image.open(img_path)
        img_arr = np.array(img)
        if self.transform is not None:
            augmented = self.transform(image=img_arr)
            img_tfmd = augmented["image"]
        song_id = self.df.loc[index, "song_id"]        
        return song_id, img_tfmd

    def __len__(self):
        return len(self.df)

In [52]:
test_transform = alb.Compose([
        alb.CenterCrop(Config.INPUT_IMAGE_SIZE[0], Config.INPUT_IMAGE_SIZE[1]),
        alb.Normalize(mean=Config.IMG_MEAN, std=Config.IMG_STD),
        ToTensorV2()        
])

In [53]:
df_test = pd.read_csv(Config.DATA_ROOT_FOLDER + "test.csv")
# filter out records without any corresponding mel spectrogram image
df_test["mspec_exists"] = df_test.filename.map(
    lambda fp: os.path.exists(Config.IMG_ROOT_FOLDER + fp.split(".")[0] + ".jpg")
)
df_test_invalid = df_test[~df_test.mspec_exists]
df_test_invalid

Unnamed: 0,song_id,filename,filepath,mspec_exists
3546,22612,022612.ogg,test/022612.ogg,False
4249,24013,024013.ogg,test/024013.ogg,False


In [54]:
df_test = df_test[df_test.mspec_exists].reset_index(drop=True)

In [55]:
ds_test = AudioMelSpecImgTestDataset(
            df_test, 
            file_name_col="filename",        
            img_root_folder=Config.IMG_ROOT_FOLDER, 
            transform=test_transform
        )        

dl_test = DataLoader(ds_test, batch_size=Config.BATCH_SIZE)

In [56]:
from transformers import get_linear_schedule_with_warmup

def get_linear_lr_scheduler(optimizer):
    # Scheduler and math around the number of training steps.    
    num_train_steps = Config.NUM_EPOCHS * SchedulerConfig.STEPS_PER_EPOCH
    num_warmup_steps = int(Config.MODEL_PARAMS["warmup_prop"] * Config.NUM_EPOCHS * SchedulerConfig.STEPS_PER_EPOCH)    
    print(f"num_train_steps = {num_train_steps}")
    print(f"num_warmup_steps = {num_warmup_steps}")
    lr_scheduler = get_linear_schedule_with_warmup(
            optimizer=optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_train_steps
        )
    return lr_scheduler    

In [57]:
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau, OneCycleLR

def get_optimizer(lr, params):
    model_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, params), 
            lr=lr,
            weight_decay=Config.WEIGHT_DECAY
        )
    interval = "epoch"
    if SchedulerConfig.SCHEDULER == "CosineAnnealingWarmRestarts":
        lr_scheduler = CosineAnnealingWarmRestarts(
                            model_optimizer, 
                            T_0=SchedulerConfig.T_0, 
                            T_mult=1, 
                            eta_min=SchedulerConfig.MIN_LR, 
                            last_epoch=-1
                        )
    elif SchedulerConfig.SCHEDULER == "OneCycleLR":
        lr_scheduler = OneCycleLR(
            optimizer=model_optimizer,
            max_lr=SchedulerConfig.MAX_LR,
            epochs=Config.NUM_EPOCHS,
            steps_per_epoch=SchedulerConfig.STEPS_PER_EPOCH,
            verbose=True
        )
        interval = "step"
    elif SchedulerConfig.SCHEDULER == "CosineAnnealingLR":
        lr_scheduler = CosineAnnealingLR(model_optimizer, eta_min=SchedulerConfig.MIN_LR, T_max=Config.NUM_EPOCHS)
    elif SchedulerConfig.SCHEDULER == "LinearWithWarmup":
        lr_scheduler = get_linear_lr_scheduler(model_optimizer)
        interval = "step"
    else:
        # ReduceLROnPlateau throws an error is parameters are filtered, 
        # refer: https://github.com/PyTorchLightning/pytorch-lightning/issues/8720
        model_optimizer = torch.optim.Adam(
            params, 
            lr=lr,
            weight_decay=Config.WEIGHT_DECAY
        )
        lr_scheduler = ReduceLROnPlateau(
                            model_optimizer, 
                            mode="min",                                                                
                            factor=0.1,
                            patience=SchedulerConfig.SCHEDULER_PATIENCE,
                            min_lr=SchedulerConfig.MIN_LR,                                
                            verbose=True
                        )   
    return {
        "optimizer": model_optimizer, 
        "lr_scheduler": {
            "scheduler": lr_scheduler,
            "interval": interval,
            "monitor": "val_loss",
            "frequency": 1
        }
    }

In [58]:
from torchtoolbox.tools import mixup_data, mixup_criterion
import torch.nn as nn
from torch.nn.functional import cross_entropy
import torchmetrics
import timm

class MusicClfLitModel(pl.LightningModule):
    def __init__(self, num_classes, hparams, model_to_use):
        super().__init__()
        self.save_hyperparameters()
        self.lr = hparams["lr"]
        self.num_classes = num_classes
        self.f1 = torchmetrics.F1Score(num_classes=num_classes)
        # no fine tuning begins
        self.model = timm.create_model(model_to_use, pretrained=Config.PRETRAINED)
        if model_to_use in [Models.RESNET34, Models.RESNET50, Models.RESNEXT50]:
            n_features = self.model.fc.in_features
            self.model.fc = nn.Linear(n_features, num_classes)
        elif model_to_use in [Models.EFFNET_B0, Models.EFFNET_B4]:
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Linear(n_features, num_classes)
        # no fine tuning ends
        # self.backbone, self.classifier = self.get_backbone_classifier(model_to_use, hparams["drop_out"], num_classes) 

    @staticmethod
    def get_backbone_classifier(model_to_use, drop_out, num_classes):
        pt_model = timm.create_model(model_to_use, pretrained=Config.PRETRAINED)
        backbone = None
        classifier = None
        if model_to_use in [Models.RESNET34, Models.RESNET50, Models.RESNEXT50]:            
            backbone = nn.Sequential(*list(pt_model.children())[:-1])
            in_features = pt_model.fc.in_features
            classifier = nn.Sequential(
                nn.Dropout(drop_out),
                nn.Linear(in_features, num_classes)
            )
        if model_to_use in [Models.EFFNET_B0, Models.EFFNET_B4]:
            backbone = nn.Sequential(*list(pt_model.children())[:-1])
            in_features = pt_model.classifier.in_features
            classifier = nn.Linear(in_features, num_classes)
                    
        return backbone, classifier

    def forward(self, x):
        # features = self.backbone(x)
        # features = torch.flatten(features, 1)                
        # x = self.classifier(features)
        x = self.model(x)        
        return x

    def configure_optimizers(self):
        return get_optimizer(lr=self.lr, params=self.parameters())

    def train_with_mixup(self, X, y):
        X, y_a, y_b, lam = mixup_data(X, y, alpha=Config.MIXUP_ALPHA)
        y_pred = self(X)
        loss_mixup = mixup_criterion(cross_entropy, y_pred, y_a, y_b, lam)
        return loss_mixup

    def training_step(self, batch, batch_idx):
        id, X, y = batch        
        if Config.USE_MIXUP:
            loss = self.train_with_mixup(X, y)
        else:
            y_pred = self(X)
            loss = cross_entropy(y_pred, y)                
        #train_f1 = torchmetrics.functional.f1(preds=y_pred, target=y, num_classes=self.num_classes, average="micro")
        self.log("train_loss", loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        #self.log("train_f1", train_f1, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        return loss        

    def validation_step(self, batch, batch_idx):
        id, X, y = batch
        y_pred = self(X)
        val_loss = cross_entropy(y_pred, y)
        current_lr = self.trainer.optimizers[0].param_groups[0]['lr']
        #val_f1 = torchmetrics.functional.f1(preds=y_pred, target=y, num_classes=self.num_classes, average="micro")
        val_f1 = self.f1(preds=y_pred, target=y)
        self.log("val_loss", val_loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        self.log("val_f1", val_f1, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        self.log("cur_lr", current_lr, prog_bar=True, on_step=True, on_epoch=True, logger=True)
        return {"loss": val_loss, "val_f1": val_f1, "cur_lr": current_lr}

In [59]:
for fold in range(Config.NUM_FOLDS):
    fold_best_model = MusicClfLitModel.load_from_checkpoint(
                            checkpoint_path=Config.MODEL_PATH_BASE + FOLD_BEST_MODELS[fold],
                            num_classes=Config.NUM_CLASSES, 
                            hparams=Config.MODEL_PARAMS,        
                            model_to_use=Config.MODEL_TO_USE
                        )
    fold_best_model.to(Config.DEVICE)
    print(f"Using fold {fold} best model = {FOLD_BEST_MODELS[fold]} for test prediction")
    # For each class there is one predicted probability column
    if Config.NUM_FOLDS == 1:
        pred_proba_cols = [f"proba_{i}" for i in range(Config.NUM_CLASSES)]
    else:
        pred_proba_cols = [f"fold{fold}_proba_{i}" for i in range(Config.NUM_CLASSES)]
    with torch.no_grad():    
        for id, X in tqdm(dl_test):
            id = id.cpu().detach().numpy()
            # y_preds = [batch_size, num_classes]
            y_preds_proba = fold_best_model(X.to(Config.DEVICE))            
            y_preds_proba = y_preds_proba.cpu().detach().numpy()
            df_test.loc[df_test.song_id.isin(id), pred_proba_cols] = y_preds_proba                            

Using fold 0 best model = fold0_tf_efficientnet_b4_ns_best_model_epoch=7_val_loss=1.4609.ckpt for test prediction


  0%|          | 0/40 [00:00<?, ?it/s]

Using fold 1 best model = fold1_tf_efficientnet_b4_ns_best_model_epoch=11_val_loss=1.4520.ckpt for test prediction


  0%|          | 0/40 [00:00<?, ?it/s]

Using fold 2 best model = fold2_tf_efficientnet_b4_ns_best_model_epoch=9_val_loss=1.4423.ckpt for test prediction


  0%|          | 0/40 [00:00<?, ?it/s]

Using fold 3 best model = fold3_tf_efficientnet_b4_ns_best_model_epoch=10_val_loss=1.3980.ckpt for test prediction


  0%|          | 0/40 [00:00<?, ?it/s]

Using fold 4 best model = fold4_tf_efficientnet_b4_ns_best_model_epoch=10_val_loss=1.4489.ckpt for test prediction


  0%|          | 0/40 [00:00<?, ?it/s]

In [61]:
# Take the mean of the probability predictions for each model
if Config.NUM_FOLDS > 1:
    for i in range(Config.NUM_CLASSES):
        cols = [f"fold{j}_proba_{i}" for j in range(Config.NUM_FOLDS)]
        df_test[f"proba_{i}"] = df_test[cols].mean(axis=1)
        df_test = df_test.drop(cols, axis=1)    
    df_test.to_csv(Config.MODEL_PATH_BASE + "test_preds_proba_effnetb4.csv")    

In [62]:
df_test.head()

Unnamed: 0,song_id,filename,filepath,mspec_exists,proba_0,proba_1,proba_2,proba_3,proba_4,proba_5,...,proba_9,proba_10,proba_11,proba_12,proba_13,proba_14,proba_15,proba_16,proba_17,proba_18
0,7072,007072.ogg,test/007072.ogg,True,1.021398,2.723285,1.673641,2.075321,-1.608137,-0.023659,...,-2.296966,1.086261,-0.02277,-6.014047,-0.75517,-3.848211,-4.875053,-2.86871,-3.798427,-6.339758
1,10207,010207.ogg,test/010207.ogg,True,-0.076092,0.088777,-1.426596,2.38123,-0.256824,-0.946161,...,-1.27524,-1.163799,-0.828314,11.202047,-0.880759,-5.294304,-3.256554,-0.134596,-0.147794,0.866233
2,20008,020008.ogg,test/020008.ogg,True,4.258605,1.304438,0.036229,2.843056,-0.452197,-2.853454,...,-2.417543,1.55195,-3.174908,-5.560495,-1.574196,-5.669688,-4.02737,-3.719967,-5.692308,-6.741453
3,10924,010924.ogg,test/010924.ogg,True,4.664347,-0.122058,-1.109949,1.153946,1.723676,-3.731079,...,-1.826131,0.894827,-3.881859,-5.240276,-2.110742,-4.961221,-0.330837,-4.877005,-4.177182,-5.406746
4,21896,021896.ogg,test/021896.ogg,True,2.04043,1.192919,0.530569,0.220946,5.704256,-0.310388,...,-0.327988,-1.626397,-4.666799,-5.875151,-3.426336,-2.529221,-2.506224,-2.469284,-1.340434,-3.423287


In [63]:
df_test.columns

Index(['song_id', 'filename', 'filepath', 'mspec_exists', 'proba_0', 'proba_1',
       'proba_2', 'proba_3', 'proba_4', 'proba_5', 'proba_6', 'proba_7',
       'proba_8', 'proba_9', 'proba_10', 'proba_11', 'proba_12', 'proba_13',
       'proba_14', 'proba_15', 'proba_16', 'proba_17', 'proba_18'],
      dtype='object')