In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!unzip /content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/melspec_256by313/mel_spec_test.zip > /dev/null

In [3]:
!pip install -q pytorch_lightning timm torchtoolbox wandb

[K     |████████████████████████████████| 582 kB 4.1 MB/s 
[K     |████████████████████████████████| 431 kB 57.2 MB/s 
[K     |████████████████████████████████| 84 kB 3.9 MB/s 
[K     |████████████████████████████████| 1.8 MB 56.4 MB/s 
[K     |████████████████████████████████| 408 kB 56.8 MB/s 
[K     |████████████████████████████████| 136 kB 65.0 MB/s 
[K     |████████████████████████████████| 596 kB 64.0 MB/s 
[K     |████████████████████████████████| 1.1 MB 58.5 MB/s 
[K     |████████████████████████████████| 4.0 MB 52.8 MB/s 
[K     |████████████████████████████████| 144 kB 67.6 MB/s 
[K     |████████████████████████████████| 181 kB 71.4 MB/s 
[K     |████████████████████████████████| 63 kB 2.0 MB/s 
[K     |████████████████████████████████| 94 kB 1.6 MB/s 
[K     |████████████████████████████████| 271 kB 66.7 MB/s 
[K     |████████████████████████████████| 144 kB 71.4 MB/s 
[K     |████████████████████████████████| 895 kB 59.9 MB/s 
[K     |██████████████████████

In [4]:
!pip install -q albumentations==1.1.0

[K     |████████████████████████████████| 102 kB 5.5 MB/s 
[K     |████████████████████████████████| 47.8 MB 1.9 MB/s 
[?25h

In [5]:
!pip uninstall -y opencv-python-headless==4.5.5.62

Found existing installation: opencv-python-headless 4.5.5.64
Uninstalling opencv-python-headless-4.5.5.64:
  Successfully uninstalled opencv-python-headless-4.5.5.64


In [6]:
!pip install -q opencv-python-headless==4.5.2.52

[K     |████████████████████████████████| 38.2 MB 1.1 MB/s 
[?25h

In [7]:
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import tqdm
import albumentations as alb
from albumentations.pytorch import ToTensorV2
import sys
import torch.multiprocessing as mp
import os
from tqdm.notebook import tqdm
from IPython.display import display

In [20]:
class TransformationType:
    TORCHVISION = "torchvision"
    ALB = "albumentations"

class Models:
    RESNET34 = "resnet34"
    RESNET50 = "resnet50"
    RESNEXT50 = "resnext50_32x4d"
    EFFNET_B0 = "tf_efficientnet_b0_ns"
    EFFNET_B4 = "tf_efficientnet_b4_ns"

class ImgStats:
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD = [0.229, 0.224, 0.225]    

class WandbConfig:
    WANDB_KEY = ""
    WANDB_RUN_NAME = "melspec_256by313_effnet_run1"
    WANDB_PROJECT = "Pog_MusicClf_Resnext"
    USE_WANDB = False    

class SchedulerConfig:
    SCHEDULER_PATIENCE = 4
    SCHEDULER = 'CosineAnnealingWarmRestarts'
    T_0 = 10 # for CosineAnnealingWarmRestarts
    MIN_LR = 5e-7 # for CosineAnnealingWarmRestarts
    MAX_LR = 1e-2
    STEPS_PER_EPOCH = 0

# CONSTANTS
class Config:
    # whether to use mel spectrograms generated using audio augmentations ( multiple mel spec for one audio)
    USE_MEL_SPEC_AUG = True
    RUNTIME = "COLAB"
    RESUME_FROM_CHKPT = None
    NUM_CLASSES = 19
    BATCH_SIZE = 32
    NUM_FOLDS = 5
    UNFREEZE_EPOCH_NO = 1
    NUM_EPOCHS = 2
    NUM_WORKERS = mp.cpu_count()
    INPUT_IMAGE_SIZE = (128,128)
    IMG_MEAN = ImgStats.IMAGENET_MEAN
    IMG_STD = ImgStats.IMAGENET_STD
    FAST_DEV_RUN = False
    PRECISION = 16    
    PATIENCE = 10    
    SUBSET_ROWS_FRAC = 0.05
    TRAIN_ON_SUBSET = False
    RANDOM_SEED = 42
    MODEL_TO_USE = Models.RESNEXT50
    PRETRAINED = False        
    FIND_LR = False
    WEIGHT_DECAY = 1e-6
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')    
    # model hyperparameters
    MODEL_PARAMS = {    
        "drop_out": 0.25,
        "lr": 0.00043
    }

In [21]:
if Config.RUNTIME == "COLAB":
    Config.DATA_ROOT_FOLDER = "/content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/"
    Config.IMG_ROOT_FOLDER = "/content/kaggle/processed_test/mel_spec/"
    Config.MODEL_PATH_BASE = Config.DATA_ROOT_FOLDER + "model/"
elif Config.RUNTIME == "KAGGLE":
    Config.DATA_ROOT_FOLDER = "/content/gdrive/MyDrive/Kaggle/Pog_Music_Classification/data/"
    Config.IMG_ROOT_FOLDER = "/content/kaggle/processed_test/mel_spec/"
else:
    Config.DATA_ROOT_FOLDER = "./data/"
    Config.IMG_ROOT_FOLDER = "./data/processed_train/mel_spec/"

In [22]:
FOLD_BEST_MODELS = [
    "fold0_resnext50_32x4d_best_model_epoch=7_val_loss=1.4910.ckpt",
    "fold1_resnext50_32x4d_best_model_epoch=9_val_loss=1.4835.ckpt",
    "fold2_resnext50_32x4d_best_model_epoch=9_val_loss=1.5110.ckpt",
    "fold3_resnext50_32x4d_best_model_epoch=9_val_loss=1.5210.ckpt",
    "fold4_resnext50_32x4d_best_model_epoch=9_val_loss=1.4781.ckpt"
]

In [23]:
class AudioMelSpecImgTestDataset(Dataset):
    def __init__(self, df, file_name_col, img_root_folder, transform=None):
        self.df = df
        self.file_name_col = file_name_col        
        self.img_root_folder = img_root_folder
        self.transform = transform        

    def __getitem__(self, index):        
        file_name_noext = self.df.loc[index, self.file_name_col].split(".")[0]
        img_path = self.img_root_folder + "/" + file_name_noext + ".jpg"
        img = Image.open(img_path)
        img_arr = np.array(img)
        if self.transform is not None:
            augmented = self.transform(image=img_arr)
            img_tfmd = augmented["image"]
        song_id = self.df.loc[index, "song_id"]        
        return song_id, img_tfmd

    def __len__(self):
        return len(self.df)

In [24]:
test_transform = alb.Compose([
        alb.CenterCrop(Config.INPUT_IMAGE_SIZE[0], Config.INPUT_IMAGE_SIZE[1]),
        alb.Normalize(mean=Config.IMG_MEAN, std=Config.IMG_STD),
        ToTensorV2()        
])

In [25]:
df_test = pd.read_csv(Config.DATA_ROOT_FOLDER + "test.csv")
# filter out records without any corresponding mel spectrogram image
df_test["mspec_exists"] = df_test.filename.map(
    lambda fp: os.path.exists(Config.IMG_ROOT_FOLDER + fp.split(".")[0] + ".jpg")
)
df_test_invalid = df_test[~df_test.mspec_exists]
df_test_invalid

Unnamed: 0,song_id,filename,filepath,mspec_exists
3546,22612,022612.ogg,test/022612.ogg,False
4249,24013,024013.ogg,test/024013.ogg,False


In [26]:
df_test = df_test[df_test.mspec_exists].reset_index(drop=True)

In [27]:
ds_test = AudioMelSpecImgTestDataset(
            df_test, 
            file_name_col="filename",        
            img_root_folder=Config.IMG_ROOT_FOLDER, 
            transform=test_transform
        )        

dl_test = DataLoader(ds_test, batch_size=Config.BATCH_SIZE)

In [28]:
import torchvision.models as models
import torch.nn as nn
from torch.nn.functional import cross_entropy
import torchmetrics
import timm

class MusicClfLitModel(pl.LightningModule):
    def __init__(self, num_classes, hparams, model_to_use):
        super().__init__()
        self.save_hyperparameters()
        self.lr = hparams["lr"]
        self.num_classes = num_classes              
        self.backbone, self.classifier = self.get_backbone_classifier(model_to_use, hparams["drop_out"], num_classes) 

    @staticmethod
    def get_backbone_classifier(model_to_use, drop_out, num_classes):
        pt_model = timm.create_model(model_to_use, pretrained=Config.PRETRAINED)
        backbone = None
        classifier = None
        if model_to_use in [Models.RESNET34, Models.RESNET50, Models.RESNEXT50]:            
            backbone = nn.Sequential(*list(pt_model.children())[:-1])
            in_features = pt_model.fc.in_features
            classifier = nn.Sequential(
                nn.Dropout(drop_out),
                nn.Linear(in_features, num_classes)
            )    
        return backbone, classifier

    def forward(self, x):
        features = self.backbone(x)
        features = torch.flatten(features, 1)                
        x = self.classifier(features)
        return x

    def configure_optimizers(self):
        model_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optimizer, eta_min=1e-5, T_max=Config.NUM_EPOCHS)
        #lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(model_optimizer, "min")        
        return {
            "optimizer": model_optimizer, 
            "lr_scheduler": {
                "scheduler": lr_scheduler,
                "monitor": "val_loss",
                "frequency": 1
            }
        }

    def training_step(self, batch, batch_idx):
        id, X, y = batch
        y_pred = self(X)
        loss = cross_entropy(y_pred, y)
        train_f1 = torchmetrics.functional.f1(preds=y_pred, target=y, num_classes=self.num_classes, average="micro")
        self.log("train_loss", loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        self.log("train_f1", train_f1, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        return loss        

    def validation_step(self, batch, batch_idx):
        id, X, y = batch
        y_pred = self(X)
        val_loss = cross_entropy(y_pred, y)
        val_f1 = torchmetrics.functional.f1(preds=y_pred, target=y, num_classes=self.num_classes, average="micro")
        self.log("val_loss", val_loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        self.log("val_f1", val_f1, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        return {"loss": val_loss, "val_f1": val_f1}

In [29]:
df_submission = pd.read_csv(Config.DATA_ROOT_FOLDER + "sample_submission.csv")
# records present both in df_test and df_submission
df_subm_test = pd.merge(
    left=df_test,
    right=df_submission,
    how="right",
    on="song_id",
    suffixes=("_test", "_subm")
)
print(f"len(df_subm_test) = {len(df_subm_test)}")
display(df_subm_test.head())

len(df_subm_test) = 5078


Unnamed: 0,song_id,filename,filepath,mspec_exists,genre_id
0,7072,007072.ogg,test/007072.ogg,True,0
1,10207,010207.ogg,test/010207.ogg,True,0
2,20008,020008.ogg,test/020008.ogg,True,0
3,10924,010924.ogg,test/010924.ogg,True,0
4,21896,021896.ogg,test/021896.ogg,True,0


In [30]:
#df = df_subm_test[df_subm_test.filename.notnull()]

In [31]:
for fold in range(Config.NUM_FOLDS):
    fold_best_model = MusicClfLitModel.load_from_checkpoint(
                            checkpoint_path=Config.MODEL_PATH_BASE + FOLD_BEST_MODELS[fold],
                            num_classes=Config.NUM_CLASSES, 
                            hparams=Config.MODEL_PARAMS,        
                            model_to_use=Config.MODEL_TO_USE
                        )
    fold_best_model.to(Config.DEVICE)
    print(f"Using fold {fold} best model = {FOLD_BEST_MODELS[fold]} for test prediction")
    with torch.no_grad():    
        for id, X in tqdm(dl_test):
            id = id.cpu().detach().numpy()        
            y_preds = torch.argmax(fold_best_model(X.to(Config.DEVICE)), dim=1)
            y_preds = y_preds.cpu().detach().numpy().astype(int)                    
            df_test.loc[df_test.song_id.isin(id), f"fold{fold}_genre_id"] = y_preds
df_test.to_csv(Config.DATA_ROOT_FOLDER + "test_fold_preds.csv")

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-rsb-weights/resnext50_32x4d_a1h-0146ab0a.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d_a1h-0146ab0a.pth


Using fold 0 best model = fold0_resnext50_32x4d_best_model_epoch=7_val_loss=1.4910.ckpt for test prediction


  0%|          | 0/159 [00:00<?, ?it/s]

Using fold 1 best model = fold1_resnext50_32x4d_best_model_epoch=9_val_loss=1.4835.ckpt for test prediction


  0%|          | 0/159 [00:00<?, ?it/s]

Using fold 2 best model = fold2_resnext50_32x4d_best_model_epoch=9_val_loss=1.5110.ckpt for test prediction


  0%|          | 0/159 [00:00<?, ?it/s]

Using fold 3 best model = fold3_resnext50_32x4d_best_model_epoch=9_val_loss=1.5210.ckpt for test prediction


  0%|          | 0/159 [00:00<?, ?it/s]

Using fold 4 best model = fold4_resnext50_32x4d_best_model_epoch=9_val_loss=1.4781.ckpt for test prediction


  0%|          | 0/159 [00:00<?, ?it/s]

In [34]:
def combine_preds(test_row):
    preds = np.zeros(Config.NUM_CLASSES)
    for fold in range(Config.NUM_FOLDS):
        fold_pred = int(test_row[f"fold{fold}_genre_id"])
        preds[fold_pred] += 1
    return np.argmax(preds)

df_test["genre_id"] = df_test.apply(lambda row:combine_preds(row), axis=1)
df_test.to_csv(Config.DATA_ROOT_FOLDER + "test_fold_preds.csv")    

In [37]:
df_invalid = df_subm_test[df_subm_test.filename.isnull()][["song_id", "genre_id"]]
df_preds = pd.concat([df_test[["song_id", "genre_id"]], df_invalid], axis=0)
df_preds.genre_id = df_preds.genre_id.astype(int)
print(f"len(df_preds) = {len(df_preds)}")

len(df_preds) = 5078


In [38]:
df_preds.head()

Unnamed: 0,song_id,genre_id
0,7072,1
1,10207,12
2,20008,0
3,10924,6
4,21896,4


In [39]:
df_preds.to_csv(Config.DATA_ROOT_FOLDER + "submission_resnext50_5folds.csv", index=False)