In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython import display as ipd
from glob import glob
import librosa
import seaborn as sns
import librosa.display
import skimage.io
import os
import torch
from sklearn import model_selection
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pytorch_lightning as pl

In [2]:
class Config:
    """Notebook-wide training settings, read everywhere as plain class attributes."""

    # --- data ---
    DATA_ROOT_FOLDER = "./data/"
    # placeholder; a later cell overwrites it with the number of distinct genre ids
    NUM_CLASSES = 5
    # number of MFCC coefficients per clip (input width of the classifier)
    NUM_MFCC = 40

    # --- cross validation / reproducibility ---
    NUM_FOLDS = 5
    RANDOM_SEED = 42

    # --- dataloaders ---
    BATCH_SIZE = 256
    NUM_WORKERS = 8

    # --- trainer ---
    NUM_EPOCHS = 10
    UNFREEZE_EPOCH_NO = 1
    # early-stopping patience, in validation epochs
    PATIENCE = 10
    PRECISION = 16
    FAST_DEV_RUN = False

    # --- model hyperparameters ---
    MODEL_PARAMS = dict(drop_out=0.25, lr=0.00036)

class AudioConfig:
    """Audio clip and spectrogram settings, exposed as class attributes."""

    # samples per second
    sampling_rate = 22050
    # clip duration in seconds
    duration = 10
    # number of samples in a clip of `duration` seconds at `sampling_rate`
    samples = sampling_rate * duration

    # number of samples per time-step in the spectrogram
    hop_length = 512
    # length of the windowed signal after zero-padding (2048 with this hop length)
    n_fft = hop_length * 4
    # each frame is windowed with win_length samples and then padded up to n_fft
    win_length = hop_length * 4

    # number of mel bins (image height)
    n_mels = 224
    # number of time-steps (image width)
    time_steps = 223

    # frequency range; fmax is half the sampling rate
    fmin = 20
    fmax = sampling_rate // 2

    # np.pad mode used when a clip has to be padded
    padmode = 'constant'

In [3]:
def mfcc_exists(mfcc):
    """Return True when ``mfcc`` is a non-empty sized object; False for None or an empty one."""
    return mfcc is not None and len(mfcc) > 0

In [4]:
# Load the pickled training frame (it already carries an "mfcc" column) and flag the rows
# that actually hold a feature vector.
# NOTE(review): unpickling can execute arbitrary code; only load a pickle you produced yourself.
df_train = pd.read_pickle(Config.DATA_ROOT_FOLDER + "df_train_mfcc.pkl")
df_train["mfcc_exists"] = df_train["mfcc"].map(mfcc_exists)
df_train.head()

Unnamed: 0,song_id,filename,filepath,genre_id,genre,file_exists,mfcc,mfcc_exists
0,10150,010150.ogg,train/010150.ogg,7,Instrumental,True,"[-256.21112, 125.60442, -0.42895874, 85.35346,...",True
1,7358,007358.ogg,train/007358.ogg,2,Punk,True,"[-37.284946, 120.587944, -26.513258, 50.83251,...",True
2,20573,020573.ogg,train/020573.ogg,5,Folk,True,"[-364.31793, 154.59741, -48.284782, 15.837085,...",True
3,11170,011170.ogg,train/011170.ogg,12,Old-Time / Historic,True,"[-346.21207, 202.10587, -74.181465, -60.517387...",True
4,16662,016662.ogg,train/016662.ogg,1,Rock,True,"[-48.237347, 141.68365, -49.396336, 69.53004, ...",True


In [5]:
# Inspect the rows whose "mfcc" entry is missing or empty (mfcc_exists is False).
df_train[~df_train.mfcc_exists]

Unnamed: 0,song_id,filename,filepath,genre_id,genre,file_exists,mfcc,mfcc_exists
1359,3137,003137.ogg,train/003137.ogg,1,Rock,False,,False
4668,11088,011088.ogg,train/011088.ogg,2,Punk,False,,False
5150,16312,016312.ogg,train/016312.ogg,7,Instrumental,False,,False
8764,24899,024899.ogg,train/024899.ogg,0,Electronic,False,,False
10155,4040,004040.ogg,train/004040.ogg,4,Hip-Hop,False,,False
10873,9963,009963.ogg,train/009963.ogg,3,Experimental,False,,False
11386,15980,015980.ogg,train/015980.ogg,4,Hip-Hop,False,,False
11497,22698,022698.ogg,train/022698.ogg,4,Hip-Hop,False,,False
14377,23078,023078.ogg,train/023078.ogg,5,Folk,False,,False
14827,17940,017940.ogg,train/017940.ogg,0,Electronic,False,,False


In [6]:
# Load the test listing and record, per row, whether its audio file is present on disk.
df_test = pd.read_csv(Config.DATA_ROOT_FOLDER + "test.csv")
df_test["file_exists"] = df_test["filepath"].map(
    lambda rel_path: os.path.exists(Config.DATA_ROOT_FOLDER + rel_path)
)
df_test.head()

Unnamed: 0,song_id,filename,filepath,file_exists
0,7072,007072.ogg,test/007072.ogg,True
1,10207,010207.ogg,test/010207.ogg,True
2,20008,020008.ogg,test/020008.ogg,True
3,10924,010924.ogg,test/010924.ogg,True
4,21896,021896.ogg,test/021896.ogg,True


In [7]:
# Replace the placeholder class count with the number of distinct genre ids in the training data.
# NOTE(review): the classifier emits NUM_CLASSES logits, so this presumably relies on genre_id
# values being exactly 0..NUM_CLASSES-1 - confirm against the dataset.
Config.NUM_CLASSES = len(df_train.genre_id.unique())

In [8]:
# Split the training dataframe into k folds for cross validation. This is done before any other
# processing of the data. Stratified k-fold keeps the target distribution similar across folds,
# which matters when the classes are unbalanced.
def strat_kfold_dataframe(df, target_col_name, num_folds=Config.NUM_FOLDS):
    """Return a shuffled copy of ``df`` with a ``kfold`` column holding each row's validation fold.

    Args:
        df: dataframe to split. It is not modified; a new frame is returned.
        target_col_name: name of the column to stratify on.
        num_folds: number of folds.

    Returns:
        A new dataframe with rows shuffled, the index reset to 0..n-1 and an
        integer ``kfold`` column in ``range(num_folds)``.
    """
    # assign() builds a new frame, so the caller's dataframe (often a filtered
    # slice of a bigger frame) is neither mutated nor triggers SettingWithCopyWarning
    df = df.assign(kfold=-1)
    # shuffle the rows before splitting; reset the index so positions == labels
    df = df.sample(frac=1, random_state=Config.RANDOM_SEED).reset_index(drop=True)
    # get the target data
    y = df[target_col_name].values
    skf = model_selection.StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=Config.RANDOM_SEED)
    # split() yields positional indices; they are valid labels because of the reset above
    for fold, (_, val_index) in enumerate(skf.split(X=df, y=y)):
        df.loc[val_index, "kfold"] = fold
    return df

# Keep only the rows that have MFCC features, then tag every remaining row with its fold.
df_train = df_train.loc[df_train["mfcc_exists"]]
df_train = strat_kfold_dataframe(df_train, target_col_name="genre_id")
df_train.head()

Unnamed: 0,song_id,filename,filepath,genre_id,genre,file_exists,mfcc,mfcc_exists,kfold
0,2296,002296.ogg,train/002296.ogg,1,Rock,True,"[-51.503376, 90.0839, -29.26633, 51.95741, -1....",True,3
1,8198,008198.ogg,train/008198.ogg,17,Blues,True,"[-219.78317, 179.04573, -47.457195, 30.100721,...",True,2
2,17663,017663.ogg,train/017663.ogg,12,Old-Time / Historic,True,"[-308.57285, 210.7832, -61.69949, -59.704113, ...",True,2
3,7167,007167.ogg,train/007167.ogg,9,International,True,"[-204.00072, 187.04646, -35.93575, 29.989346, ...",True,3
4,11242,011242.ogg,train/011242.ogg,6,Chiptune / Glitch,True,"[-75.96725, 91.53988, -38.52349, 64.71302, -28...",True,3


In [9]:
def read_audio(conf, pathname, trim_long_data):
    """Load an audio file, strip leading/trailing silence, then trim or pad the signal.

    Args:
        conf: settings object providing ``time_steps``, ``hop_length``, ``samples``
            and ``padmode`` (e.g. ``AudioConfig``).
        pathname: path of the audio file to load.
        trim_long_data: when True, a signal longer than ``conf.samples`` is cut to
            its first ``conf.time_steps * conf.hop_length`` samples; when False it
            is returned at full length.

    Returns:
        ``(y, sr)``: the processed signal and the sampling rate reported by librosa.
    """
    # sr=None asks librosa not to resample
    y, sr = librosa.load(pathname, sr=None, res_type="kaiser_fast")
    # trim silence
    if 0 < len(y):  # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y)  # trim, top_db=default(60)
    # fixed-length window taken from long clips, starting at the beginning
    start_sample = 0
    length_samples = conf.time_steps * conf.hop_length
    if len(y) > conf.samples:  # long enough
        if trim_long_data:
            y = y[start_sample : start_sample + length_samples]
    else:  # pad up to conf.samples, split evenly between both ends
        # The padding must be measured against the same target it pads towards.
        # Measuring it against length_samples produced a negative pad width
        # (np.pad raises ValueError) whenever length_samples < len(y) <= conf.samples.
        padding = conf.samples - len(y)
        offset = padding // 2
        y = np.pad(y, (offset, padding - offset), conf.padmode)
    # NOTE(review): trimmed clips are length_samples long while padded clips are
    # conf.samples long - confirm whether a single target length was intended.
    return y, sr

In [10]:
def extract_mfcc_features(filename):
    """Compute the time-averaged MFCC vector for one training clip.

    ``filename`` is the bare file name; it is looked up under
    ``Config.DATA_ROOT_FOLDER + "train/"``. Returns None when the file does not
    exist, otherwise the mean of each MFCC coefficient over all frames.
    """
    audio_path = Config.DATA_ROOT_FOLDER + "train/" + filename
    if not os.path.exists(audio_path):
        return None
    signal, signal_sr = read_audio(AudioConfig, audio_path, True)
    coeffs = librosa.feature.mfcc(y=signal, sr=signal_sr, n_mfcc=Config.NUM_MFCC)
    # transpose so that axis 0 runs over frames, then average it away
    return np.mean(coeffs.T, axis=0)

In [11]:
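# NOTE: stale snippet kept for reference. DATA_PATH is not defined in this notebook, and
# extract_mfcc_features() already prepends Config.DATA_ROOT_FOLDER + "train/" to the name it
# is given, so it should be called with the bare filename.
# df_train["mfcc"] = df_train.filename.apply(lambda filename: extract_mfcc_features(DATA_PATH + "train/" + filename))
# df_train.to_pickle("df_train_mfcc.pkl")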

In [12]:
# from joblib import delayed, Parallel

# df_train = df_train.head(100)
# delayed_funcs_train = [delayed(extract_mfcc_features)(row["filename"]) for i, row in df_train.iterrows()]
# results_train = Parallel(n_jobs=-1, verbose=5)(delayed_funcs_train)    

In [13]:
# A dataset contains the logic to fetch, load and if required transform data to bring it to a format
# that can be used by dataloaders for training.
class AudioMfccDataset(Dataset):
    """Dataset serving ``(mfcc, target)`` pairs straight from dataframe columns.

    Args:
        df: dataframe holding the features and targets.
        mfcc_col: name of the column containing the MFCC vector of each row.
        target_col: name of the column containing the label of each row.
        transform: optional callable applied to the MFCC value.
        target_transform: optional callable applied to the target value.
    """

    def __init__(self, df, mfcc_col, target_col, transform=None, target_transform=None):
        self.df = df
        self.mfcc_col = mfcc_col
        self.target_col = target_col
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return the (optionally transformed) ``(mfcc, target)`` pair at position ``index``."""
        # positional lookup: samplers hand out positions in range(len(self)), which only
        # coincide with index labels when the frame has a fresh 0..n-1 index
        mfcc = self.df[self.mfcc_col].iloc[index]
        target = self.df[self.target_col].iloc[index]
        # without a transform the raw value is returned unchanged
        if self.transform is not None:
            mfcc = self.transform(mfcc)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return mfcc, target

    def __len__(self):
        """Number of rows in the underlying dataframe."""
        return len(self.df)

In [14]:
def get_fold_dls(fold, df_imgs):
    """Build the train/validation dataloaders and datasets for one fold.

    Rows whose ``kfold`` equals ``fold`` form the validation split; every other
    row goes to the training split. Returns ``(dl_train, dl_val, ds_train, ds_val)``.
    """
    fold_col = df_imgs["kfold"]
    train_rows = df_imgs[fold_col != fold].reset_index(drop=True)
    val_rows = df_imgs[fold_col == fold].reset_index(drop=True)

    # both datasets read the same columns and convert values to tensors the same way
    dataset_args = dict(
        mfcc_col="mfcc",
        target_col="genre_id",
        transform=torch.as_tensor,
        target_transform=torch.as_tensor,
    )
    ds_train = AudioMfccDataset(train_rows, **dataset_args)
    ds_val = AudioMfccDataset(val_rows, **dataset_args)

    # only the training loader shuffles
    dl_train = DataLoader(
        ds_train,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        num_workers=Config.NUM_WORKERS,
    )
    dl_val = DataLoader(
        ds_val,
        batch_size=Config.BATCH_SIZE,
        num_workers=Config.NUM_WORKERS,
    )
    return dl_train, dl_val, ds_train, ds_val

In [15]:
# Build the loaders and datasets for fold 0 so a sample can be inspected below.
dl_train, dl_val, ds_train, ds_val = get_fold_dls(0, df_train)

In [16]:
# Sanity check: show the first validation sample as an (mfcc, target) pair.
ds_val[0]

(tensor([ 77.1201,  74.4379, -41.6419,  43.3034, -36.6637,  51.3666, -30.3715,
          33.0450, -20.6842,  34.3427, -23.1814,  27.6178, -15.7877,  20.3962,
         -10.9349,  12.6420,  -5.7834,  11.1199,  -3.6817,   0.7359,  -0.5042,
           1.7668,   9.0716,  -2.0096,   3.4431,  -7.1916,   8.2092,  -7.3172,
           9.1665,  -6.9760,   8.0860,  -6.1567,   7.5181,  -6.5443,   6.9241,
          -4.6341,   3.5342,  -2.2990,   3.0409,  -1.7411]),
 tensor(3))

In [17]:
class MfccAudioClfNet(nn.Module):
    """Fully connected classifier over an MFCC feature vector.

    Layout: Linear(num_mfcc, 256) -> ReLU -> Dropout -> Linear(256, 128) -> ReLU
    -> Dropout -> Linear(128, num_classes). The output is raw logits.

    Args:
        num_mfcc: size of the input feature vector.
        num_classes: number of output logits.
        drop_out: probability used by both dropout layers. The default of 0.5
            matches ``nn.Dropout()``, so existing callers behave as before.
    """

    def __init__(self, num_mfcc, num_classes, drop_out=0.5):
        super().__init__()
        # attribute names are kept stable so saved state_dicts keep loading
        self.fc1 = nn.Linear(in_features=num_mfcc, out_features=256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=drop_out)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=drop_out)
        self.classifier = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Map a batch of feature vectors to class logits."""
        x = self.dropout1(self.relu1(self.fc1(x)))
        x = self.dropout2(self.relu2(self.fc2(x)))
        return self.classifier(x)

In [18]:
from torch.nn.functional import cross_entropy
import torchmetrics

class AudioClfMfccLitModel(pl.LightningModule):
    """Lightning wrapper around ``MfccAudioClfNet``.

    Trains with cross-entropy loss and logs the loss and a micro-averaged F1 for
    both training and validation under ``train_loss`` / ``train_f1`` /
    ``val_loss`` / ``val_f1``.

    Args:
        num_classes: number of target classes.
        num_mfcc: size of the input feature vector.
        hparams: dict of hyperparameters; ``hparams["lr"]`` is the learning rate.
    """

    def __init__(self, num_classes, num_mfcc, hparams):
        super().__init__()
        # must stay a direct call from __init__ so the constructor arguments are captured
        self.save_hyperparameters()
        self.num_classes = num_classes
        self.lr = hparams["lr"]
        self.net = MfccAudioClfNet(num_mfcc=num_mfcc, num_classes=num_classes)

    def forward(self, x):
        return self.net(x)

    def configure_optimizers(self):
        """Adam over the trainable parameters, with ReduceLROnPlateau driven by ``val_loss``."""
        trainable_params = [p for p in self.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(trainable_params, lr=self.lr)
        plateau_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min")
        scheduler_cfg = {
            "scheduler": plateau_scheduler,
            "monitor": "val_loss",
            "frequency": 1,
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler_cfg}

    def _loss_and_f1(self, batch):
        """Run the network on one batch and return ``(cross-entropy loss, micro F1)``."""
        features, labels = batch
        logits = self(features)
        loss = cross_entropy(logits, labels)
        f1 = torchmetrics.functional.f1(
            preds=logits, target=labels, num_classes=self.num_classes, average="micro"
        )
        return loss, f1

    def training_step(self, batch, batch_idx):
        loss, train_f1 = self._loss_and_f1(batch)
        for name, value in (("train_loss", loss), ("train_f1", train_f1)):
            self.log(name, value, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        val_loss, val_f1 = self._loss_and_f1(batch)
        for name, value in (("val_loss", val_loss), ("val_f1", val_f1)):
            self.log(name, value, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        return {"loss": val_loss, "val_f1": val_f1}

In [19]:
from pytorch_lightning.callbacks import ModelCheckpoint, BackboneFinetuning, EarlyStopping

# For results reproducibility: seed everything once, up front, from Config.RANDOM_SEED.
# Sets seeds for numpy, torch, python.random and PYTHONHASHSEED.
# NOTE(review): workers=True is presumably there so dataloader worker processes are seeded
# as well - confirm against the installed Lightning version.
pl.seed_everything(Config.RANDOM_SEED, workers=True)

Global seed set to 42


42

In [20]:
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import Callback

class MetricsAggCallback(Callback):
    """Collect one monitored metric per validation run and track its best value.

    Args:
        metric_to_monitor: key looked up in ``trainer.callback_metrics``.
        mode: ``"max"`` when larger is better, ``"min"`` when smaller is better.

    Attributes:
        metrics: recorded metric values, one per validation run (sanity check excluded).
        best_metric: best value recorded so far, or None before the first run.
        best_metric_epoch: position of ``best_metric`` inside ``metrics``.

    Raises:
        ValueError: if ``mode`` is neither ``"min"`` nor ``"max"``.
    """

    def __init__(self, metric_to_monitor, mode):
        # an unknown mode used to be accepted and then silently never tracked a best value
        if mode not in ("min", "max"):
            raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")
        self.metric_to_monitor = metric_to_monitor
        self.metrics = []
        self.best_metric = None
        self.mode = mode
        self.best_metric_epoch = None

    def on_validation_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        """Print the monitored metric and val_loss, then update the running best."""
        metric_value = trainer.callback_metrics[self.metric_to_monitor].cpu().detach().item()
        val_loss = trainer.callback_metrics["val_loss"].cpu().detach().item()
        print(f"metric {self.metric_to_monitor} = {metric_value}, val_loss={val_loss}")
        # The pre-training sanity check only sees a few batches of an untrained model.
        # Recording it would shift best_metric_epoch by one and could even win as "best".
        # getattr keeps this working on trainers that lack the attribute.
        if getattr(trainer, "sanity_checking", False):
            return
        self.metrics.append(metric_value)
        pick_best = max if self.mode == "max" else min
        self.best_metric = pick_best(self.metrics)
        self.best_metric_epoch = self.metrics.index(self.best_metric)

In [21]:
from pytorch_lightning.loggers import WandbLogger

def run_training(fold, dl_train, dl_val, fold_loss, fold_f1, find_lr=True):
    """Train one model on a single fold and record its best validation scores.

    Args:
        fold: fold number; used in console messages and, when not None, as a
            prefix of the checkpoint file names.
        dl_train: training dataloader.
        dl_val: validation dataloader.
        fold_loss: list that receives a ``(best val_loss, checkpoint path)`` tuple.
        fold_f1: list that receives a ``(best val_f1, checkpoint path)`` tuple.
        find_lr: when True, run ``trainer.tune`` (learning-rate finder) before fitting.

    Nothing is appended to ``fold_loss`` / ``fold_f1`` when ``Config.FAST_DEV_RUN`` is set.
    """
    fold_str = f"fold{fold}"
    print(f"Running training for {fold_str}")
    val_loss_chkpt = "best_model_{epoch}_{val_loss:.4f}"
    val_f1_chkpt = "best_model_{epoch}_{val_f1:.4f}"
    early_stopping_callback = EarlyStopping(monitor="val_loss", patience=Config.PATIENCE, mode="min", verbose=True)
    if fold is not None:
        val_loss_chkpt = fold_str + "_" + val_loss_chkpt
        val_f1_chkpt = fold_str + "_" + val_f1_chkpt
    # NOTE(review): this logger is created but the Trainer below is built with logger=None,
    # so nothing is sent to it - confirm whether W&B logging was meant to be enabled.
    logger = WandbLogger(name="mfcc_baseline", project="Pog_Music_Clf")
    audio_model = AudioClfMfccLitModel(
        num_classes=Config.NUM_CLASSES,
        num_mfcc=Config.NUM_MFCC,
        hparams=Config.MODEL_PARAMS
    )
    # one checkpoint for the lowest val_loss, one for the highest val_f1
    val_loss_chkpt_callback = ModelCheckpoint(dirpath="./model", verbose=True, monitor="val_loss", mode="min", filename=val_loss_chkpt)
    val_f1_chkpt_callback = ModelCheckpoint(dirpath="./model", verbose=True, monitor="val_f1", mode="max", filename=val_f1_chkpt)
    acc_chkpt_callback = MetricsAggCallback(metric_to_monitor="val_f1", mode="max")
    trainer = pl.Trainer(
        gpus=1,
        # For results reproducibility
        deterministic=True,
        auto_select_gpus=True,
        progress_bar_refresh_rate=20,
        max_epochs=Config.NUM_EPOCHS,
        logger=None,
        auto_lr_find=True,
        precision=Config.PRECISION,
        weights_summary="full",
        fast_dev_run=Config.FAST_DEV_RUN,
        callbacks=[val_loss_chkpt_callback, val_f1_chkpt_callback, acc_chkpt_callback, early_stopping_callback]
    )
    if find_lr:
        trainer.tune(model=audio_model, train_dataloaders=dl_train)
        print(audio_model.lr)
    trainer.fit(audio_model, train_dataloaders=dl_train, val_dataloaders=dl_val)
    if not Config.FAST_DEV_RUN:
        fold_loss.append((val_loss_chkpt_callback.best_model_score.cpu().detach().item(), val_loss_chkpt_callback.best_model_path))
        fold_f1.append((acc_chkpt_callback.best_metric, val_f1_chkpt_callback.best_model_path))
        # Report the entries just appended. Indexing by the fold number only worked when
        # folds were run in order 0, 1, 2, ... into initially empty lists.
        print(f"Loss for {fold_str} = {fold_loss[-1]}, f1 = {fold_f1[-1]}")
    del trainer, audio_model, early_stopping_callback, acc_chkpt_callback, val_loss_chkpt_callback, val_f1_chkpt_callback

In [22]:
import statistics

def print_exp_statistics(fold_loss, fold_acc):
    """Print the per-fold validation loss records, then the per-fold f1 records."""
    sections = (
        ("val loss across folds", fold_loss),
        ("val f1 across folds", fold_acc),
    )
    for heading, records in sections:
        print(heading)
        print(records)
    # Mean / stdev reporting across folds (statistics.mean / statistics.stdev) is currently
    # disabled; only the raw per-fold records are printed.

In [23]:
# Cross-validation driver: each trained fold appends its best scores to the two lists.
find_lr = True
fold_loss, fold_acc = [], []
for fold in range(Config.NUM_FOLDS):
    dl_train, dl_val, ds_train, ds_val = get_fold_dls(fold, df_train)
    run_training(fold, dl_train, dl_val, fold_loss, fold_acc, find_lr)
    # only the first fold is trained; remove the break to run every fold
    break
print_exp_statistics(fold_loss, fold_acc)

Running training for fold0


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
Using native 16bit precision.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(f"you defined a {step_name} but have no {loader_name}. Skipping {stage} loop")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type            | Params
---------------------------------------------------
0 | net            | MfccAudioClfNet | 45.8 K
1 | net.fc1        | Linear          | 10.5 K
2 | net.relu1      | ReLU            | 0     
3 | net.dropout1   | Dropout         | 0     
4 | net.fc2        | Linear          | 32.9 K
5 | net.relu2      | ReLU            | 0     
6 | net.dropout2   | Dropout         | 0     
7 | net.classifier | Linear          | 2.5 K 
---------------------------------------------------
45.8 K    Trainable params
0         Non-trainable params
45.8 K    Total params
0.183     Total estimated model params size (M

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

Restoring states from the checkpoint file at /home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/lr_find_temp_model.ckpt
Restored all states from the checkpoint file at /home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/lr_find_temp_model.ckpt
Learning rate set to 0.003981071705534969
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type            | Params
---------------------------------------------------
0 | net            | MfccAudioClfNet | 45.8 K
1 | net.fc1        | Linear          | 10.5 K
2 | net.relu1      | ReLU            | 0     
3 | net.dropout1   | Dropout         | 0     
4 | net.fc2        | Linear          | 32.9 K
5 | net.relu2      | ReLU            | 0     
6 | net.dropout2   | Dropout         | 0     
7 | net.classifier | Linear          | 2.5 K 
---------------------------------------------------
45.8 K    Trainable params
0         Non-trainable params
45.8 K    Total params
0.183     Total estimated model params si

0.003981071705534969


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


metric val_f1 = 0.0703125, val_loss=7.652549743652344


Training: 36it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 2.265
Epoch 0, global step 62: val_loss reached 2.26545 (best 2.26545), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=0_val_loss=2.2655.ckpt" as top 1
Epoch 0, global step 62: val_f1 reached 0.25063 (best 0.25063), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=0_val_f1=0.2506.ckpt" as top 1


metric val_f1 = 0.2506278157234192, val_loss=2.265454053878784


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.143 >= min_delta = 0.0. New best score: 2.123
Epoch 1, global step 125: val_loss reached 2.12265 (best 2.12265), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=1_val_loss=2.1227.ckpt" as top 1
Epoch 1, global step 125: val_f1 reached 0.30010 (best 0.30010), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=1_val_f1=0.3001.ckpt" as top 1


metric val_f1 = 0.3001004457473755, val_loss=2.1226511001586914


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 2.090
Epoch 2, global step 188: val_loss reached 2.08991 (best 2.08991), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=2_val_loss=2.0899.ckpt" as top 1
Epoch 2, global step 188: val_f1 reached 0.30814 (best 0.30814), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=2_val_f1=0.3081.ckpt" as top 1


metric val_f1 = 0.30813661217689514, val_loss=2.0899057388305664


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 2.080
Epoch 3, global step 251: val_loss reached 2.07959 (best 2.07959), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=3_val_loss=2.0796.ckpt" as top 1
Epoch 3, global step 251: val_f1 was not in top 1


metric val_f1 = 0.2950778603553772, val_loss=2.0795934200286865


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 2.076
Epoch 4, global step 314: val_loss reached 2.07638 (best 2.07638), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=4_val_loss=2.0764.ckpt" as top 1
Epoch 4, global step 314: val_f1 was not in top 1


metric val_f1 = 0.29432445764541626, val_loss=2.0763773918151855


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 2.065
Epoch 5, global step 377: val_loss reached 2.06539 (best 2.06539), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=5_val_loss=2.0654.ckpt" as top 1
Epoch 5, global step 377: val_f1 reached 0.31215 (best 0.31215), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=5_val_f1=0.3122.ckpt" as top 1


metric val_f1 = 0.31215471029281616, val_loss=2.065389633178711


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 2.024
Epoch 6, global step 440: val_loss reached 2.02431 (best 2.02431), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=6_val_loss=2.0243.ckpt" as top 1
Epoch 6, global step 440: val_f1 reached 0.32195 (best 0.32195), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=6_val_f1=0.3219.ckpt" as top 1


metric val_f1 = 0.321948766708374, val_loss=2.0243079662323


Validating: 0it [00:00, ?it/s]

Epoch 7, global step 503: val_loss was not in top 1
Epoch 7, global step 503: val_f1 was not in top 1


metric val_f1 = 0.31567052006721497, val_loss=2.040850877761841


Validating: 0it [00:00, ?it/s]

Epoch 8, global step 566: val_loss was not in top 1
Epoch 8, global step 566: val_f1 was not in top 1


metric val_f1 = 0.321948766708374, val_loss=2.0314948558807373


Validating: 0it [00:00, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 2.020
Epoch 9, global step 629: val_loss reached 2.02006 (best 2.02006), saving model to "/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=9_val_loss=2.0201.ckpt" as top 1
Epoch 9, global step 629: val_f1 was not in top 1


metric val_f1 = 0.31491711735725403, val_loss=2.020061492919922
Loss for fold0 = (2.020061492919922, '/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=9_val_loss=2.0201.ckpt'), f1 = (0.321948766708374, '/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=6_val_f1=0.3219.ckpt')
val loss across folds
[(2.020061492919922, '/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=9_val_loss=2.0201.ckpt')]
val f1 across folds
[(0.321948766708374, '/home/bk_anupam/code/ML/KaggleChallenges/AUDIO/MusicClassification/model/fold0_best_model_epoch=6_val_f1=0.3219.ckpt')]
