In [1]:
# Reload imported modules before every cell execution so that edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/tf/notebooks/src


## Initialization

In [3]:
# Sanity check: list the GPUs TensorFlow can see (an empty list means none).
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Packages

In [4]:
try:
    import pandas
except:
    !pip install pandas iterative-stratification tensorflow_addons nlpaug==0.0.20 tqdm

### Imports

In [5]:
import os
import glob
import click
import random
import logging
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import *

from train import get_model
from split_data import get_split
from metrics import LwlrapAccumulator
from dataloader import MelSampler, convert_csv_to_dict_for_dataloader

In [6]:
from params import *
from utils.metric import lwlrap

## Main

In [7]:
def run_multi_scale_eval(checkpoints_path, fold, scales):
    """
    Evaluate the last checkpoint of one fold at several loader scales.

    For every value in `scales`, the validation loader's `max_length` is set to
    that value, the model predicts on the fold's validation split and the
    per-class lwlrap is computed. The idea is, for each species in the test
    set, to use the prediction made at the scale that maximizes its lwlrap on
    the validation set. When the maximum is reached at several scales, the
    largest one should be preferred because smaller scales yield more false
    positives.

    The per-class scores are also saved to
    `<checkpoints_path>/lwlrap_at_scale_<fold>.npy`.

    Args:
        checkpoints_path (str): Folder holding the `fold<k>` sub-folders with `model-*.h5` files.
        fold (int): Index of the fold to evaluate.
        scales (list of int): Values assigned to the loader's `max_length`, one pass each.

    Returns:
        np.ndarray: Per-class lwlrap at each scale, shape (len(scales), NUM_CLASSES).
        np.ndarray: Sigmoid predictions stacked over scales, (len(scales), n_samples, n_classes).
        np.ndarray: One-hot ground truth of the validation split.

    Raises:
        FileNotFoundError: If no checkpoint is found for the requested fold.
    """
    train_data = pd.read_csv("../data/new_train_tp.csv")
    _, val_index = get_split(fold=fold)
    valid_dataset_csv = train_data.iloc[val_index]

    valid_data_loader = MelSampler(
        convert_csv_to_dict_for_dataloader(valid_dataset_csv),
        cache=True,
        batch_size=64,
        n_classes=NUM_CLASSES,
        is_train=False,
        use_cutmix=False,
        shuffle_aug=False,
        max_length=384,  # overwritten for every scale below
    )

    # Checkpoints are sorted by file name and the last one is loaded.
    # NOTE(review): presumably the name starts with the validation score, which
    # would make the last one the best - confirm against the training script.
    checkpoint_pattern = os.path.join(checkpoints_path, f"fold{fold}", "model-*.h5")
    all_checkpoints = sorted(glob.glob(checkpoint_pattern))
    if not all_checkpoints:
        raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")

    model = get_model(
        saved_path=checkpoints_path,
        pretrained_with_contrastive=False,
        pretrained_path=all_checkpoints[-1],
    )

    # The ground truth does not depend on the scale: build it once, up front.
    gts = tf.keras.utils.to_categorical(
        valid_dataset_csv["species_id"].values, NUM_CLASSES
    )

    lwlrap_at_scale = np.zeros((len(scales), NUM_CLASSES))
    preds = []

    for s, max_length in enumerate(scales):
        valid_data_loader.max_length = max_length
        clip_preds = model.predict(valid_data_loader, verbose=0)  # [B, 24]
        clip_preds = tf.nn.sigmoid(clip_preds).numpy()
        preds.append(clip_preds)

        class_lwlrap, _, score = lwlrap(gts, clip_preds)

        lwlrap_at_scale[s] = class_lwlrap
        print(f"Scale {max_length}:\t{score:.3f}")

    # f-string so that each fold gets its own file instead of all folds
    # overwriting a file literally named "lwlrap_at_scale_{fold}.npy".
    np.save(
        os.path.join(checkpoints_path, f"lwlrap_at_scale_{fold}.npy"),
        lwlrap_at_scale,
    )

    return lwlrap_at_scale, np.array(preds), gts

In [8]:
# Scales evaluated for every fold, in ascending order: each value is assigned
# to the validation loader's `max_length` inside run_multi_scale_eval.
SCALES = [32, 64, 128, 192, 256, 320, 384, 448, 512]

In [9]:
# Checkpoint folder of the run to evaluate; it is expected to contain one
# `fold<k>` sub-folder per fold. Other runs are kept commented out for quick
# switching.
# CP_FOLDER = LOG_PATH + "2021-01-28/4/"
# CP_FOLDER = LOG_PATH + "2021-01-28/8/"
CP_FOLDER = LOG_PATH + "2021-01-28/10/"

In [10]:
# Run the multi-scale evaluation on every fold and, for each class, keep the
# predictions made at the scale with the best validation lwlrap.
# NOTE: the best scale is selected on the same validation split it is scored
# on, so the "Multi-scale" figures are optimistic.
n_folds = 5

lwlraps = []     # per fold: (n_scales, NUM_CLASSES) per-class lwlrap
preds = []       # per fold: (n_scales, n_samples, n_classes) predictions
best_preds = []  # per fold: (n_samples, NUM_CLASSES) best-scale predictions
ys = []          # per fold: one-hot ground truth

for fold_idx in range(n_folds):
    print(f"\n-------------   Fold {fold_idx + 1} / {n_folds}  -------------\n")
    lwlrap_at_scale, pred, y = run_multi_scale_eval(CP_FOLDER, fold_idx, SCALES)

    # Per-class index of the best scale. A plain argmax returns the FIRST
    # maximum, i.e. the smallest scale on ties; the scale axis is reversed so
    # that ties resolve to the LARGEST scale instead (SCALES is ascending),
    # which is what the docstring of run_multi_scale_eval asks for.
    n_scales = lwlrap_at_scale.shape[0]
    best_scale_idx = n_scales - 1 - lwlrap_at_scale[::-1].argmax(axis=0)

    best_pred = np.array(
        [pred[best_scale_idx[i], :, i] for i in range(NUM_CLASSES)]
    ).T
    score = lwlrap(y, best_pred)[-1]

    print(f'\nMulti-scale : {score:.3f}')

    lwlraps.append(lwlrap_at_scale)
    best_preds.append(best_pred)
    preds.append(pred)
    ys.append(y)


-------------   Fold 1 / 5  -------------

 -> Loading weights from ../logs/2021-01-28/10/fold0/model-0.864-0.487.h5

Scale 32:	0.566
Scale 64:	0.757
Scale 128:	0.851
Scale 192:	0.855
Scale 256:	0.858
Scale 320:	0.852
Scale 384:	0.842
Scale 448:	0.859
Scale 512:	0.831

Multi-scale : 0.870

-------------   Fold 2 / 5  -------------

 -> Loading weights from ../logs/2021-01-28/10/fold1/model-0.893-0.094.h5

Scale 32:	0.548
Scale 64:	0.752
Scale 128:	0.876
Scale 192:	0.895
Scale 256:	0.897
Scale 320:	0.899
Scale 384:	0.895
Scale 448:	0.890
Scale 512:	0.880

Multi-scale : 0.924

-------------   Fold 3 / 5  -------------

 -> Loading weights from ../logs/2021-01-28/10/fold2/model-0.851-0.054.h5

Scale 32:	0.484
Scale 64:	0.695
Scale 128:	0.815
Scale 192:	0.861
Scale 256:	0.859
Scale 320:	0.838
Scale 384:	0.837
Scale 448:	0.839
Scale 512:	0.841

Multi-scale : 0.877

-------------   Fold 4 / 5  -------------

 -> Loading weights from ../logs/2021-01-28/10/fold3/model-0.878-0.356.h5

Scale 32

In [12]:
# Mean of five hard-coded scores - presumably per-fold results copied by hand
# from an earlier run; TODO confirm which run they belong to.
(0.892 + 0.933 + 0.888 + 0.872 + 0.903) / 5

0.8976

In [13]:
# Mean of five hard-coded scores - presumably per-fold results copied by hand
# from another run; TODO confirm which run they belong to.
(0.875  +  0.925  + 0.889 +  0.915  + 0.869 ) / 5

0.8946

In [None]:
# Pool the targets and the best-scale predictions of all folds, then score
# them together to get a single local cross-validation figure.
cv_targets = np.concatenate(ys)
cv_best_preds = np.concatenate(best_preds)
score = lwlrap(cv_targets, cv_best_preds)[-1]

print(f'Local CV score {score:.3f}')