In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to the project
# modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/tf/notebooks/src


## Initialization

In [3]:
import tensorflow as tf
# Show the GPU devices TensorFlow can see (displayed as the cell output).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Packages

In [4]:
try:
    import pandas
except:
    !pip install pandas iterative-stratification nlpaug==0.0.20 tqdm click tensorflow_probability==0.11.1 tf2_resnets tensorflow_addons==0.11.1 image-classifiers==0.2.2

### Imports

In [8]:
import os
import glob
import random
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.metrics import *

from train import get_model
from split_data import get_split
from metrics import LwlrapAccumulator
from dataloaders.val import MelSampler
from dataloaders.utils import csv_to_dict

In [9]:
from params import *
from utils.metric import lwlrap

## Main

In [10]:
def run_multi_scale_eval(checkpoints_path, fold, scales):
    """
    Evaluate one fold's checkpoint on its validation split at several scales.

    For every entry of `scales`, the validation loader's `max_length` is set to
    that value, the model predicts on the validation split and the per-class
    lwlrap is recorded. The resulting table lets the caller pick, for each
    species, the predictions of the scale that maximizes its lwlrap on the
    validation set. When several scales tie, the intent is to prefer the
    largest one because, per the original author, smaller scales yield more
    false positives.

    Args:
        checkpoints_path: Directory of a training run. Checkpoints are looked
            up as `<checkpoints_path>/fold<fold>/model-*.h5` and the last one
            in sorted (lexicographic) order is loaded.
        fold: Fold index, passed to `get_split` and used in file names.
        scales: Sequence of `max_length` values to evaluate, in order.

    Returns:
        A tuple `(lwlrap_at_scale, preds, seg_preds, gts)` where
        `lwlrap_at_scale` has shape `(len(scales), NUM_CLASSES)`, `preds` is
        `np.array` of the first model output for every scale, `seg_preds` is
        the list of the second model output for every scale, and `gts` is the
        one-hot encoding of the `species_id` column of the validation rows.

    Raises:
        FileNotFoundError: If no checkpoint matches the expected pattern.

    Side effects:
        Prints the overall score for every scale and saves `lwlrap_at_scale`
        to `<checkpoints_path>/lwlrap_at_scale_<fold>.npy`.
    """
    # Fail early, before building the data loader, if there is nothing to load.
    checkpoint_pattern = os.path.join(checkpoints_path, f"fold{fold}", "model-*.h5")
    all_checkpoints = sorted(glob.glob(checkpoint_pattern))
    if not all_checkpoints:
        raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")

    train_data = pd.read_csv("../data/new_train_tp.csv")
    _, val_index = get_split(fold=fold)
    valid_dataset_csv = train_data.iloc[val_index]

    valid_data_loader = MelSampler(
        csv_to_dict(valid_dataset_csv),
        cache=True,
        batch_size=64,
        n_classes=NUM_CLASSES,
        is_train=False,
        use_cutmix=False,
        shuffle_aug=False,
        max_length=384,
    )

    model = get_model(
        saved_path=checkpoints_path,
        pretrained_with_contrastive=False,
        pretrained_path=all_checkpoints[-1],
    )

    # The ground truth does not depend on the scale: build it once, up front.
    # This also keeps `gts` defined when `scales` is empty.
    gts = tf.keras.utils.to_categorical(
        valid_dataset_csv["species_id"].values, NUM_CLASSES
    )

    lwlrap_at_scale = np.zeros((len(scales), NUM_CLASSES))
    preds = []
    seg_preds = []

    for s, max_length in enumerate(scales):
        # The same loader is reused; only its max_length changes between scales.
        valid_data_loader.max_length = max_length
        clip_preds, seg_pred = model.predict(valid_data_loader, verbose=0)
        preds.append(clip_preds)
        seg_preds.append(seg_pred)

        class_lwlrap, _, score = lwlrap(gts, clip_preds)
        lwlrap_at_scale[s] = class_lwlrap
        print(f"Scale {max_length}:\t{score:.3f}")

    np.save(
        os.path.join(checkpoints_path, f"lwlrap_at_scale_{fold}.npy"),
        lwlrap_at_scale,
    )

    return lwlrap_at_scale, np.array(preds), seg_preds, gts

In [15]:
# Checkpoint folder of the training run to evaluate. Exactly one assignment is
# active; the other candidate runs are kept commented out for reference.
# The trailing number on each line is the score the author noted for that run
# (presumably a competition/validation score - the metric is not stated here).
# CP_FOLDER = LOG_PATH + "2021-01-28/4/"  # 0.898
# CP_FOLDER = LOG_PATH + "2021-01-28/12/"  # 0.891
# CP_FOLDER = LOG_PATH + "2021-01-28/14/"  # 0.897
# CP_FOLDER = LOG_PATH + "2021-01-28/15/"  # 0.907
# CP_FOLDER = LOG_PATH + "2021-01-31/11/"  # 0.910
# CP_FOLDER = LOG_PATH + "2021-01-31/12/"  # 0.912
CP_FOLDER = LOG_PATH + "2021-02-01/20/"  # 0.926

In [18]:
# Per-fold accumulators; they are read again by later cells.
lwlraps, preds, seg_preds, best_preds, ys = [], [], [], [], []

for fold_idx in range(5):
    # Reset the Keras global state at the start of folds 0, 2 and 4.
    if fold_idx in [0, 2, 4]:
        tf.keras.backend.clear_session()
    print(f"\n-------------   Fold {fold_idx + 1} / {5}  -------------\n")
    lwlrap_at_scale, pred, seg_pred, y = run_multi_scale_eval(CP_FOLDER, fold_idx, SCALES)

    # Extended argmax: for every class, use the predictions of the scale with
    # the highest lwlrap. When several scales tie, the last tied one (in SCALES
    # order) wins.
    best_scale_per_class = [
        np.flatnonzero(lwlrap_at_scale[:, c] == lwlrap_at_scale[:, c].max())[-1]
        for c in range(NUM_CLASSES)
    ]
    best_pred = np.array(
        [pred[scale_idx, :, c] for c, scale_idx in enumerate(best_scale_per_class)]
    ).T

    # lwlrap returns several values; the last one is the overall score.
    score = lwlrap(y, best_pred)[-1]

    print(f'\nMulti-scale : {score:.3f}')

    lwlraps.append(lwlrap_at_scale)
    best_preds.append(best_pred)
    seg_preds.append(seg_pred)
    preds.append(pred)
    ys.append(y)


-------------   Fold 1 / 5  -------------

 -> Loading weights from ../logs/2021-02-01/20/fold0/model-0.878-0.117.h5

Scale 32:	0.695
Scale 64:	0.828
Scale 128:	0.868
Scale 192:	0.867
Scale 256:	0.886
Scale 320:	0.882
Scale 384:	0.868
Scale 448:	0.864
Scale 512:	0.857

Multi-scale : 0.892

-------------   Fold 2 / 5  -------------

 -> Loading weights from ../logs/2021-02-01/20/fold1/model-0.921-0.039.h5

Scale 32:	0.640
Scale 64:	0.856
Scale 128:	0.911
Scale 192:	0.923
Scale 256:	0.927
Scale 320:	0.909
Scale 384:	0.917
Scale 448:	0.914
Scale 512:	0.919

Multi-scale : 0.930

-------------   Fold 3 / 5  -------------

 -> Loading weights from ../logs/2021-02-01/20/fold2/model-0.879-0.040.h5

Scale 32:	0.542
Scale 64:	0.745
Scale 128:	0.868
Scale 192:	0.884
Scale 256:	0.883
Scale 320:	0.877
Scale 384:	0.875
Scale 448:	0.874
Scale 512:	0.868

Multi-scale : 0.906

-------------   Fold 4 / 5  -------------

 -> Loading weights from ../logs/2021-02-01/20/fold3/model-0.905-0.029.h5

Scale 32

In [20]:
# Pool the targets and the multi-scale predictions of all evaluated folds and
# score them together; the last value returned by lwlrap is the overall score.
y_true_all = np.concatenate(ys)
y_pred_all = np.concatenate(best_preds)
score = lwlrap(y_true_all, y_pred_all)[-1]

print(f'Local CV score {score:.3f}')

Local CV score 0.907


In [14]:
# for scale, pred in zip(SCALES, seg_preds[0]):
    
#     if scale != 256:
#         continue
        
#     for i in range(pred.shape[0]):
#         plt.figure(figsize=(12, 8))
#         for j in range(24):
#             plt.plot(pred[i][j], label=f'class {j}')
#             plt.ylim(-0.01, 1.01)
            
#         plt.legend()
#         plt.title(f'{scale} - {i}')
#         plt.show()
        
# #     break