In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/tf/notebooks/src


## Initialization

### GPU

In [3]:
import tensorflow as tf
# Sanity check: show the GPU devices TensorFlow can see (an empty list means
# no GPU is visible to this kernel).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Packages

In [4]:
try:
    import pandas
except:
    !pip install pandas iterative-stratification nlpaug==0.0.20 tqdm click tensorflow_probability==0.11.1 tf2_resnets tensorflow_addons==0.11.1 image-classifiers==0.2.2

### Imports

In [5]:
import os
import random
import logging
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import matplotlib.pyplot as plt

from dataloaders.train import BalancedMelSampler
from dataloaders.val import MelSampler
from dataloaders.utils import csv_to_dict

from losses import NpairsLoss, MovingAverageBCE
from metrics import TFLWLRAP
from split_data import get_split
from train import get_model, get_callbacks
from models import NUM_FRAMES, Classifier, DeepMetricLearning

 The versions of TensorFlow you are currently using is 2.4.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [6]:
from params import *
from utils.logger import prepare_log_folder, create_logger

### Setup

In [7]:
# Environment flags, set before any model is built.
# NOTE(review): SM_FRAMEWORK is presumably read by the image-classifiers
# package to select the tf.keras backend -- confirm against that library.
os.environ.update({
    "TF_DETERMINISTIC_OPS": "1",
    "SM_FRAMEWORK": "tf.keras",
})

# Enable memory growth on every visible GPU, then switch on the
# auto-mixed-precision optimizer option.
physical_devices = tf.config.list_physical_devices("GPU")
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

# Seed the Python, NumPy and TensorFlow random generators with the same value.
for seed_everything in (random.seed, np.random.seed, tf.random.set_seed):
    seed_everything(42)

## Main

In [8]:
def main(fold_idx=0, batch_size=64, saved_path="", pretrained_path="", model_name=""):
    """Train one model on one cross-validation fold.

    Reads ``../data/new_train_tp.csv``, splits it with ``get_split``, builds the
    training / validation samplers, builds and compiles the model returned by
    ``get_model`` and runs ``model.fit`` for 100 epochs.

    Args:
        fold_idx: Fold index passed to ``get_split`` and ``get_callbacks``; also
            names the ``fold{fold_idx}`` directory created under ``saved_path``.
        batch_size: Batch size given to both the training and validation samplers.
        saved_path: Directory forwarded to ``get_model`` and ``get_callbacks``;
            the fold sub-directory is created inside it.
        pretrained_path: Path forwarded to ``get_model`` as ``pretrained_path``.
        model_name: Model name forwarded to ``get_model``.

    Returns:
        None. The history returned by ``model.fit`` is discarded.
    """
    # Number of target classes, shared by both samplers and the metric.
    n_classes = 24

    os.makedirs(os.path.join(saved_path, f"fold{fold_idx}"), exist_ok=True)

    print(' -> Preparing Data \n')

    train_data = pd.read_csv("../data/new_train_tp.csv")

    train_index, val_index = get_split(fold=fold_idx)

    fold_train_dict = csv_to_dict(train_data.iloc[train_index])
    fold_valid_dict = csv_to_dict(train_data.iloc[val_index])

    balanced_train_data_loader = BalancedMelSampler(
        fold_train_dict,
        batch_size=batch_size,
        max_length=NUM_FRAMES,
        n_classes=n_classes,
        use_cutmix=True,
        n_classes_in_batch=8,
    )

    valid_data_loader = MelSampler(
        fold_valid_dict,
        batch_size=batch_size,
        n_classes=n_classes,
        max_length=NUM_FRAMES,
    )

    # Computed once and reused for both the loss schedule and model.fit
    # (the original computed the same value twice under two different names).
    steps_per_epoch = int(len(fold_train_dict) / balanced_train_data_loader.batch_size)

    print(' -> Preparing Model \n')
    model = get_model(
        model_name=model_name,
        saved_path=saved_path,
        pretrained_with_contrastive=False,
        pretrained_path=pretrained_path,
    )
    model._build()

    # Cyclical learning rate between 1e-4 and 1e-3.
    scheduler = tfa.optimizers.Triangular2CyclicalLearningRate(
        initial_learning_rate=1e-4,
        maximal_learning_rate=1e-3,
        step_size=50,
    )

    # Adam wrapped in Lookahead, wrapped in a dynamic loss-scale optimizer.
    optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        tfa.optimizers.Lookahead(
            tf.keras.optimizers.Adam(learning_rate=scheduler),
            sync_period=10,
            slow_step_size=0.5,
        ),
        "dynamic",
    )

    # start_apply_step is set to 20 epochs' worth of training steps.
    ma_bce = MovingAverageBCE(
        train_data.iloc[train_index],
        start_apply_step=20 * steps_per_epoch,
        momentum=0.9,
        name="moving_average_loss",
    )

    model.compile(
        optimizer=optimizer,
        metrics=[TFLWLRAP(num_classes=n_classes, name="lwlrap")],
        metric_loss_fn=NpairsLoss(temperature=0.1, name="n_pairs"),
        classification_loss_fn=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        moving_average_bce=ma_bce,
    )

    print(' -> Training Model \n')

    callbacks = get_callbacks(False, fold_idx, saved_path=saved_path)

    model.fit(
        balanced_train_data_loader,
        steps_per_epoch=steps_per_epoch,
        epochs=100,
        validation_data=valid_data_loader,
        callbacks=callbacks,
        verbose=2
    )

In [9]:
# When True, the training loops below skip creating a new log folder and stop
# after the first fold of each model.
DEBUG = False
# Not referenced by the training loops in this notebook; same path as the
# 'densenet121' entry of PRETRAINED_FOLDERS.
pretrained_folder = "../logs/2021-02-04/3/"   # simple
# Default output folder; replaced by prepare_log_folder(LOG_PATH) in the
# training loops whenever DEBUG is False.
log_folder = "../logs/"

In [10]:
# Model names trained with main()'s default batch size.
MODELS = [
#     'densenet121',  # currently excluded from the run
    'resnet18',
    'resnet34',
    'resnext50',
    'efficientnetb2',
    "xception",
]

# Model names trained with batch_size=32 (see the BIGGER_MODELS loop).
BIGGER_MODELS = [  # BS = 32
    'resnest50',
    'efficientnetb3',
#     'efficientnetb4',  # currently excluded; has no PRETRAINED_FOLDERS entry
]

In [11]:
# Maps each model name to the log folder holding its pretrained checkpoints.
# The training loops look up `pretrained_best_fold{fold_idx}.h5` inside the
# folder, so each path is written with a trailing slash.
PRETRAINED_FOLDERS = {
    'densenet121': "../logs/2021-02-04/3/",
    'resnet18': "../logs/2021-02-13/14/",
    'resnet34': "../logs/2021-02-14/0/",
    'resnext50': "../logs/2021-02-14/1/",
    'efficientnetb2': "../logs/2021-02-14/2/",
    "xception": "../logs/2021-02-14/3/",
    "resnest50": "../logs/2021-02-14/5/",
    "efficientnetb3": "../logs/2021-02-14/6/",
}

In [None]:
# Train every model in MODELS on 5 folds with main()'s default batch size.
for model_name in MODELS:
    print(f"\n\nModel {model_name}\n\n")

    # Look the checkpoint folder up first so that a model without an entry
    # fails before an empty log folder is created for it.
    pretrained_dir = PRETRAINED_FOLDERS[model_name]

    if not DEBUG:
        # One fresh log folder per model. When DEBUG is True no folder is
        # created and `log_folder` keeps its current value.
        log_folder = prepare_log_folder(LOG_PATH)
        print(f'Logging results to {log_folder}')
        create_logger(directory=log_folder, name="logs.txt")

    for fold_idx in range(5):
        # Reset Keras global state between folds.
        tf.keras.backend.clear_session()
        print(f"\n-------------   Fold {fold_idx + 1} / {5}  -------------\n")

        main(
            model_name=model_name,
            fold_idx=fold_idx,
            saved_path=log_folder,
            # os.path.join works whether or not the folder ends with a slash.
            pretrained_path=os.path.join(
                pretrained_dir, f"pretrained_best_fold{fold_idx}.h5"
            ),
        )

        # In debug mode only the first fold is run.
        if DEBUG:
            break



Model resnet18


Logging results to ../logs/2021-02-14/8/

-------------   Fold 1 / 5  -------------

 -> Preparing Data 

 -> Preparing Model 

 -> Loading weights from ../logs/2021-02-13/14/pretrained_best_fold0.h5

  opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt)
 -> Training Model 

Epoch 1/100
Instructions for updating:
Use fn_output_signature instead
15/15 - 17s - loss: 0.4637 - lwlrap: 0.2052 - val_loss: 0.3933 - val_lwlrap: 0.3053
Epoch 2/100
15/15 - 4s - loss: 0.1839 - lwlrap: 0.2351 - val_loss: 0.3141 - val_lwlrap: 0.4787
Epoch 3/100
15/15 - 3s - loss: 0.1554 - lwlrap: 0.3377 - val_loss: 0.1837 - val_lwlrap: 0.5881
Epoch 4/100
15/15 - 3s - loss: 0.1662 - lwlrap: 0.4544 - val_loss: 0.1741 - val_lwlrap: 0.7189
Epoch 5/100
15/15 - 3s - loss: 0.2670 - lwlrap: 0.5448 - val_loss: 0.1195 - val_lwlrap: 0.6729
Epoch 6/100
15/15 - 3s - loss: 0.2360 - lwlrap: 0.6010 - val_loss: 0.1111 - val_lwlrap: 0.7979
Epoch 7/100
15/15 - 3s - loss: 0.2027 - lwlrap: 0.6246 - va

In [None]:
# Train every model in BIGGER_MODELS on 5 folds with a reduced batch size of 32.
for model_name in BIGGER_MODELS:
    print(f"\n\nModel {model_name}\n\n")

    # Look the checkpoint folder up first so that a model without an entry
    # fails before an empty log folder is created for it.
    pretrained_dir = PRETRAINED_FOLDERS[model_name]

    if not DEBUG:
        # One fresh log folder per model. When DEBUG is True no folder is
        # created and `log_folder` keeps its current value.
        log_folder = prepare_log_folder(LOG_PATH)
        print(f'Logging results to {log_folder}')
        create_logger(directory=log_folder, name="logs.txt")

    for fold_idx in range(5):
        # Reset Keras global state between folds.
        tf.keras.backend.clear_session()
        print(f"\n-------------   Fold {fold_idx + 1} / {5}  -------------\n")

        main(
            model_name=model_name,
            batch_size=32,
            fold_idx=fold_idx,
            saved_path=log_folder,
            # os.path.join works whether or not the folder ends with a slash.
            pretrained_path=os.path.join(
                pretrained_dir, f"pretrained_best_fold{fold_idx}.h5"
            ),
        )

        # In debug mode only the first fold is run.
        if DEBUG:
            break