In [1]:
import errno
import glob
import json
import os
import re
import shutil
from types import SimpleNamespace
import cv2
import torch
import warnings
from lightning_objects import LightningModel
warnings.filterwarnings('ignore')
from config import Configuration
import pandas as pd
from common_utils import stratify_split, make_holdout_df, set_seeds
from train_manager import TrainManager

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell runs, so edits to the project's .py files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def main(experiment_name: str, debug, resume=False,
         finetune=False, freeze_bn=True, freeze_feature_extractor=False):
    """Prepare an experiment directory and run training through ``TrainManager``.

    Three modes are supported, selected by ``resume`` / ``finetune``:

    * neither flag set: a brand-new experiment. The directory
      ``trained-models/<experiment_name>`` is created, ``train.csv`` is read
      from ``config.data_dir``, split into holdout and folds, and the splits
      plus the configuration are written to the experiment directory.
    * ``finetune`` without ``resume``: the saved config and splits are loaded,
      the experiment directory is copied to ``<experiment_dir>_tune`` and
      training continues in that copy using the existing ``*fold*.ckpt``
      files found in the original directory.
    * ``resume`` (with or without ``finetune``): the saved config and splits
      are loaded and checkpoint parameters are computed with
      ``get_checkpoint_params``.

    Args:
        experiment_name: Name of the experiment; also the sub-directory of
            ``trained-models`` that holds its state.
        debug: Value stored on ``config.debug`` before training starts.
        resume: Continue a previously started experiment.
        finetune: Fine-tune the checkpoints of a previous experiment.
        freeze_bn: Forwarded unchanged to ``TrainManager``.
        freeze_feature_extractor: Forwarded unchanged to ``TrainManager``.

    Raises:
        FileExistsError: If a directory that must be newly created already exists.
        AssertionError: If fine-tuning is requested but no ``*fold*.ckpt`` file
            exists, or if the fold/holdout frames could not be obtained.
    """
    experiment_dir = os.path.abspath(f'trained-models/{experiment_name}')
    print('Experiment directory', experiment_dir)

    try:
        # -------- SETUP --------
        checkpoint_params = None
        finetune_model_fnames = None
        folds_df, holdout_df = None, None

        if not resume and not finetune:  # totally new experiment
            make_experiment_directory(experiment_dir)
            config = Configuration()
            config.debug = debug
            set_seeds(config.seed)

            # -------- LOAD DATA FROM TRAIN FILE --------
            data_df = pd.read_csv(config.data_dir + '/train.csv', engine='python')
            data_df, holdout_df = make_holdout_df(data_df, seed=config.seed)
            folds_df = stratify_split(data_df, config.fold_num, config.seed, config.target_col)

            # -------- SAVE FILES (for experiment state) --------
            folds_df.to_csv(experiment_dir + '/folds.csv', index=False)
            # save holdout to a csv file for final inference (so we don't run inference on training examples)
            holdout_df.to_csv(experiment_dir + '/holdout.csv', index=False)
            with open(experiment_dir + '/experiment_config.json', 'w') as f:
                json.dump(config.__dict__, f)
        else:  # resume and/or finetune: restore state saved by the original run
            # LOAD DATA FROM SAVED FILES
            with open(experiment_dir + '/experiment_config.json', 'r') as f:
                # object_hook turns every JSON object into attribute-style access,
                # mirroring how the Configuration instance is used elsewhere.
                config = json.load(f, object_hook=lambda d: SimpleNamespace(**d))
                set_seeds(config.seed)
                config.debug = debug

            folds_df = pd.read_csv(experiment_dir + '/folds.csv', engine='python')
            holdout_df = pd.read_csv(experiment_dir + '/holdout.csv', engine='python')

            if finetune and not resume:
                print('finetuning...')
                # verify there are checkpoints to fine tune
                finetune_model_fnames = glob.glob(experiment_dir + '/*fold*.ckpt')
                assert len(finetune_model_fnames) > 0, \
                    f'no *fold*.ckpt checkpoints to fine tune in {experiment_dir}'
                finetune_model_fnames.sort()

                # make new directory for tuning experiment with files from training run 1
                tune_dir = experiment_dir + '_tune'
                make_experiment_directory(tune_dir)
                for entry in os.listdir(experiment_dir):
                    print(f"copying {entry} to {tune_dir}")
                    source_path = os.path.join(experiment_dir, entry)
                    if os.path.isdir(source_path):
                        # copy2 cannot copy directories; sub-directories
                        # (e.g. logger output) need a recursive copy.
                        shutil.copytree(source_path, os.path.join(tune_dir, entry))
                    else:
                        shutil.copy2(source_path, tune_dir)
                experiment_dir = tune_dir
                experiment_name += '_tune'
            else:
                print('resuming from last checkpoint...')
                checkpoint_params = get_checkpoint_params(experiment_dir, resume, config.model_arch)

        assert holdout_df is not None, 'holdout_df is None'
        assert folds_df is not None, 'folds_df is None'

        # cv2 multithreading seems to go into deadlock with PyTorch data loaders
        if config.num_workers > 0:
            cv2.setNumThreads(0)

        trainer = TrainManager(experiment_name=experiment_name, experiment_dir=experiment_dir,
                               folds_df=folds_df, holdout_df=holdout_df,
                               checkpoint_params=checkpoint_params, config=config,
                               finetune=finetune, freeze_bn=freeze_bn,
                               freeze_feature_extractor=freeze_feature_extractor,
                               finetune_model_fnames=finetune_model_fnames)
        trainer.run()
    finally:
        # Release cached GPU memory whether training finished, failed or was interrupted.
        torch.cuda.empty_cache()

def make_experiment_directory(name):
    """Create the directory ``name`` (including missing parents) for an experiment.

    Args:
        name: Path of the directory to create.

    Raises:
        FileExistsError: If ``name`` already exists. A hint is printed first and
            the error is then always propagated, so an existing experiment is
            never silently reused.
    """
    try:
        os.makedirs(name)
    except FileExistsError:
        print('Experiment already exists. Be sure to resume training appropriately or start a new experiment.')
        # Re-raise unconditionally: gating this on the errno value could
        # swallow the error and let the caller overwrite an existing run.
        raise


def get_checkpoint_params(basename, resume, model_arch):
    """Work out which fold to restart from when resuming an experiment.

    The directory ``basename`` is scanned for ``*fold*.ckpt`` files and the
    highest fold number found in their file names is taken as the most recent
    fold. Fold numbers are compared numerically, and only the file name (not
    the directory part of the path) is inspected.

    Args:
        basename: Experiment directory to scan.
        resume: When falsy, nothing is scanned and ``None`` is returned.
        model_arch: Used to build the returned checkpoint file path.

    Returns:
        ``None`` if ``resume`` is falsy, otherwise a dict with
        ``'restart_from'`` (int, 0 when no fold checkpoint exists) and
        ``'checkpoint_file_path'``.
    """
    if not resume:
        return None

    fold_pattern = re.compile(r'fold(\d+)')
    trained_folds = []
    # Escape the directory so characters such as '[' in an experiment name
    # are not interpreted as glob wildcards.
    for checkpoint_path in glob.glob(glob.escape(basename) + '/*fold*.ckpt'):
        fold_match = fold_pattern.search(os.path.basename(checkpoint_path))
        if fold_match is None:
            # e.g. "folds.ckpt": matches the glob but carries no fold number
            continue
        trained_folds.append(int(fold_match.group(1)))

    most_recent_fold = max(trained_folds, default=0)

    # NOTE(review): the scan above looks for ".ckpt" files while this path ends
    # in ".pth" -- confirm against whatever consumes 'checkpoint_file_path'.
    return {
        'restart_from': most_recent_fold,
        'checkpoint_file_path': f'{basename}/{model_arch}_fold{most_recent_fold}.pth',
    }

In [None]:
if __name__ == '__main__':
    # Passed to main() as its ``debug`` argument.
    debug = False
    print('Running in debug mode:', debug)
    try:
        main(experiment_name='adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0', debug=debug,
             resume=False, finetune=False, freeze_bn=True, freeze_feature_extractor=False)
    except KeyboardInterrupt:
        # Interrupting the kernel is a normal way to stop a long run; report it
        # instead of swallowing it silently so the cell output shows why it ended.
        print('Training interrupted by user.')

Running in debug mode: False
Experiment directory /opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0
folds_df len 18187, holdout_df len 3210
Training fold 0
Class sample counts [ 758 1470 1623 8933 1765]
After class sample counts [2274 2940 3732 8933 4765]


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params
Finding best initial lr: 100%|██████████| 100/100 [01:57<00:00,  1.16s/it]Restored states from the checkpoint file at /opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/lr_find_temp_model.ckpt
Learning rate set to 0.0019054607179632484

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test

Epoch 1:  47%|████▋     | 451/967 [02:16<22:33,  2.62s/it, loss=0.202, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.233]    

Finding best initial lr: 100%|██████████| 100/100 [04:17<00:00,  2.58s/it]

Epoch 1:  47%|████▋     | 452/967 [02:16<22:09,  2.58s/it, loss=0.203, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.164]




Epoch 1:  94%|█████████▍| 910/967 [04:36<00:30,  1.85it/s, loss=0.174, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.0432]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.174, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.0432]
Validating:   4%|▎         | 2/57 [00:01<00:48,  1.14it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:38<00:28,  1.85it/s, loss=0.174, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.0432]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:39<00:27,  1.85it/s, loss=0.174, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.0432]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.73it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.174, v_num=1, val_loss=0.487, val_acc=0.258, train_loss=0.0432]
Validating:  14%|█▍        | 8/57 [00:03<00:16,  2.96it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:40<00:25,  1.86it/s, loss=0.17

Epoch 1, global step 228: val_loss reached 0.11401 (best 0.11401), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.114_val_acc=0.872_fold0.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:57<00:00,  1.91it/s, loss=0.174, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.266] 
Epoch 2:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.169, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.0891]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.169, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.0891]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.13it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.169, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.0891]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.06it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.169, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.0891]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.76it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.169, v_num=1, val_loss=0.114, val_acc=0.872, train_loss=0.08

Epoch 2, global step 456: val_loss reached 0.10828 (best 0.10828), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.108_val_acc=0.882_fold0.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:54<00:00,  1.93it/s, loss=0.169, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.181] 
Epoch 3:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.166, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.223]     
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.166, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.223]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.15it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.166, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.223]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.10it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.166, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.223]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.76it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:37<00:26,  1.87it/s, loss=0.166, v_num=1, val_loss=0.108, val_acc=0.882, train_loss=0.223]


Epoch 3, global step 684: val_loss reached 0.10250 (best 0.10250), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.102_val_acc=0.889_fold0.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.166, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.213]
Epoch 4:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.16, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.152]      
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.16, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.152]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.22it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.16, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.152]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.19it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.16, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.152]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.81it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.16, v_num=1, val_loss=0.102, val_acc=0.889, train_loss=0.152]
Valid

Epoch 4, step 912: val_loss was not in top 1


Epoch 4: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.16, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.184]
Epoch 5:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.145, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.175]    
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.145, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.175]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.11it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.145, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.175]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.04it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.145, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.175]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.73it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.145, v_num=1, val_loss=0.105, val_acc=0.886, train_loss=0.175]
Val

Epoch 5, global step 1140: val_loss reached 0.10114 (best 0.10114), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.101_val_acc=0.890_fold0.ckpt" as top 1


Epoch 5: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.145, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.162] 
Epoch 6:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.158, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.0823]    
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.158, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.0823]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.14it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.158, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.0823]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.04it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.158, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.0823]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.57it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.158, v_num=1, val_loss=0.101, val_acc=0.89, train_loss=0.0823]
Va

Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.158, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.225] 
Epoch 7:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.151, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0934]    
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.151, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0934]
Validating:   4%|▎         | 2/57 [00:02<00:51,  1.07it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.151, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0934]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.151, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0934]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.68it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.151, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0934]
Va

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.151, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.152]
Epoch 8:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.15, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.116]      
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.15, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.116]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.25it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.15, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.116]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.21it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.15, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.116]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.74it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.15, v_num=1, val_loss=0.104, val_acc=0.887, train_loss=0.116]
Valid

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.15, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.0819]
Epoch 9:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.101]     
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.101]
Validating:   4%|▎         | 2/57 [00:02<00:47,  1.15it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.101]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.00it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.101]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.70it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.88, train_loss=0.101]
Validat

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.133, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=0.0035]
Epoch 10:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.148, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=0.0956]   
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.148, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=0.0956]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.148, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=0.0956]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.01it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.148, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=0.0956]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.71it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.148, v_num=1, val_loss=0.108, val_acc=0.881, train_loss=

Epoch 10, global step 2280: val_loss reached 0.10074 (best 0.10074), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.101_val_acc=0.892_fold0.ckpt" as top 1


Epoch 10: 100%|██████████| 967/967 [04:54<00:00,  1.93it/s, loss=0.148, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0.397] 
Epoch 11:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0.184]     
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0.184]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.13it/s][A
Epoch 11:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0.184]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.02it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0.184]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.71it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.892, train_loss=0

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.143, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.185]
Epoch 12:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.132, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.194]     
Validating: 0it [00:00, ?it/s][A
Epoch 12:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.132, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.194]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.29it/s][A
Epoch 12:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.132, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.194]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.26it/s][A
Epoch 12:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.132, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.194]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.74it/s][A
Epoch 12:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.132, v_num=1, val_loss=0.101, val_acc=0.891, train_loss=0.

Epoch 12, step 2736: val_loss was not in top 1


Epoch 12: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.132, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.308]
Epoch 13:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.145, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.079]     
Validating: 0it [00:00, ?it/s][A
Epoch 13:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.145, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.079]
Validating:   4%|▎         | 2/57 [00:01<00:43,  1.26it/s][A
Epoch 13:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.145, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.079]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.21it/s][A
Epoch 13:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.145, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.079]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.82it/s][A
Epoch 13:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.145, v_num=1, val_loss=0.102, val_acc=0.894, train_loss=0.

Epoch 13, global step 2964: val_loss reached 0.10060 (best 0.10060), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.101_val_acc=0.894_fold0.ckpt" as top 1


Epoch 13: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.145, v_num=1, val_loss=0.101, val_acc=0.894, train_loss=0.374]
Epoch 14:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.124, v_num=1, val_loss=0.101, val_acc=0.894, train_loss=0.0932]    
Validating: 0it [00:00, ?it/s][A
Epoch 14:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.124, v_num=1, val_loss=0.101, val_acc=0.894, train_loss=0.0932]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.19it/s][A
Epoch 14:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.124, v_num=1, val_loss=0.101, val_acc=0.894, train_loss=0.0932]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 14:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.124, v_num=1, val_loss=0.101, val_acc=0.894, train_loss=0.0932]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.59it/s][A
Epoch 14:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.124, v_num=1, val_loss=0.101, val_acc=0.894, train_loss

Epoch 14, step 3192: val_loss was not in top 1


Epoch 14: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.124, v_num=1, val_loss=0.102, val_acc=0.891, train_loss=0.108] 
Epoch 15:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.149, v_num=1, val_loss=0.102, val_acc=0.891, train_loss=0.0646]    
Validating: 0it [00:00, ?it/s][A
Epoch 15:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.149, v_num=1, val_loss=0.102, val_acc=0.891, train_loss=0.0646]
Validating:   4%|▎         | 2/57 [00:01<00:45,  1.21it/s][A
Epoch 15:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.149, v_num=1, val_loss=0.102, val_acc=0.891, train_loss=0.0646]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.17it/s][A
Epoch 15:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.149, v_num=1, val_loss=0.102, val_acc=0.891, train_loss=0.0646]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.70it/s][A
Epoch 15:  95%|█████████▍| 918/967 [04:37<00:26,  1.87it/s, loss=0.149, v_num=1, val_loss=0.102, val_acc=0.891, train_los

Epoch 15, step 3420: val_loss was not in top 1


Epoch 15: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.149, v_num=1, val_loss=0.103, val_acc=0.889, train_loss=0.157] 
Epoch 16:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.132, v_num=1, val_loss=0.103, val_acc=0.889, train_loss=0.0838]    
Validating: 0it [00:00, ?it/s][A
Epoch 16:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.132, v_num=1, val_loss=0.103, val_acc=0.889, train_loss=0.0838]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.28it/s][A
Epoch 16:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.132, v_num=1, val_loss=0.103, val_acc=0.889, train_loss=0.0838]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.25it/s][A
Epoch 16:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.132, v_num=1, val_loss=0.103, val_acc=0.889, train_loss=0.0838]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.57it/s][A
Epoch 16:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.132, v_num=1, val_loss=0.103, val_acc=0.889, train_los

Epoch 16, step 3648: val_loss was not in top 1


Epoch 16: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.132, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0.047] 
Epoch 17:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0.153]     
Validating: 0it [00:00, ?it/s][A
Epoch 17:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0.153]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.30it/s][A
Epoch 17:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0.153]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.28it/s][A
Epoch 17:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0.153]
Validating:  11%|█         | 6/57 [00:03<00:21,  2.39it/s][A
Epoch 17:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.888, train_loss=0

Epoch 17, step 3876: val_loss was not in top 1


Epoch 17: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.138, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.135]
Epoch 18:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.117, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.124]     
Validating: 0it [00:00, ?it/s][A
Epoch 18:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.117, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.124]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 18:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.117, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.124]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.08it/s][A
Epoch 18:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.117, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.124]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.76it/s][A
Epoch 18:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.117, v_num=1, val_loss=0.104, val_acc=0.889, train_loss=0.

Epoch 18, step 4104: val_loss was not in top 1


Epoch 18: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.117, v_num=1, val_loss=0.103, val_acc=0.891, train_loss=0.0773]
Epoch 19:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.102, v_num=1, val_loss=0.103, val_acc=0.891, train_loss=0.0982]    
Validating: 0it [00:00, ?it/s][A
Epoch 19:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.102, v_num=1, val_loss=0.103, val_acc=0.891, train_loss=0.0982]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.13it/s][A
Epoch 19:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.102, v_num=1, val_loss=0.103, val_acc=0.891, train_loss=0.0982]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 19:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.102, v_num=1, val_loss=0.103, val_acc=0.891, train_loss=0.0982]
Validating:  11%|█         | 6/57 [00:03<00:22,  2.25it/s][A
Epoch 19:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.102, v_num=1, val_loss=0.103, val_acc=0.891, train_los

Epoch 19, step 4332: val_loss was not in top 1


Epoch 19: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.102, v_num=1, val_loss=0.106, val_acc=0.887, train_loss=0.0594]
                                                           [ATraining fold 1
Class sample counts [ 758 1471 1622 8933 1765]
After class sample counts [2274 2942 3730 8933 4765]


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:34,  2.84it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:23,  1.18it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:37,  1.00s/it][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:43,  1.07s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:46,  1.12s/it][

Epoch 19: 100%|██████████| 967/967 [05:07<00:00,  1.85it/s, loss=0.102, v_num=1, val_loss=0.106, val_acc=0.887, train_loss=0.0594]


Finding best initial lr:   9%|▉         | 9/100 [00:09<01:46,  1.18s/it][A
Finding best initial lr:  10%|█         | 10/100 [00:11<01:46,  1.19s/it][A
Finding best initial lr:  11%|█         | 11/100 [00:12<01:45,  1.19s/it][A
Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:44,  1.19s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:43,  1.19s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:42,  1.19s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:17<01:40,  1.19s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:39,  1.19s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:38,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:37,  1.19s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:36,  1.19s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:23<01:35,  1.20s/it][A
Finding best initial lr:  21%|██        | 21/100 [00:24<01:34,  1.20s/it][A

Epoch 1:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.17, v_num=2, val_loss=0.494, val_acc=0.273, train_loss=0.131]     
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.17, v_num=2, val_loss=0.494, val_acc=0.273, train_loss=0.131]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.17, v_num=2, val_loss=0.494, val_acc=0.273, train_loss=0.131]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.17, v_num=2, val_loss=0.494, val_acc=0.273, train_loss=0.131]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.74it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.17, v_num=2, val_loss=0.494, val_acc=0.273, train_loss=0.131]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.11it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:39<00:25,  1.86it/s, loss=0.17, v_n

Epoch 1, global step 228: val_loss reached 0.11955 (best 0.11955), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.120_val_acc=0.867_fold1.ckpt" as top 1


Epoch 19: 100%|██████████| 967/967 [11:53<00:00,  1.26s/it, loss=0.102, v_num=1, val_loss=0.106, val_acc=0.887, train_loss=0.0594]

Finding best initial lr: 100%|██████████| 100/100 [06:55<00:00,  4.15s/it]







Epoch 1: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.17, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.0967]
Epoch 2:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.176, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.135]     
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.176, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.135]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.15it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.176, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.135]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.176, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.135]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.76it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.176, v_num=2, val_loss=0.12, val_acc=0.867, train_loss=0.135]
Validat

Epoch 2, global step 456: val_loss reached 0.11550 (best 0.11550), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.115_val_acc=0.867_fold1.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.176, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.582]
Epoch 3:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.168, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.247]     
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.168, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.247]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.23it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.168, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.247]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.21it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.168, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.247]
Validating:  11%|█         | 6/57 [00:02<00:17,  2.84it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.168, v_num=2, val_loss=0.115, val_acc=0.867, train_loss=0.247]
V

Epoch 3, global step 684: val_loss reached 0.11051 (best 0.11051), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.111_val_acc=0.875_fold1.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.168, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.348]
Epoch 4:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.166, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.0711]    
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.166, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.0711]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.29it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.166, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.0711]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.20it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.166, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.0711]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.70it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.166, v_num=2, val_loss=0.111, val_acc=0.875, train_loss=0.071

Epoch 4, global step 912: val_loss reached 0.11001 (best 0.11001), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.110_val_acc=0.877_fold1.ckpt" as top 1


Epoch 4: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.166, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.525]  
Epoch 5:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.148, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.065]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.148, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.065]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.29it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.148, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.065]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.25it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.148, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.065]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.74it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.148, v_num=2, val_loss=0.11, val_acc=0.877, train_loss=0.065]
Valid

Epoch 5, global step 1140: val_loss reached 0.10592 (best 0.10592), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.106_val_acc=0.882_fold1.ckpt" as top 1


Epoch 5: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.148, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.124]
Epoch 6:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.151, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.206]     
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.151, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.206]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.28it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.151, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.206]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.27it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.151, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.206]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.151, v_num=2, val_loss=0.106, val_acc=0.882, train_loss=0.206]
V

Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.151, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.0472]
Epoch 7:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.16, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.341]      
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.16, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.341]
Validating:   4%|▎         | 2/57 [00:01<00:45,  1.21it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.16, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.341]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.18it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.16, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.341]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.81it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.16, v_num=2, val_loss=0.111, val_acc=0.877, train_loss=0.341]
Vali

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.16, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.222]
Epoch 8:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.141, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.173]     
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.141, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.173]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.141, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.173]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.01it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.141, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.173]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.71it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.141, v_num=2, val_loss=0.117, val_acc=0.872, train_loss=0.173]
Va

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.141, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.0927]
Epoch 9:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.135, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.111]     
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.135, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.111]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.30it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.135, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.111]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.29it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.135, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.111]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.83it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.135, v_num=2, val_loss=0.108, val_acc=0.883, train_loss=0.111]


Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.135, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.0187]
Epoch 10:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.141, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.061]     
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.141, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.061]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.29it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.141, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.061]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.27it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.141, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.061]
Validating:  11%|█         | 6/57 [00:03<00:21,  2.41it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.141, v_num=2, val_loss=0.113, val_acc=0.874, train_loss=0.

Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.141, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.294]
Epoch 11:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.153, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.23]      
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.153, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.23]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.29it/s][A
Epoch 11:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.153, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.23]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.27it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.153, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.23]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.59it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.153, v_num=2, val_loss=0.115, val_acc=0.873, train_loss=0.23]

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.153, v_num=2, val_loss=0.107, val_acc=0.882, train_loss=0.345]
                                                           [ATraining fold 2
Class sample counts [ 759 1471 1622 8934 1764]
After class sample counts [2277 2942 3730 8934 4762]


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:33,  2.97it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:21,  1.20it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:36,  1.00it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:43,  1.08s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:46,  1.12s/it][

Epoch 11: 100%|██████████| 967/967 [05:12<00:00,  1.82it/s, loss=0.153, v_num=2, val_loss=0.107, val_acc=0.882, train_loss=0.345]


Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:43,  1.19s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:42,  1.19s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:17<01:41,  1.19s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:40,  1.19s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:38,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:37,  1.19s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:36,  1.19s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:23<01:35,  1.20s/it][A
Finding best initial lr:  21%|██        | 21/100 [00:24<01:35,  1.21s/it][A
Finding best initial lr:  22%|██▏       | 22/100 [00:25<01:33,  1.20s/it][A
Finding best initial lr:  23%|██▎       | 23/100 [00:26<01:32,  1.20s/it][A
Finding best initial lr:  24%|██▍       | 24/100 [00:27<01:31,  1.20s/it][A
Finding best initial lr:  25%|██▌       | 25/100 [00:29<01:29,  1.20s/it][

Epoch 1:  94%|█████████▍| 910/967 [04:37<00:30,  1.84it/s, loss=0.188, v_num=3, val_loss=0.494, val_acc=0.195, train_loss=0.17]     
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:39<00:29,  1.84it/s, loss=0.188, v_num=3, val_loss=0.494, val_acc=0.195, train_loss=0.17]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.19it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:39<00:28,  1.84it/s, loss=0.188, v_num=3, val_loss=0.494, val_acc=0.195, train_loss=0.17]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.15it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:40<00:27,  1.84it/s, loss=0.188, v_num=3, val_loss=0.494, val_acc=0.195, train_loss=0.17]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.79it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:41<00:26,  1.85it/s, loss=0.188, v_num=3, val_loss=0.494, val_acc=0.195, train_loss=0.17]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.16it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:41<00:25,  1.85it/s, loss=0.188, v_

Epoch 1, global step 228: val_loss reached 0.11940 (best 0.11940), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.119_val_acc=0.865_fold2.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:58<00:00,  1.90it/s, loss=0.188, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.119]
Epoch 2:  94%|█████████▍| 910/967 [04:36<00:30,  1.85it/s, loss=0.163, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.0934]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.163, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.0934]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:38<00:28,  1.85it/s, loss=0.163, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.0934]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:38<00:27,  1.85it/s, loss=0.163, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.0934]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.77it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.163, v_num=3, val_loss=0.119, val_acc=0.865, train_loss=0.093

Epoch 2, global step 456: val_loss reached 0.11380 (best 0.11380), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.114_val_acc=0.874_fold2.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:54<00:00,  1.93it/s, loss=0.163, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.207] 
Epoch 3:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.157]    
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.157]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.24it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.157]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.14it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.157]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.60it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.157]
V

Epoch 3, global step 684: val_loss reached 0.11378 (best 0.11378), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.114_val_acc=0.873_fold2.ckpt" as top 1


Epoch 11: 100%|██████████| 967/967 [21:48<00:00,  2.30s/it, loss=0.153, v_num=2, val_loss=0.107, val_acc=0.882, train_loss=0.345]

Finding best initial lr: 100%|██████████| 100/100 [16:49<00:00, 10.09s/it]







Epoch 3: 100%|██████████| 967/967 [04:54<00:00,  1.93it/s, loss=0.157, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.303]
Epoch 4:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.162, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.133]     
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.162, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.133]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.18it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.162, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.133]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.12it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.162, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.133]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.77it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.162, v_num=3, val_loss=0.114, val_acc=0.873, train_loss=0.133]
V

Epoch 4, step 912: val_loss was not in top 1


Epoch 4: 100%|██████████| 967/967 [04:52<00:00,  1.94it/s, loss=0.162, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.21]  
Epoch 5:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.16, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.209]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.16, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.209]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.24it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.16, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.209]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.22it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.16, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.209]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.70it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.16, v_num=3, val_loss=0.115, val_acc=0.87, train_loss=0.209]
Validating:

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.16, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0667]
Epoch 6:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.15, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0975]     
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.15, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0975]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.19it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.15, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0975]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.15it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.15, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0975]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.73it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.15, v_num=3, val_loss=0.114, val_acc=0.874, train_loss=0.0975]
V

Epoch 6, global step 1368: val_loss reached 0.10629 (best 0.10629), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.106_val_acc=0.881_fold2.ckpt" as top 1


Epoch 6: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.15, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.0952]
Epoch 7:  94%|█████████▍| 910/967 [04:36<00:30,  1.85it/s, loss=0.153, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.163]     
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.153, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.163]
Validating:   4%|▎         | 2/57 [00:01<00:43,  1.26it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:38<00:28,  1.85it/s, loss=0.153, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.163]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.21it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:38<00:27,  1.85it/s, loss=0.153, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.163]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.56it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.153, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.163]
V

Epoch 7, global step 1596: val_loss reached 0.10359 (best 0.10359), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.104_val_acc=0.884_fold2.ckpt" as top 1


Epoch 7: 100%|██████████| 967/967 [04:55<00:00,  1.93it/s, loss=0.153, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.0611]
Epoch 8:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.15, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.148]       
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.15, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.148]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.18it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.15, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.148]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.13it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.15, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.148]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.78it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.15, v_num=3, val_loss=0.104, val_acc=0.884, train_loss=0.148]
Val

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.15, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.0816]
Epoch 9:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.193]     
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.193]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.19it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.193]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.12it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.193]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.77it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.881, train_loss=0.193]
V

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.136, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.138]
Epoch 10:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.143, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.31]      
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.143, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.31]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.17it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.143, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.31]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.143, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.31]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.143, v_num=3, val_loss=0.106, val_acc=0.884, train_loss=0.31]


Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.143, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.0732]
Epoch 11:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.137, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.14]      
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.137, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.14]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.15it/s][A
Epoch 11:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.137, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.14]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.12it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.137, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.14]
Validating:  11%|█         | 6/57 [00:03<00:21,  2.32it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.137, v_num=3, val_loss=0.107, val_acc=0.883, train_loss=0.14

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.137, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0.0995]
Epoch 12:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.131, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0.187]     
Validating: 0it [00:00, ?it/s][A
Epoch 12:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.131, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0.187]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 12:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.131, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0.187]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.12it/s][A
Epoch 12:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.131, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0.187]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.77it/s][A
Epoch 12:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.131, v_num=3, val_loss=0.108, val_acc=0.879, train_loss=0

Epoch 12, step 2736: val_loss was not in top 1


Epoch 12: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.131, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.309]
Epoch 13:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.133, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.124]     
Validating: 0it [00:00, ?it/s][A
Epoch 13:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.133, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.124]
Validating:   4%|▎         | 2/57 [00:01<00:45,  1.20it/s][A
Epoch 13:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.133, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.124]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.05it/s][A
Epoch 13:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.133, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.124]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.72it/s][A
Epoch 13:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.133, v_num=3, val_loss=0.106, val_acc=0.885, train_loss=0.

Epoch 13, step 2964: val_loss was not in top 1


Epoch 13: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.133, v_num=3, val_loss=0.11, val_acc=0.881, train_loss=0.128] 
                                                           [ATraining fold 3
Class sample counts [ 759 1470 1622 8934 1765]
After class sample counts [2277 2940 3730 8934 4765]


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:33,  2.92it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:22,  1.19it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:37,  1.01s/it][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:43,  1.08s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:46,  1.12s/it][

Epoch 13: 100%|██████████| 967/967 [05:10<00:00,  1.83it/s, loss=0.133, v_num=3, val_loss=0.11, val_acc=0.881, train_loss=0.128]


Finding best initial lr:  10%|█         | 10/100 [00:11<01:46,  1.19s/it][A
Finding best initial lr:  11%|█         | 11/100 [00:12<01:45,  1.19s/it][A
Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:44,  1.19s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:43,  1.19s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:42,  1.19s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:17<01:41,  1.19s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:40,  1.19s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:39,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:38,  1.20s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:36,  1.19s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:23<01:36,  1.20s/it][A
Finding best initial lr:  21%|██        | 21/100 [00:24<01:34,  1.20s/it][A
Finding best initial lr:  22%|██▏       | 22/100 [00:25<01:33,  1.20s/it][

Epoch 1:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.177, v_num=4, val_loss=0.518, val_acc=0.102, train_loss=0.214]    
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.177, v_num=4, val_loss=0.518, val_acc=0.102, train_loss=0.214]
Validating:   4%|▎         | 2/57 [00:01<00:45,  1.21it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.177, v_num=4, val_loss=0.518, val_acc=0.102, train_loss=0.214]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.15it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.177, v_num=4, val_loss=0.518, val_acc=0.102, train_loss=0.214]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.81it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.177, v_num=4, val_loss=0.518, val_acc=0.102, train_loss=0.214]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.14it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:39<00:25,  1.87it/s, loss=0.177

Epoch 1, global step 228: val_loss reached 0.12930 (best 0.12930), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.129_val_acc=0.852_fold3.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.177, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.164]
Epoch 2:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.183, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.113]     
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.183, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.113]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.10it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:38<00:28,  1.85it/s, loss=0.183, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.113]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.06it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.183, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.113]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.75it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:39<00:26,  1.86it/s, loss=0.183, v_num=4, val_loss=0.129, val_acc=0.852, train_loss=0.113]
V

Epoch 2, global step 456: val_loss reached 0.12203 (best 0.12203), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.122_val_acc=0.861_fold3.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:54<00:00,  1.93it/s, loss=0.183, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.0971]
Epoch 3:  94%|█████████▍| 910/967 [04:35<00:30,  1.85it/s, loss=0.173, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.304]      
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.173, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.304]
Validating:   4%|▎         | 2/57 [00:01<00:42,  1.28it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.173, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.304]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.25it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.173, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.304]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.77it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.173, v_num=4, val_loss=0.122, val_acc=0.861, train_loss=0.304]

Epoch 3, global step 684: val_loss reached 0.12047 (best 0.12047), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.120_val_acc=0.864_fold3.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.173, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.275] 
Epoch 4:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.172, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.31]      
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:37<00:29,  1.85it/s, loss=0.172, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.31]
Validating:   4%|▎         | 2/57 [00:02<00:51,  1.08it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:37<00:28,  1.85it/s, loss=0.172, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.31]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.02it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.172, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.31]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.55it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.172, v_num=4, val_loss=0.12, val_acc=0.864, train_loss=0.31]
Validating

Epoch 4, global step 912: val_loss reached 0.11139 (best 0.11139), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/adabound_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.111_val_acc=0.876_fold3.ckpt" as top 1


Epoch 13: 100%|██████████| 967/967 [26:35<00:00,  2.81s/it, loss=0.133, v_num=3, val_loss=0.11, val_acc=0.881, train_loss=0.128]

Finding best initial lr: 100%|██████████| 100/100 [21:36<00:00, 12.97s/it]







Epoch 4: 100%|██████████| 967/967 [04:53<00:00,  1.93it/s, loss=0.172, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.212]
Epoch 5:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.157, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.123]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.157, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.123]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.24it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.157, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.123]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.15it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.157, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.123]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.157, v_num=4, val_loss=0.111, val_acc=0.876, train_loss=0.123]
V

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:53<00:00,  1.94it/s, loss=0.157, v_num=4, val_loss=0.115, val_acc=0.867, train_loss=0.129]
Epoch 6:  90%|████████▉ | 869/967 [04:23<00:54,  1.79it/s, loss=0.154, v_num=4, val_loss=0.115, val_acc=0.867, train_loss=0.0587]    