In [1]:
import errno
import glob
import json
import os
import re
import shutil
from types import SimpleNamespace
import cv2
import torch
import warnings
from lightning_objects import LightningModel
warnings.filterwarnings('ignore')
from config import Configuration
import pandas as pd
from utils import stratify_split, make_holdout_df, set_seeds
from train_manager import TrainManager

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def _copy_experiment_contents(src_dir, dst_dir):
    """Copy every entry of ``src_dir`` into the already existing ``dst_dir``.

    Regular files are copied with ``shutil.copy2``. Sub-directories are copied
    recursively with ``shutil.copytree``; calling ``copy2`` on a directory would
    raise and leave ``dst_dir`` half populated.
    """
    for entry in os.listdir(src_dir):
        src_path = os.path.join(src_dir, entry)
        print(f"copying {entry} to {dst_dir}")
        if os.path.isdir(src_path):
            shutil.copytree(src_path, os.path.join(dst_dir, entry))
        else:
            shutil.copy2(src_path, dst_dir)


def main(experiment_name: str, debug, resume=False,
         finetune=False, freeze_bn=True, freeze_feature_extractor=False):
    """Set up an experiment directory and hand control to ``TrainManager``.

    Three modes are selected by ``resume`` and ``finetune``:

    * both falsy: a new experiment. The directory
      ``trained-models/<experiment_name>`` is created (an existing one raises
      ``FileExistsError``), ``train.csv`` is read from ``config.data_dir``,
      split into a holdout set and folds, and ``folds.csv``, ``holdout.csv``
      and ``experiment_config.json`` are written into the directory.
    * ``finetune`` truthy and ``resume`` falsy: the saved config and splits are
      loaded, at least one ``*fold*.ckpt`` must exist, and the whole experiment
      directory is copied into a new ``<experiment_dir>_tune`` directory, which
      becomes the working directory (``experiment_name`` also gets the
      ``_tune`` suffix).
    * otherwise (``resume`` truthy): the saved config and splits are loaded and
      ``get_checkpoint_params`` determines where to restart. Note that when
      ``resume`` and ``finetune`` are both truthy this branch is taken, so
      ``finetune_model_fnames`` stays ``None`` while ``finetune=True`` is still
      forwarded to ``TrainManager``.

    Args:
        experiment_name: name of the sub-directory under ``trained-models/``.
        debug: stored on ``config.debug``.
        resume: continue an existing experiment.
        finetune: start a fine-tuning run from an existing experiment.
        freeze_bn: forwarded unchanged to ``TrainManager``.
        freeze_feature_extractor: forwarded unchanged to ``TrainManager``.

    ``torch.cuda.empty_cache()`` is always called on the way out, whether the
    run completes or raises.
    """
    experiment_dir = os.path.abspath(f'trained-models/{experiment_name}')
    print('Experiment directory', experiment_dir)

    try:
        # -------- SETUP --------
        checkpoint_params = None
        finetune_model_fnames = None
        folds_df, holdout_df = None, None

        if not resume and not finetune:  # totally new experiment
            make_experiment_directory(experiment_dir)
            config = Configuration()
            config.debug = debug
            set_seeds(config.seed)

            # -------- LOAD DATA FROM TRAIN FILE --------
            data_df = pd.read_csv(config.data_dir + '/train.csv', engine='python')
            data_df, holdout_df = make_holdout_df(data_df, seed=config.seed)
            folds_df = stratify_split(data_df, config.fold_num, config.seed, config.target_col)

            # -------- SAVE FILES (experiment state: things like resuming, fine tuning, and inference on holdout) --------
            folds_df.to_csv(os.path.join(experiment_dir, 'folds.csv'), index=False)
            holdout_df.to_csv(os.path.join(experiment_dir, 'holdout.csv'), index=False)
            with open(os.path.join(experiment_dir, 'experiment_config.json'), 'w') as config_file:
                json.dump(config.__dict__, config_file)
        else:  # resume and/or finetune: rebuild state from the saved files
            # The saved config comes back as a SimpleNamespace (attribute access),
            # not as a Configuration instance.
            with open(os.path.join(experiment_dir, 'experiment_config.json'), 'r') as config_file:
                config = json.load(config_file, object_hook=lambda d: SimpleNamespace(**d))
            set_seeds(config.seed)
            config.debug = debug

            folds_df = pd.read_csv(os.path.join(experiment_dir, 'folds.csv'), engine='python')
            holdout_df = pd.read_csv(os.path.join(experiment_dir, 'holdout.csv'), engine='python')

            if finetune and not resume:
                print('finetuning...')
                # verify there are checkpoints to fine tune
                finetune_model_fnames = glob.glob(experiment_dir + '/*fold*.ckpt')
                assert len(finetune_model_fnames) > 0
                finetune_model_fnames.sort()

                # make new directory for tuning experiment with files from training run 1
                tune_dir = experiment_dir + '_tune'
                make_experiment_directory(tune_dir)
                _copy_experiment_contents(experiment_dir, tune_dir)
                experiment_dir = tune_dir
                experiment_name += '_tune'
            else:
                print('resuming from last checkpoint...')
                checkpoint_params = get_checkpoint_params(experiment_dir, resume, config.model_arch)

        assert holdout_df is not None, 'holdout_df is None'
        assert folds_df is not None, 'folds_df is None'

        # cv2 multithreading seems to go into deadlock with PyTorch data loaders
        if config.num_workers > 0:
            cv2.setNumThreads(0)

        trainer = TrainManager(experiment_name=experiment_name, experiment_dir=experiment_dir,
                               folds_df=folds_df, holdout_df=holdout_df,
                               checkpoint_params=checkpoint_params, config=config,
                               finetune=finetune, freeze_bn=freeze_bn,
                               freeze_feature_extractor=freeze_feature_extractor,
                               finetune_model_fnames=finetune_model_fnames)
        trainer.run()
    finally:
        torch.cuda.empty_cache()

def make_experiment_directory(name):
    """Create the directory ``name``, including any missing parent directories.

    If the directory is already present, a hint is printed and the
    ``FileExistsError`` is re-raised so that an existing experiment is never
    silently reused.
    """
    try:
        os.makedirs(name)
    except FileExistsError as exists_error:
        print('Experiment already exists. Be sure to resume training appropriately or start a new experiment.')
        directory_present = exists_error.errno == errno.EEXIST
        if directory_present:
            raise


def get_checkpoint_params(basename, resume, model_arch):
    """Work out where a resumed run should restart from the saved fold checkpoints.

    Checkpoints are the files matching ``<basename>/*fold*.ckpt``. The fold
    index is the integer after the first ``fold`` in the *file name* (directory
    names are ignored), and folds are compared numerically, so ``fold10`` ranks
    above ``fold9``.

    Args:
        basename: experiment directory that holds the ``.ckpt`` files.
        resume: when falsy nothing is inspected and ``None`` is returned.
        model_arch: currently unused; kept so existing callers keep working.

    Returns:
        ``None`` when ``resume`` is falsy, otherwise a dict with

        * ``'restart_from'``: highest fold index found (``0`` if none), and
        * ``'checkpoint_file_path'``: path of a checkpoint belonging to that
          fold, or ``None`` when no fold checkpoint exists yet. If several
          files share the highest fold index, the lexicographically last
          path is chosen.
    """
    if not resume:
        return None

    # glob.escape keeps characters such as '[' in the experiment path literal
    # instead of letting glob treat them as pattern syntax.
    pattern = glob.escape(basename) + '/*fold*.ckpt'

    fold_checkpoints = []
    for path in glob.glob(pattern):
        fold_match = re.search(r'fold(\d+)', os.path.basename(path))
        if fold_match is None:
            # Matches the glob (contains "fold") but carries no fold number.
            continue
        fold_checkpoints.append((int(fold_match.group(1)), path))

    if not fold_checkpoints:
        # NOTE(review): the original raised IndexError here; confirm the
        # consumer of checkpoint_params accepts a None checkpoint path.
        return {'restart_from': 0, 'checkpoint_file_path': None}

    # Pick fold index and path together so they always describe the same fold.
    most_recent_fold, checkpoint_file_path = max(fold_checkpoints)
    return {'restart_from': most_recent_fold,
            'checkpoint_file_path': checkpoint_file_path}

In [4]:
if __name__ == '__main__':
    # Entry point of the notebook: launches one run of `main` with the settings below.
    try:
        debug = False
        print('Running in debug mode:', debug)
        main(experiment_name='tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1', debug=debug,
             resume=False, finetune=False, freeze_bn=True, freeze_feature_extractor=False)
    except KeyboardInterrupt:
        # A manual interrupt is an expected way to stop the run; report it
        # instead of swallowing it silently.
        print('Training interrupted by user.')

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Running in debug mode: False
Experiment directory /opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1
folds_df len 18187, holdout_df len 3210
Training fold 0
Class sample counts [ 758 1470 1623 8933 1765]
After class sample counts [2274 2940 3732 8933 4765]



  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params
Finding best initial lr: 100%|██████████| 100/100 [01:57<00:00,  1.16s/it]Restored states from the checkpoint file at /opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/lr_find_temp_model.ckpt
Learning rate set to 0.001584893192461114

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
---

Epoch 1:  49%|████▉     | 475/967 [02:22<15:24,  1.88s/it, loss=0.48, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.484]     

Finding best initial lr: 100%|██████████| 100/100 [04:24<00:00,  2.64s/it]

Epoch 1:  49%|████▉     | 476/967 [02:23<15:13,  1.86s/it, loss=0.484, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.602]




Epoch 1:  94%|█████████▍| 910/967 [04:34<00:30,  1.86it/s, loss=0.384, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.2]  
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:36<00:29,  1.86it/s, loss=0.384, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.2]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.08it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:36<00:28,  1.86it/s, loss=0.384, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.2]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.02it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:37<00:27,  1.86it/s, loss=0.384, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.2]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.70it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:38<00:26,  1.87it/s, loss=0.384, v_num=1, val_loss=0.957, val_acc=0.258, train_loss=0.2]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.09it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:38<00:25,  1.87it/s, loss=0.384, v_num=1, v

Epoch 1, global step 228: val_loss reached 0.24933 (best 0.24933), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.249_val_acc=0.855_fold0.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:55<00:00,  1.92it/s, loss=0.384, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.481]
Epoch 2:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.32, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.321]      
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:35<00:29,  1.86it/s, loss=0.32, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.321]
Validating:   4%|▎         | 2/57 [00:02<00:57,  1.05s/it][A
Epoch 2:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.32, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.321]
Validating:   7%|▋         | 4/57 [00:03<00:28,  1.86it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.32, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.321]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.58it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:37<00:26,  1.87it/s, loss=0.32, v_num=1, val_loss=0.249, val_acc=0.855, train_loss=0.321]
Valid

Epoch 2, global step 456: val_loss reached 0.20957 (best 0.20957), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.210_val_acc=0.879_fold0.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.32, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.393] 
Epoch 3:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.287, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.479]    
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.287, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.479]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.11it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.287, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.479]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.08it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.287, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.479]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.63it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.287, v_num=1, val_loss=0.21, val_acc=0.879, train_loss=0.479]
Validati

Epoch 3, step 684: val_loss was not in top 1


Epoch 3: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.287, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.111]
Epoch 4:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.293, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.209]     
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.293, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.209]
Validating:   4%|▎         | 2/57 [00:02<00:55,  1.00s/it][A
Epoch 4:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.293, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.209]
Validating:   7%|▋         | 4/57 [00:02<00:28,  1.87it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.293, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.209]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.59it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.293, v_num=1, val_loss=0.215, val_acc=0.881, train_loss=0.209]
V

Epoch 4, step 912: val_loss was not in top 1


Epoch 4: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.293, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.195]
Epoch 5:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.268, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.322]    
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.268, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.322]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.04it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.268, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.322]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.96it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.268, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.322]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.268, v_num=1, val_loss=0.217, val_acc=0.882, train_loss=0.322]
Va

Epoch 5, global step 1140: val_loss reached 0.20843 (best 0.20843), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.208_val_acc=0.886_fold0.ckpt" as top 1


Epoch 5: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.209]
Epoch 6:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.158]     
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.158]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.23it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.158]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.20it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.158]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.76it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:35<00:26,  1.88it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.158]
V

Epoch 6, global step 1368: val_loss reached 0.20800 (best 0.20800), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.208_val_acc=0.886_fold0.ckpt" as top 1


Epoch 6: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.268, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.226]
Epoch 7:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.298, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.111]     
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.298, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.111]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.298, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.111]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.06it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.298, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.111]
Validating:  11%|█         | 6/57 [00:03<00:20,  2.52it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.298, v_num=1, val_loss=0.208, val_acc=0.886, train_loss=0.111]
V

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.298, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.265]
Epoch 8:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.29, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.333]      
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.29, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.333]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.23it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.29, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.333]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.21it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.29, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.333]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.29, v_num=1, val_loss=0.239, val_acc=0.861, train_loss=0.333]
Valid

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.29, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.427]
Epoch 9:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.266, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.171]     
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:35<00:29,  1.87it/s, loss=0.266, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.171]
Validating:   4%|▎         | 2/57 [00:02<00:53,  1.03it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.266, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.171]
Validating:   7%|▋         | 4/57 [00:02<00:28,  1.88it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.266, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.171]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.60it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.266, v_num=1, val_loss=0.244, val_acc=0.866, train_loss=0.171]
Va

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.266, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0586]
Epoch 10:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.272, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0961]   
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.272, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0961]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.17it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.272, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0961]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.14it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.272, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0961]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.72it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.272, v_num=1, val_loss=0.21, val_acc=0.883, train_loss=0.0961

Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.272, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.77] 
Epoch 11:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.277, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.339]    
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:35<00:29,  1.87it/s, loss=0.277, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.339]
Validating:   4%|▎         | 2/57 [00:02<00:54,  1.00it/s][A
Epoch 11:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.277, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.339]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.90it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.277, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.339]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.62it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.277, v_num=1, val_loss=0.242, val_acc=0.866, train_loss=0.3

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.277, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.202] 
Epoch 12:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.268, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.392]     
Validating: 0it [00:00, ?it/s][A
Epoch 12:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.268, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.392]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.10it/s][A
Epoch 12:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.268, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.392]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 12:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.268, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.392]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.58it/s][A
Epoch 12:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.268, v_num=1, val_loss=0.32, val_acc=0.833, train_loss=0.392]


Epoch 12, step 2736: val_loss was not in top 1


Epoch 12: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.268, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.63]
Epoch 13:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.307, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.282]    
Validating: 0it [00:00, ?it/s][A
Epoch 13:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.307, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.282]
Validating:   4%|▎         | 2/57 [00:02<00:55,  1.01s/it][A
Epoch 13:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.307, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.282]
Validating:   7%|▋         | 4/57 [00:02<00:28,  1.83it/s][A
Epoch 13:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.307, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.282]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.56it/s][A
Epoch 13:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.307, v_num=1, val_loss=0.226, val_acc=0.873, train_loss=0.28

Epoch 13, step 2964: val_loss was not in top 1


Epoch 13: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.307, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.472]  
Epoch 14:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.282, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.265]     
Validating: 0it [00:00, ?it/s][A
Epoch 14:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.282, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.265]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.13it/s][A
Epoch 14:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.282, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.265]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.06it/s][A
Epoch 14:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.282, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.265]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.64it/s][A
Epoch 14:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.282, v_num=1, val_loss=nan, val_acc=0.869, train_loss=0.265]
Valid

Epoch 14, step 3192: val_loss was not in top 1


Epoch 14: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.282, v_num=1, val_loss=0.292, val_acc=0.837, train_loss=0.418]
                                                           [A

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Training fold 1
Class sample counts [ 758 1471 1622 8933 1765]
After class sample counts [2274 2942 3730 8933 4765]



  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:32,  3.05it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:21,  1.21it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:35,  1.01it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:42,  1.07s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it][A
Finding best initial lr:   6%|▌         | 6/100 [00:06<01:46,  1.14s/it][A
Finding best initial lr:   7%|▋         | 7/100 [00:07<01:46,  1.1

Epoch 14: 100%|██████████| 967/967 [05:03<00:00,  1.87it/s, loss=0.282, v_num=1, val_loss=0.292, val_acc=0.837, train_loss=0.418]


Finding best initial lr:   8%|▊         | 8/100 [00:08<01:46,  1.16s/it][A
Finding best initial lr:   9%|▉         | 9/100 [00:09<01:45,  1.16s/it][A
Finding best initial lr:  10%|█         | 10/100 [00:10<01:45,  1.17s/it][A
Finding best initial lr:  11%|█         | 11/100 [00:12<01:44,  1.18s/it][A
Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:43,  1.18s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:42,  1.18s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:41,  1.18s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:16<01:40,  1.18s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:39,  1.18s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:38,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:37,  1.18s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:35,  1.18s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:22<01:35,  1.19s/it][A


Epoch 1:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.416, v_num=1, val_loss=0.986, val_acc=0.0781, train_loss=0.31]     
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.416, v_num=1, val_loss=0.986, val_acc=0.0781, train_loss=0.31]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.416, v_num=1, val_loss=0.986, val_acc=0.0781, train_loss=0.31]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.13it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.416, v_num=1, val_loss=0.986, val_acc=0.0781, train_loss=0.31]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.73it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.416, v_num=1, val_loss=0.986, val_acc=0.0781, train_loss=0.31]
Validating:  14%|█▍        | 8/57 [00:03<00:16,  3.05it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:37<00:25,  1.88it/s, loss=0.41

Epoch 1, global step 228: val_loss reached 0.29340 (best 0.29340), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.293_val_acc=0.839_fold1.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.416, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.249]
Epoch 2:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.357, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.33]      
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.357, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.33]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.357, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.33]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.357, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.33]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.57it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.357, v_num=1, val_loss=0.293, val_acc=0.839, train_loss=0.33]
Valid

Epoch 2, global step 456: val_loss reached 0.21940 (best 0.21940), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.219_val_acc=0.876_fold1.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.357, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.852]
Epoch 3:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.34, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.452]      
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.34, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.452]
Validating:   4%|▎         | 2/57 [00:02<00:51,  1.06it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.34, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.452]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.96it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.34, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.452]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.34, v_num=1, val_loss=0.219, val_acc=0.876, train_loss=0.452]
Valid

Epoch 3, step 684: val_loss was not in top 1


Epoch 3: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.34, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=1.03] 
Epoch 14: 100%|██████████| 967/967 [23:36<00:00,  2.49s/it, loss=0.282, v_num=1, val_loss=0.292, val_acc=0.837, train_loss=0.418]]  

Finding best initial lr: 100%|██████████| 100/100 [18:39<00:00, 11.20s/it]


Epoch 4:  43%|████▎     | 416/967 [02:05<1:07:38,  7.37s/it, loss=0.31, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.199] 




Epoch 4:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.291, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.283] 
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.291, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.283]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.17it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.291, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.283]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.12it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.291, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.283]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.75it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.291, v_num=1, val_loss=0.223, val_acc=0.871, train_loss=0.283]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.13it/s][A
Epoch 4:  95%|█████████▌| 920/967 [04:36<00:24,  1.88it/s, loss=0.291, v

Epoch 4, step 912: val_loss was not in top 1


Epoch 4: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.291, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.0718]
Epoch 5:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.294, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.306]      
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:35<00:29,  1.86it/s, loss=0.294, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.306]
Validating:   4%|▎         | 2/57 [00:02<00:54,  1.01it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.294, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.306]
Validating:   7%|▋         | 4/57 [00:02<00:28,  1.87it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.294, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.306]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.59it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:36<00:26,  1.87it/s, loss=0.294, v_num=1, val_loss=0.221, val_acc=0.874, train_loss=0.306]

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.294, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.415]
Epoch 6:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.244, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.0741]    
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:35<00:29,  1.86it/s, loss=0.244, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.0741]
Validating:   4%|▎         | 2/57 [00:02<00:54,  1.02it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.244, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.0741]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.95it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.244, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.0741]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:36<00:26,  1.87it/s, loss=0.244, v_num=1, val_loss=0.235, val_acc=0.865, train_loss=0.074

Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.244, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.202] 
Epoch 7:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.261, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.306]     
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.261, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.306]
Validating:   4%|▎         | 2/57 [00:01<00:48,  1.13it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.261, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.306]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.08it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.261, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.306]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.261, v_num=1, val_loss=0.229, val_acc=0.871, train_loss=0.306]


Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.261, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.716]
Epoch 8:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.257, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.354]    
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.257, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.354]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.18it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.257, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.354]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.15it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.257, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.354]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.67it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.257, v_num=1, val_loss=0.234, val_acc=0.871, train_loss=0.354]
Va

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.257, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.436]
Epoch 9:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.251, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.111]    
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.251, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.111]
Validating:   4%|▎         | 2/57 [00:02<00:56,  1.02s/it][A
Epoch 9:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.251, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.111]
Validating:   7%|▋         | 4/57 [00:02<00:28,  1.86it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.251, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.111]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.59it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.251, v_num=1, val_loss=0.223, val_acc=0.874, train_loss=0.111]
Va

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.251, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.948]
Epoch 10:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.218, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.179]     
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.218, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.179]
Validating:   4%|▎         | 2/57 [00:01<00:49,  1.12it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.218, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.179]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.218, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.179]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.218, v_num=1, val_loss=0.234, val_acc=0.872, train_loss=0.1

Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.218, v_num=1, val_loss=0.222, val_acc=0.878, train_loss=0.00985]
                                                           [A

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Training fold 2
Class sample counts [ 759 1471 1622 8934 1764]
After class sample counts [2277 2942 3730 8934 4762]



  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:32,  3.06it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:21,  1.21it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:36,  1.01it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:42,  1.06s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:44,  1.10s/it][A
Finding best initial lr:   6%|▌         | 6/100 [00:06<01:46,  1.13s/it][A
Finding best initial lr:   7%|▋         | 7/100 [00:07<01:46,  1.1

Epoch 10: 100%|██████████| 967/967 [05:07<00:00,  1.85it/s, loss=0.218, v_num=1, val_loss=0.222, val_acc=0.878, train_loss=0.00985]


Finding best initial lr:  11%|█         | 11/100 [00:12<01:45,  1.18s/it][A
Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:44,  1.18s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:43,  1.19s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:41,  1.18s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:16<01:40,  1.18s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:39,  1.18s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:38,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:37,  1.19s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:36,  1.19s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:22<01:35,  1.19s/it][A
Finding best initial lr:  21%|██        | 21/100 [00:24<01:34,  1.19s/it][A
Finding best initial lr:  22%|██▏       | 22/100 [00:25<01:32,  1.19s/it][A
Finding best initial lr:  23%|██▎       | 23/100 [00:26<01:31,  1.19s/it][

Epoch 1:  94%|█████████▍| 910/967 [04:35<00:30,  1.86it/s, loss=0.405, v_num=1, val_loss=0.944, val_acc=0.469, train_loss=0.432]    
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:36<00:29,  1.85it/s, loss=0.405, v_num=1, val_loss=0.944, val_acc=0.469, train_loss=0.432]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:37<00:28,  1.86it/s, loss=0.405, v_num=1, val_loss=0.944, val_acc=0.469, train_loss=0.432]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:38<00:27,  1.86it/s, loss=0.405, v_num=1, val_loss=0.944, val_acc=0.469, train_loss=0.432]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.64it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:38<00:26,  1.86it/s, loss=0.405, v_num=1, val_loss=0.944, val_acc=0.469, train_loss=0.432]
Validating:  14%|█▍        | 8/57 [00:03<00:16,  3.06it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:39<00:25,  1.87it/s, loss=0.405

Epoch 1, global step 228: val_loss reached 0.25612 (best 0.25612), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.256_val_acc=0.853_fold2.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:55<00:00,  1.92it/s, loss=0.405, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.351]
Epoch 10: 100%|██████████| 967/967 [15:20<00:00,  1.62s/it, loss=0.218, v_num=1, val_loss=0.222, val_acc=0.878, train_loss=0.00985] 

Finding best initial lr: 100%|██████████| 100/100 [10:23<00:00,  6.24s/it]


Epoch 2:  71%|███████   | 688/967 [03:27<03:19,  1.40it/s, loss=0.346, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.153]




Epoch 2:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.343, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.483] 
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:35<00:29,  1.86it/s, loss=0.343, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.483]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.06it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.343, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.483]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.343, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.483]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.67it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:36<00:26,  1.87it/s, loss=0.343, v_num=1, val_loss=0.256, val_acc=0.853, train_loss=0.483]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.08it/s][A
Epoch 2:  95%|█████████▌| 920/967 [04:37<00:25,  1.88it/s, loss=0.343, v

Epoch 2, global step 456: val_loss reached 0.23738 (best 0.23738), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.237_val_acc=0.862_fold2.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.343, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.348]
Epoch 3:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.335, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.355]    
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.335, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.355]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.335, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.355]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.07it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.335, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.355]
Validating:  11%|█         | 6/57 [00:03<00:20,  2.55it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.335, v_num=1, val_loss=0.237, val_acc=0.862, train_loss=0.355]
Va

Epoch 3, global step 684: val_loss reached 0.21263 (best 0.21263), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.213_val_acc=0.885_fold2.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:51<00:00,  1.95it/s, loss=0.335, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.439]
Epoch 4:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.313, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.229]    
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.313, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.229]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.08it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.313, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.229]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.03it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.313, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.229]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.313, v_num=1, val_loss=0.213, val_acc=0.885, train_loss=0.229]
Va

Epoch 4, step 912: val_loss was not in top 1


Epoch 4: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.313, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.712]  
Epoch 5:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.09it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.04it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.57it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:  14%

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.262, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.0552]
Epoch 6:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]      
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.12it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.02it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:35<00:26,  1.88it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.35]
Validating:  14%

Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.276, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.285]
Epoch 7:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.174]     
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.174]
Validating:   4%|▎         | 2/57 [00:02<00:53,  1.04it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.174]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.90it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.174]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.63it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:35<00:26,  1.88it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.174]
Validating:  

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:49<00:00,  1.96it/s, loss=0.269, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.331]
Epoch 8:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.259]    
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.259]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.05it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.259]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.98it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.259]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.259]
Validating:  1

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.256, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.289]
Epoch 9:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.318]    
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.318]
Validating:   4%|▎         | 2/57 [00:01<00:43,  1.27it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.318]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.26it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.318]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.63it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.318]
Validating:  1

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.239, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.0682]
Epoch 10:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.168]    
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.168]
Validating:   4%|▎         | 2/57 [00:02<00:51,  1.06it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.168]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.91it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.168]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.62it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.168]
Validati

Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.245, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.356]
Epoch 11:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.245]    
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.245]
Validating:   4%|▎         | 2/57 [00:02<00:55,  1.01s/it][A
Epoch 11:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.245]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.91it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.245]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.245]
Validati

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.0584]
                                                           [A

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Training fold 3
Class sample counts [ 759 1470 1622 8934 1765]
After class sample counts [2277 2940 3730 8934 4765]



  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:33,  2.98it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:21,  1.21it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:35,  1.01it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:42,  1.06s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:44,  1.10s/it][A
Finding best initial lr:   6%|▌         | 6/100 [00:06<01:46,  1.13s/it][A
Finding best initial lr:   7%|▋         | 7/100 [00:07<01:46,  1.1

Epoch 11: 100%|██████████| 967/967 [05:09<00:00,  1.84it/s, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.0584]


Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:44,  1.18s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:43,  1.19s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:41,  1.19s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:16<01:41,  1.19s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:39,  1.19s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:38,  1.19s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:37,  1.18s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:35,  1.18s/it][A
Finding best initial lr:  20%|██        | 20/100 [00:22<01:35,  1.19s/it][A
Finding best initial lr:  21%|██        | 21/100 [00:24<01:33,  1.19s/it][A
Finding best initial lr:  22%|██▏       | 22/100 [00:25<01:32,  1.19s/it][A
Finding best initial lr:  23%|██▎       | 23/100 [00:26<01:31,  1.19s/it][A
Finding best initial lr:  24%|██▍       | 24/100 [00:27<01:30,  1.19s/it][

Epoch 1:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.43, v_num=1, val_loss=1.03, val_acc=0.0781, train_loss=0.183]     
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.43, v_num=1, val_loss=1.03, val_acc=0.0781, train_loss=0.183]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.43, v_num=1, val_loss=1.03, val_acc=0.0781, train_loss=0.183]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.08it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.43, v_num=1, val_loss=1.03, val_acc=0.0781, train_loss=0.183]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.70it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.43, v_num=1, val_loss=1.03, val_acc=0.0781, train_loss=0.183]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.09it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:36<00:24,  1.88it/s, loss=0.43, v_n

Epoch 1, global step 228: val_loss reached 0.25679 (best 0.25679), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.257_val_acc=0.849_fold3.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.43, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.534]
Epoch 2:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.341, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.287]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.341, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.287]
Validating:   4%|▎         | 2/57 [00:01<00:43,  1.26it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.341, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.287]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.23it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.341, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.287]
Validating:  11%|█         | 6/57 [00:02<00:18,  2.76it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.341, v_num=1, val_loss=0.257, val_acc=0.849, train_loss=0.287]
Val

Epoch 2, global step 456: val_loss reached 0.24772 (best 0.24772), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.248_val_acc=0.856_fold3.ckpt" as top 1


Epoch 2: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.341, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.588]
Epoch 11: 100%|██████████| 967/967 [20:51<00:00,  2.20s/it, loss=0.249, v_num=1, val_loss=nan, val_acc=0.052, train_loss=0.0584]     

Finding best initial lr: 100%|██████████| 100/100 [15:54<00:00,  9.55s/it]


Epoch 3:  87%|████████▋ | 840/967 [04:11<01:12,  1.75it/s, loss=0.298, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.363]




Epoch 3:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.299, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.18]  
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.299, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.18]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.10it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.299, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.18]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.96it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.299, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.18]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.60it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.299, v_num=1, val_loss=0.248, val_acc=0.856, train_loss=0.18]
Validating:  14%|█▍        | 8/57 [00:03<00:16,  3.06it/s][A
Epoch 3:  95%|█████████▌| 920/967 [04:36<00:24,  1.88it/s, loss=0.299, v_num

Epoch 3, global step 684: val_loss reached 0.24401 (best 0.24401), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.244_val_acc=0.860_fold3.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.299, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.155]
Epoch 4:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.281, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.0661]   
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.281, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.0661]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.10it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.281, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.0661]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.05it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.281, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.0661]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.69it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.281, v_num=1, val_loss=0.244, val_acc=0.86, train_loss=0.0661]
Vali

Epoch 4, global step 912: val_loss reached 0.23698 (best 0.23698), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.237_val_acc=0.867_fold3.ckpt" as top 1


Epoch 4: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.281, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.541]
Epoch 5:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.284, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.516]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.284, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.516]
Validating:   4%|▎         | 2/57 [00:01<00:45,  1.20it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.284, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.516]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.17it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.284, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.516]
Validating:  11%|█         | 6/57 [00:03<00:18,  2.77it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.284, v_num=1, val_loss=0.237, val_acc=0.867, train_loss=0.516]
V

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.284, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.582]
Epoch 6:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.277, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.26]      
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.277, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.26]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.15it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.277, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.26]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.12it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.277, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.26]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.277, v_num=1, val_loss=0.249, val_acc=0.863, train_loss=0.26]
Valid

Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.277, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.287]
Epoch 7:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.24, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.354]     
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.24, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.354]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.17it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.24, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.354]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.14it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.24, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.354]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.24, v_num=1, val_loss=0.248, val_acc=0.861, train_loss=0.354]
Valida

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.24, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.235]
Epoch 8:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.242, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.225]    
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.242, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.225]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.14it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.242, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.225]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.242, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.225]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.242, v_num=1, val_loss=0.254, val_acc=0.858, train_loss=0.225]
Val

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.242, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.343]
Epoch 9:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.233, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.0291]    
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.233, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.0291]
Validating:   4%|▎         | 2/57 [00:01<00:46,  1.18it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.233, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.0291]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.16it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.233, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.0291]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.64it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.233, v_num=1, val_loss=0.279, val_acc=0.849, train_loss=0.029

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.233, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.185] 
Epoch 10:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.26, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.168]     
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.26, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.168]
Validating:   4%|▎         | 2/57 [00:02<00:48,  1.13it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.26, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.168]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.26, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.168]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.62it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.26, v_num=1, val_loss=0.286, val_acc=0.845, train_loss=0.168]


Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.26, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.471] 
Epoch 11:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.268, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.247]    
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.268, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.247]
Validating:   4%|▎         | 2/57 [00:02<00:55,  1.01s/it][A
Epoch 11:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.268, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.247]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.91it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.268, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.247]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.60it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.268, v_num=1, val_loss=0.272, val_acc=0.85, train_loss=0.247]
Va

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.268, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.551]
Epoch 12:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.256, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.246]    
Validating: 0it [00:00, ?it/s][A
Epoch 12:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.256, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.246]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.04it/s][A
Epoch 12:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.256, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.246]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 12:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.256, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.246]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 12:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.256, v_num=1, val_loss=0.247, val_acc=0.863, train_loss=0.2

Epoch 12, step 2736: val_loss was not in top 1


Epoch 12: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.256, v_num=1, val_loss=0.248, val_acc=0.864, train_loss=0.297]
                                                           [A

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Training fold 4
Class sample counts [ 758 1470 1623 8934 1765]
After class sample counts [2274 2940 3732 8934 4765]



  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:32,  3.07it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:21,  1.21it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:35,  1.01it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:42,  1.06s/it][A
Finding best initial lr:   5%|▌         | 5/100 [00:05<01:45,  1.11s/it][A
Finding best initial lr:   6%|▌         | 6/100 [00:06<01:46,  1.13s/it][A

Epoch 12: 100%|██████████| 967/967 [05:03<00:00,  1.87it/s, loss=0.256, v_num=1, val_loss=0.248, val_acc=0.864, train_loss=0.297]


Finding best initial lr:   7%|▋         | 7/100 [00:07<01:46,  1.15s/it][A
Finding best initial lr:   8%|▊         | 8/100 [00:08<01:46,  1.15s/it][A
Finding best initial lr:   9%|▉         | 9/100 [00:09<01:45,  1.16s/it][A
Finding best initial lr:  10%|█         | 10/100 [00:10<01:45,  1.17s/it][A
Finding best initial lr:  11%|█         | 11/100 [00:12<01:45,  1.18s/it][A
Finding best initial lr:  12%|█▏        | 12/100 [00:13<01:43,  1.18s/it][A
Finding best initial lr:  13%|█▎        | 13/100 [00:14<01:42,  1.18s/it][A
Finding best initial lr:  14%|█▍        | 14/100 [00:15<01:41,  1.18s/it][A
Finding best initial lr:  15%|█▌        | 15/100 [00:16<01:40,  1.18s/it][A
Finding best initial lr:  16%|█▌        | 16/100 [00:18<01:39,  1.18s/it][A
Finding best initial lr:  17%|█▋        | 17/100 [00:19<01:37,  1.18s/it][A
Finding best initial lr:  18%|█▊        | 18/100 [00:20<01:36,  1.18s/it][A
Finding best initial lr:  19%|█▉        | 19/100 [00:21<01:35,  1.18s/it][A
F

Epoch 1:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.386, v_num=1, val_loss=1.01, val_acc=0.0781, train_loss=0.343]    
Validating: 0it [00:00, ?it/s][A
Epoch 1:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.386, v_num=1, val_loss=1.01, val_acc=0.0781, train_loss=0.343]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.11it/s][A
Epoch 1:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.386, v_num=1, val_loss=1.01, val_acc=0.0781, train_loss=0.343]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.03it/s][A
Epoch 1:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.386, v_num=1, val_loss=1.01, val_acc=0.0781, train_loss=0.343]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.66it/s][A
Epoch 1:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.386, v_num=1, val_loss=1.01, val_acc=0.0781, train_loss=0.343]
Validating:  14%|█▍        | 8/57 [00:03<00:15,  3.09it/s][A
Epoch 1:  95%|█████████▌| 920/967 [04:37<00:24,  1.88it/s, loss=0.386

Epoch 1, global step 228: val_loss reached 0.27668 (best 0.27668), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.277_val_acc=0.841_fold4.ckpt" as top 1


Epoch 1: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.386, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.343]
Epoch 2:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.339, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.347]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.339, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.347]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.05it/s][A
Epoch 2:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.339, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.347]
Validating:   7%|▋         | 4/57 [00:02<00:26,  1.97it/s][A
Epoch 2:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.339, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.347]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.64it/s][A
Epoch 2:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.339, v_num=1, val_loss=0.277, val_acc=0.841, train_loss=0.347]
Va

Epoch 2, step 456: val_loss was not in top 1


Epoch 2: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.339, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.357]  
Epoch 3:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.336, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.574]    
Validating: 0it [00:00, ?it/s][A
Epoch 3:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.336, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.574]
Validating:   4%|▎         | 2/57 [00:02<00:50,  1.08it/s][A
Epoch 3:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.336, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.574]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.03it/s][A
Epoch 3:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.336, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.574]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.67it/s][A
Epoch 3:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.336, v_num=1, val_loss=nan, val_acc=0.854, train_loss=0.574]
Validating: 

Epoch 3, global step 684: val_loss reached 0.22180 (best 0.22180), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.222_val_acc=0.875_fold4.ckpt" as top 1


Epoch 3: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.336, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.845]
Epoch 4:  94%|█████████▍| 910/967 [04:32<00:30,  1.88it/s, loss=0.317, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.23]      
Validating: 0it [00:00, ?it/s][A
Epoch 4:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.317, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.23]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.10it/s][A
Epoch 4:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.317, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.23]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.05it/s][A
Epoch 4:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.317, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.23]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 4:  95%|█████████▍| 918/967 [04:35<00:26,  1.88it/s, loss=0.317, v_num=1, val_loss=0.222, val_acc=0.875, train_loss=0.23]
Valid

Epoch 4, global step 912: val_loss reached 0.21580 (best 0.21580), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/tf_efficientnet_b4_ns_adamw_oencycle_smoothing=0.05_weighted_t1=0.8-t2=1/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.216_val_acc=0.878_fold4.ckpt" as top 1


Epoch 12: 100%|██████████| 967/967 [26:20<00:00,  2.78s/it, loss=0.256, v_num=1, val_loss=0.248, val_acc=0.864, train_loss=0.297]

Finding best initial lr: 100%|██████████| 100/100 [21:24<00:00, 12.84s/it]







Epoch 4: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.317, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.872]
Epoch 5:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.261, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.537]     
Validating: 0it [00:00, ?it/s][A
Epoch 5:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.261, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.537]
Validating:   4%|▎         | 2/57 [00:01<00:44,  1.24it/s][A
Epoch 5:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.261, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.537]
Validating:   7%|▋         | 4/57 [00:02<00:23,  2.22it/s][A
Epoch 5:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.261, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.537]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.58it/s][A
Epoch 5:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.261, v_num=1, val_loss=0.216, val_acc=0.878, train_loss=0.537]
V

Epoch 5, step 1140: val_loss was not in top 1


Epoch 5: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.261, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.0738]
Epoch 6:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.266, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.233]     
Validating: 0it [00:00, ?it/s][A
Epoch 6:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.266, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.233]
Validating:   4%|▎         | 2/57 [00:02<00:52,  1.04it/s][A
Epoch 6:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.266, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.233]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.92it/s][A
Epoch 6:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.266, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.233]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.62it/s][A
Epoch 6:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.266, v_num=1, val_loss=0.228, val_acc=0.873, train_loss=0.233]


Epoch 6, step 1368: val_loss was not in top 1


Epoch 6: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.266, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.431]
Epoch 7:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.257, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.188]    
Validating: 0it [00:00, ?it/s][A
Epoch 7:  94%|█████████▍| 912/967 [04:35<00:29,  1.87it/s, loss=0.257, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.188]
Validating:   4%|▎         | 2/57 [00:02<00:51,  1.07it/s][A
Epoch 7:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.257, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.188]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.00it/s][A
Epoch 7:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.257, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.188]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 7:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.257, v_num=1, val_loss=0.243, val_acc=0.861, train_loss=0.188]
Va

Epoch 7, step 1596: val_loss was not in top 1


Epoch 7: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.257, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.495]
Epoch 8:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.275, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.263]     
Validating: 0it [00:00, ?it/s][A
Epoch 8:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.275, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.263]
Validating:   4%|▎         | 2/57 [00:01<00:47,  1.16it/s][A
Epoch 8:  95%|█████████▍| 914/967 [04:34<00:28,  1.87it/s, loss=0.275, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.263]
Validating:   7%|▋         | 4/57 [00:02<00:24,  2.13it/s][A
Epoch 8:  95%|█████████▍| 916/967 [04:35<00:27,  1.88it/s, loss=0.275, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.263]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.68it/s][A
Epoch 8:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.275, v_num=1, val_loss=0.235, val_acc=0.872, train_loss=0.263]
V

Epoch 8, step 1824: val_loss was not in top 1


Epoch 8: 100%|██████████| 967/967 [04:50<00:00,  1.96it/s, loss=0.275, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.275]
Epoch 9:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.246, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.397]     
Validating: 0it [00:00, ?it/s][A
Epoch 9:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.246, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.397]
Validating:   4%|▎         | 2/57 [00:02<00:53,  1.02it/s][A
Epoch 9:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.246, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.397]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.91it/s][A
Epoch 9:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.246, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.397]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.61it/s][A
Epoch 9:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.246, v_num=1, val_loss=0.246, val_acc=0.859, train_loss=0.397]
V

Epoch 9, step 2052: val_loss was not in top 1


Epoch 9: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.246, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.245]
Epoch 10:  94%|█████████▍| 910/967 [04:33<00:30,  1.87it/s, loss=0.258, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.467]    
Validating: 0it [00:00, ?it/s][A
Epoch 10:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.258, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.467]
Validating:   4%|▎         | 2/57 [00:01<00:48,  1.13it/s][A
Epoch 10:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.258, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.467]
Validating:   7%|▋         | 4/57 [00:02<00:25,  2.09it/s][A
Epoch 10:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.258, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.467]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.68it/s][A
Epoch 10:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.258, v_num=1, val_loss=0.226, val_acc=0.872, train_loss=0.46

Epoch 10, step 2280: val_loss was not in top 1


Epoch 10: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.258, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.558]
Epoch 11:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.269, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.176]     
Validating: 0it [00:00, ?it/s][A
Epoch 11:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.269, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.176]
Validating:   4%|▎         | 2/57 [00:02<00:49,  1.11it/s][A
Epoch 11:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.269, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.176]
Validating:   7%|▋         | 4/57 [00:02<00:26,  2.03it/s][A
Epoch 11:  95%|█████████▍| 916/967 [04:35<00:27,  1.87it/s, loss=0.269, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.176]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.65it/s][A
Epoch 11:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.269, v_num=1, val_loss=0.282, val_acc=0.844, train_loss=0.

Epoch 11, step 2508: val_loss was not in top 1


Epoch 11: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.269, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.681]
Epoch 12:  94%|█████████▍| 910/967 [04:32<00:30,  1.87it/s, loss=0.243, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.166]    
Validating: 0it [00:00, ?it/s][A
Epoch 12:  94%|█████████▍| 912/967 [04:34<00:29,  1.87it/s, loss=0.243, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.166]
Validating:   4%|▎         | 2/57 [00:02<00:54,  1.01it/s][A
Epoch 12:  95%|█████████▍| 914/967 [04:35<00:28,  1.87it/s, loss=0.243, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.166]
Validating:   7%|▋         | 4/57 [00:02<00:27,  1.93it/s][A
Epoch 12:  95%|█████████▍| 916/967 [04:36<00:27,  1.87it/s, loss=0.243, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.166]
Validating:  11%|█         | 6/57 [00:03<00:19,  2.64it/s][A
Epoch 12:  95%|█████████▍| 918/967 [04:36<00:26,  1.88it/s, loss=0.243, v_num=1, val_loss=0.257, val_acc=0.857, train_loss=0.1

Epoch 12, step 2736: val_loss was not in top 1


Epoch 12: 100%|██████████| 967/967 [04:50<00:00,  1.95it/s, loss=0.243, v_num=1, val_loss=0.321, val_acc=0.826, train_loss=0.0658]
Epoch 12: 100%|██████████| 967/967 [05:07<00:00,  1.85it/s, loss=0.243, v_num=1, val_loss=0.321, val_acc=0.826, train_loss=0.0658]