In [1]:
import argparse
import errno
import glob
import json
import os
import re
import shutil
from types import SimpleNamespace
import cv2
import torch
import warnings
from lightning_objects import LightningModel
warnings.filterwarnings('ignore')
from config import Configuration
import pandas as pd
from utils import stratify_split, make_holdout_df, set_seeds
from train_manager import TrainManager

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
def main(experiment_name: str, debug, resume=False,
         finetune=False, freeze_bn=True, freeze_feature_extractor=False,
         data_csv='/train.csv'):

    experiment_dir = os.path.abspath(f'trained-models/{experiment_name}')
    print('Experiment directory', experiment_dir)

    try:
        # -------- SETUP --------
        checkpoint_params = None
        finetune_model_fnames = None
        folds_df, holdout_df = None, None

        if not resume and not finetune: # totally new experiment
            make_experiment_directory(experiment_dir)
            config = Configuration()
            config.debug = debug
            set_seeds(config.seed)

            # -------- LOAD DATA FROM TRAIN FILE --------
            data_df = pd.read_csv(config.data_dir + data_csv, engine='python')
            data_df, holdout_df = make_holdout_df(data_df, seed=config.seed)
            folds_df = stratify_split(data_df, config.fold_num, config.seed, config.target_col)

            # -------- SAVE FILES (experiment state: things like resuming, fine tuning, and inference on holdout) --------
            folds_df.to_csv(experiment_dir + '/folds.csv', index=False)
            holdout_df.to_csv(experiment_dir + '/holdout.csv', index=False)
            with open(experiment_dir + '/experiment_config.json', 'w') as f:
                json.dump(config.__dict__, f)
        elif resume or finetune:
            # LOAD DATA FROM SAVED FILES
            with open(experiment_dir + '/experiment_config.json', 'r') as f:
                config = json.load(f, object_hook=lambda d: SimpleNamespace(**d))
                set_seeds(config.seed)
                config.debug = debug
            
            holdout_df = pd.read_csv(experiment_dir + '/holdout.csv', engine='python')
            if data_csv != '/train.csv': # create new folds from new data file
                data_df = pd.read_csv(config.data_dir + data_csv, engine='python') 
                data_df, _ = make_holdout_df(data_df, seed=config.seed) # use same previous holdout with noise
                folds_df = stratify_split(data_df, config.fold_num, config.seed, config.target_col)
            else: # use existing folds
                folds_df = pd.read_csv(experiment_dir + '/folds.csv', engine='python')

            if finetune and not resume:
                print('finetuning...')
                # verify there are checkpoints to fine tune
                finetune_model_fnames = glob.glob(experiment_dir + '/*fold*.ckpt')
                assert len(finetune_model_fnames) > 0
                finetune_model_fnames.sort()

                # make new directory for tuning experiment with files from training run 1
                make_experiment_directory(experiment_dir + '_tune')
                for f in os.listdir(experiment_dir):
                    print(f"copying {f} to {experiment_dir + '_tune'}")
                    shutil.copy2(experiment_dir + '/' + f, experiment_dir + '_tune')
                experiment_dir += '_tune'
                experiment_name += '_tune'
    
                # overwrite folds from old experiment
                folds_df.to_csv(experiment_dir + '/folds.csv', index=False) 
                
            else:
                print('resuming from last checkpoint...')
                checkpoint_params = get_checkpoint_params(experiment_dir, resume)

        assert holdout_df is not None, 'holdout_df is None'
        assert folds_df is not None, 'folds_df is None'

        # cv2 multithreading seems to go into deadlock with PyTorch data loaders
        if config.num_workers > 0:
            cv2.setNumThreads(0)

        config.lr = 0.00003
        config.lr_test = True
        config.train_bs = 8

        trainer = TrainManager(experiment_name=experiment_name, experiment_dir=experiment_dir,
                               folds_df=folds_df, holdout_df=holdout_df,
                               checkpoint_params=checkpoint_params, config=config,
                               finetune=finetune, freeze_bn=freeze_bn,
                               freeze_feature_extractor=freeze_feature_extractor,
                               finetune_model_fnames=finetune_model_fnames)
        trainer.run()
    finally:
        torch.cuda.empty_cache()

def make_experiment_directory(name):
    try:
        os.makedirs(name)
    except FileExistsError as e:
        print('Experiment already exists. Be sure to resume training appropriately or start a new experiment.')
        if e.errno == errno.EEXIST: raise


def get_checkpoint_params(basename, resume):
    checkpoint_params = None
    if resume:
        checkpoint_params = {}
        model_filenames = glob.glob(basename + '/*fold*.ckpt')
        model_filenames.sort()
        trained_folds = [re.findall(r'fold\d+', f)[0][len('fold'):] for f in model_filenames]
        most_recent_fold = int(max(trained_folds)) if len(trained_folds) > 0 else 0

        checkpoint_params['restart_from'] = most_recent_fold
        checkpoint_params['checkpoint_file_path'] = model_filenames[-1]

    return checkpoint_params

In [4]:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Kaggle Cassava Disease Training')

    try:
        debug = False
        print('Running in debug mode:', debug)
        # fine tuned with SGD, one cycle
        main(experiment_name='sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53', debug=debug,
             resume=False, finetune=True, freeze_bn=True, freeze_feature_extractor=False,
            data_csv='/train_cleaned-0.6.csv')
    except KeyboardInterrupt:
        pass

Running in debug mode: False
Experiment directory /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53
finetuning...
copying tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.103_val_acc=0.889_fold1.ckpt to /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune
copying tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.109_val_acc=0.878_fold2.ckpt to /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune
copying experiment_config.json to /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune
copying tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.104_val_acc=0.887_fold3.ckpt to /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune
copying folds.csv to /opt/fav

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Models to fine tune
 ['/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.101_val_acc=0.892_fold0.ckpt', '/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.103_val_acc=0.889_fold1.ckpt', '/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.104_val_acc=0.887_fold3.ckpt', '/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.108_val_acc=0.883_fold4.ckpt', '/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.109_


  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params
Finding best initial lr: 100%|██████████| 100/100 [01:37<00:00,  1.01it/s]Restored states from the checkpoint file at /opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/lr_find_temp_model.ckpt
Learning rate set to 1e-06

  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
---------------------------

Epoch 1:   9%|▉         | 167/1770 [00:42<-1:55:08, -5.48it/s, loss=0.168, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.0484] 

Finding best initial lr: 100%|██████████| 100/100 [02:24<00:00,  1.44s/it]

Epoch 1:   9%|▉         | 168/1770 [00:42<-1:55:05, -5.42it/s, loss=0.166, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.464] 




Epoch 1:  97%|█████████▋| 1716/1770 [07:12<00:17,  3.04it/s, loss=0.194, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.433]    
Validating: 0it [00:00, ?it/s][A
Epoch 1:  97%|█████████▋| 1718/1770 [07:14<00:17,  3.03it/s, loss=0.194, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.433]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.34s/it][A
Epoch 1:  97%|█████████▋| 1720/1770 [07:16<00:16,  3.03it/s, loss=0.194, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.433]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 1:  97%|█████████▋| 1722/1770 [07:17<00:15,  3.02it/s, loss=0.194, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.433]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 1:  97%|█████████▋| 1724/1770 [07:18<00:15,  3.02it/s, loss=0.194, v_num=7, val_loss=0.0557, val_acc=0.961, train_loss=0.433]
Validating:  15%|█▍        | 8/54 [00:06<00:29,  1.58it/s][A
Epoch 1:  98%|█████████▊| 1726/1770 [07:19<00:14,  3.0

Epoch 1, global step 429: val_loss reached 0.05619 (best 0.05619), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.056_val_acc=0.958_fold0.ckpt" as top 1


Epoch 1: 100%|██████████| 1770/1770 [07:46<00:00,  2.94it/s, loss=0.194, v_num=7, val_loss=0.0562, val_acc=0.958, train_loss=0.0171]
Epoch 2:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.196, v_num=7, val_loss=0.0562, val_acc=0.958, train_loss=0.273]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.196, v_num=7, val_loss=0.0562, val_acc=0.958, train_loss=0.273]
Validating:   4%|▎         | 2/54 [00:03<01:12,  1.39s/it][A
Epoch 2:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.196, v_num=7, val_loss=0.0562, val_acc=0.958, train_loss=0.273]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.19it/s][A
Epoch 2:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.03it/s, loss=0.196, v_num=7, val_loss=0.0562, val_acc=0.958, train_loss=0.273]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 2:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.196, v_num=7, val_loss=0.0562, val_acc=0.958, t

Epoch 2, step 858: val_loss was not in top 1


Epoch 2: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.196, v_num=7, val_loss=0.0629, val_acc=0.948, train_loss=0.399]
Epoch 3:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.15, v_num=7, val_loss=0.0629, val_acc=0.948, train_loss=0.113]     
Validating: 0it [00:00, ?it/s][A
Epoch 3:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.15, v_num=7, val_loss=0.0629, val_acc=0.948, train_loss=0.113]
Validating:   4%|▎         | 2/54 [00:02<01:07,  1.29s/it][A
Epoch 3:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.15, v_num=7, val_loss=0.0629, val_acc=0.948, train_loss=0.113]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.14it/s][A
Epoch 3:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.03it/s, loss=0.15, v_num=7, val_loss=0.0629, val_acc=0.948, train_loss=0.113]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 3:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.15, v_num=7, val_loss=0.0629, val_acc=0.948, train_

Epoch 3, step 1287: val_loss was not in top 1


Epoch 3: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.15, v_num=7, val_loss=0.0574, val_acc=0.956, train_loss=0.315]
Epoch 4:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.172, v_num=7, val_loss=0.0574, val_acc=0.956, train_loss=0.026]    
Validating: 0it [00:00, ?it/s][A
Epoch 4:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.172, v_num=7, val_loss=0.0574, val_acc=0.956, train_loss=0.026]
Validating:   4%|▎         | 2/54 [00:02<01:05,  1.27s/it][A
Epoch 4:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0574, val_acc=0.956, train_loss=0.026]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.18it/s][A
Epoch 4:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0574, val_acc=0.956, train_loss=0.026]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.45it/s][A
Epoch 4:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0574, val_acc=0.956, tra

Epoch 4, global step 1716: val_loss reached 0.05545 (best 0.05545), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.055_val_acc=0.956_fold0.ckpt" as top 1


Epoch 4: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.172, v_num=7, val_loss=0.0554, val_acc=0.956, train_loss=0.161]
Epoch 5:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.06it/s, loss=0.186, v_num=7, val_loss=0.0554, val_acc=0.956, train_loss=0.0379]   
Validating: 0it [00:00, ?it/s][A
Epoch 5:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.186, v_num=7, val_loss=0.0554, val_acc=0.956, train_loss=0.0379]
Validating:   4%|▎         | 2/54 [00:02<01:06,  1.28s/it][A
Epoch 5:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.186, v_num=7, val_loss=0.0554, val_acc=0.956, train_loss=0.0379]
Validating:   7%|▋         | 4/54 [00:04<00:41,  1.21it/s][A
Epoch 5:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.186, v_num=7, val_loss=0.0554, val_acc=0.956, train_loss=0.0379]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.47it/s][A
Epoch 5:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.186, v_num=7, val_loss=0.0554, val_acc=0.956,

Epoch 5, step 2145: val_loss was not in top 1


Epoch 5: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.186, v_num=7, val_loss=0.058, val_acc=0.955, train_loss=0.361]  
Epoch 6:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.194, v_num=7, val_loss=0.058, val_acc=0.955, train_loss=0.738]    
Validating: 0it [00:00, ?it/s][A
Epoch 6:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.05it/s, loss=0.194, v_num=7, val_loss=0.058, val_acc=0.955, train_loss=0.738]
Validating:   4%|▎         | 2/54 [00:02<01:05,  1.26s/it][A
Epoch 6:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.194, v_num=7, val_loss=0.058, val_acc=0.955, train_loss=0.738]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.19it/s][A
Epoch 6:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.194, v_num=7, val_loss=0.058, val_acc=0.955, train_loss=0.738]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 6:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.194, v_num=7, val_loss=0.058, val_acc=0.955, train_

Epoch 6, step 2574: val_loss was not in top 1


Epoch 6: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.194, v_num=7, val_loss=0.0558, val_acc=0.957, train_loss=0.257]
Epoch 8:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.188, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.0578]   
Validating: 0it [00:00, ?it/s][A
Epoch 8:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.0578]
Validating:   4%|▎         | 2/54 [00:02<01:10,  1.35s/it][A
Epoch 8:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.0578]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.17it/s][A
Epoch 8:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.0578]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 8:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.188, v_num=7, val_loss=0.0577, val_acc=0.955,

Epoch 8, step 3432: val_loss was not in top 1


Epoch 8: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.188, v_num=7, val_loss=0.0583, val_acc=0.955, train_loss=0.241] 
Epoch 9:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.06it/s, loss=0.193, v_num=7, val_loss=0.0583, val_acc=0.955, train_loss=0.145]    
Validating: 0it [00:00, ?it/s][A
Epoch 9:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.193, v_num=7, val_loss=0.0583, val_acc=0.955, train_loss=0.145]
Validating:   4%|▎         | 2/54 [00:03<01:11,  1.38s/it][A
Epoch 9:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.193, v_num=7, val_loss=0.0583, val_acc=0.955, train_loss=0.145]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.14it/s][A
Epoch 9:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.03it/s, loss=0.193, v_num=7, val_loss=0.0583, val_acc=0.955, train_loss=0.145]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 9:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.193, v_num=7, val_loss=0.0583, val_acc=0.955, t

Epoch 9, step 3861: val_loss was not in top 1


Epoch 9: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.193, v_num=7, val_loss=0.0559, val_acc=0.956, train_loss=0.0343]
Epoch 10:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.167, v_num=7, val_loss=0.0559, val_acc=0.956, train_loss=0.0849]   
Validating: 0it [00:00, ?it/s][A
Epoch 10:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.167, v_num=7, val_loss=0.0559, val_acc=0.956, train_loss=0.0849]
Validating:   4%|▎         | 2/54 [00:03<01:12,  1.38s/it][A
Epoch 10:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.167, v_num=7, val_loss=0.0559, val_acc=0.956, train_loss=0.0849]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.16it/s][A
Epoch 10:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.167, v_num=7, val_loss=0.0559, val_acc=0.956, train_loss=0.0849]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 10:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.167, v_num=7, val_loss=0.0559, val_acc=

Epoch 10, step 4290: val_loss was not in top 1


Epoch 10: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291] 
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 11:   0%|          | 2/1770 [00:00<-1:59:57, -583.91it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291]  
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<01:50,  2.09s/it][A
Epoch 11:   0%|          | 4/1770 [00:03<-1:59:45, -115.53it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291]
Validating:   6%|▌         | 3/54 [00:03<00:50,  1.01it/s][A
Epoch 11:   0%|          | 6/1770 [00:04<-1:59:39, -82.32it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291] 
Validating:   9%|▉         | 5/54 [00:04<00:37,  1.32it/s][A
Epoch 11:   0%|          | 8/1770 [00:05<-1:59:34, -65.99it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.52it/s][A
Epoch 11:   1%|          | 10/1770 [00:07<-1:59:28, -54.91it/s, loss=0.167, v_num=7, val_loss=0.0566, val_acc=0.958, train_loss=0.291]
Validating:  17%|█▋        | 9/54 [00:06<0

Epoch 11, step 4291: val_loss was not in top 1


Epoch 11:   3%|▎         | 56/1770 [00:33<-1:57:14, -10.31it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768]
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 12:   0%|          | 2/1770 [00:00<-1:59:57, -558.05it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:01<01:40,  1.90s/it][A
Epoch 12:   0%|          | 4/1770 [00:03<-1:59:46, -122.66it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768]
Validating:   6%|▌         | 3/54 [00:03<00:46,  1.09it/s][A
Epoch 12:   0%|          | 6/1770 [00:04<-1:59:40, -87.31it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.34it/s][A
Epoch 12:   0%|          | 8/1770 [00:05<-1:59:35, -68.31it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.53it/s][A
Epoch 12:   1%|          | 10/1770 [00:06<-1:59:29, -56.52it/s, loss=0.167, v_num=7, val_loss=0.0567, val_acc=0.957, train_loss=0.0768]
Validating:  17%|█▋        | 9/54 [00:

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 13:   0%|          | 2/1770 [00:00<-1:59:57, -560.59it/s, loss=0.167, v_num=7, val_loss=0.0571, val_acc=0.956, train_loss=0.189] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<02:07,  2.41s/it][A
Epoch 13:   0%|          | 4/1770 [00:03<-1:59:44, -106.35it/s, loss=0.167, v_num=7, val_loss=0.0571, val_acc=0.956, train_loss=0.189]
Validating:   6%|▌         | 3/54 [00:03<00:50,  1.00it/s][A
Epoch 13:   0%|          | 6/1770 [00:04<-1:59:38, -79.69it/s, loss=0.167, v_num=7, val_loss=0.0571, val_acc=0.956, train_loss=0.189] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.33it/s][A
Epoch 13:   0%|          | 8/1770 [00:06<-1:59:33, -64.19it/s, loss=0.167, v_num=7, val_loss=0.0571, val_acc=0.956, train_loss=0.189]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.52it/s][A
Epoch 13:   1%|          | 10/1770 [00:07<-1:59:28, -53.67it/s, loss=0.167, v_num=7, val_loss=0.0571, val_acc=0.956, train_loss=0.189]
Validating:  17%|█▋        | 9/54 [00:07<00

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 14:   0%|          | 2/1770 [00:00<-1:59:57, -565.83it/s, loss=0.167, v_num=7, val_loss=0.0576, val_acc=0.957, train_loss=0.136] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:01<01:42,  1.93s/it][A
Epoch 14:   0%|          | 4/1770 [00:03<-1:59:46, -120.26it/s, loss=0.167, v_num=7, val_loss=0.0576, val_acc=0.957, train_loss=0.136]
Validating:   6%|▌         | 3/54 [00:03<00:47,  1.07it/s][A
Epoch 14:   0%|          | 6/1770 [00:04<-1:59:40, -86.21it/s, loss=0.167, v_num=7, val_loss=0.0576, val_acc=0.957, train_loss=0.136] 
Validating:   9%|▉         | 5/54 [00:04<00:35,  1.38it/s][A
Epoch 14:   0%|          | 8/1770 [00:05<-1:59:35, -68.43it/s, loss=0.167, v_num=7, val_loss=0.0576, val_acc=0.957, train_loss=0.136]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.55it/s][A
Epoch 14:   1%|          | 10/1770 [00:06<-1:59:29, -56.61it/s, loss=0.167, v_num=7, val_loss=0.0576, val_acc=0.957, train_loss=0.136]
Validating:  17%|█▋        | 9/54 [00:06<00

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training fold 1
Class sample counts [ 675 1381 1481 8725 1463]
After class sample counts [2025 2762 3406 8725 3950]
conv_stem.weight True
bn1.weight False
bn1.bias False
blocks.0.0.conv_dw.weight True
blocks.0.0.bn1.weight False
blocks.0.0.bn1.bias False
blocks.0.0.se.conv_reduce.weight True
blocks.0.0.se.conv_reduce.bias True
blocks.0.0.se.conv_expand.weight True
blocks.0.0.se.conv_expand.bias True
blocks.0.0.conv_pw.weight True
blocks.0.0.bn2.weight False
blocks.0.0.bn2.bias False
blocks.0.1.conv_dw.weight True
blocks.0.1.bn1.weight False
blocks.0.1.bn1.bias False
blocks.0.1.se.conv_reduce.weight True
blocks.0.1.se.conv_reduce.bias True
blocks.0.1.se.conv_expand.weight True
blocks.0.1.se.conv_expand.bias True
blocks.0.1.conv_pw.weight True
blocks.0.1.bn2.weight False
blocks.0.1.bn2.bias False
blocks.1.0.conv_pw.weight True
blocks.1.0.bn1.weight False
blocks.1.0.bn1.bias False
blocks.1.0.conv_dw.weight True
blocks.1.0.bn2.weight False
blocks.1.0.bn2.bias False
blocks.1.0.se.conv_reduc


  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:26,  3.79it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:07,  1.45it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:20,  1.21it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:25,  1.12it/s][A
Finding best initial lr:   5%|▌         | 5/100 [00:04<01:27,  1.08it/s][A
Finding best initial lr:   6%|▌         | 6/100 [00:05<01:28,  1.06it/s][A
Finding best initial lr:   7%|▋         | 7/100 [00:06<01:29,  1.0

Epoch 14:   3%|▎         | 56/1770 [00:47<-1:56:04, -7.23it/s, loss=0.167, v_num=7, val_loss=0.0577, val_acc=0.957, train_loss=0.226] 


Finding best initial lr:   9%|▉         | 9/100 [00:08<01:28,  1.03it/s][A
Finding best initial lr:  10%|█         | 10/100 [00:09<01:28,  1.01it/s][A
Finding best initial lr:  11%|█         | 11/100 [00:10<01:27,  1.01it/s][A
Finding best initial lr:  12%|█▏        | 12/100 [00:11<01:27,  1.01it/s][A
Finding best initial lr:  13%|█▎        | 13/100 [00:12<01:26,  1.01it/s][A
Finding best initial lr:  14%|█▍        | 14/100 [00:13<01:25,  1.01it/s][A
Finding best initial lr:  15%|█▌        | 15/100 [00:14<01:24,  1.01it/s][A
Finding best initial lr:  16%|█▌        | 16/100 [00:15<01:23,  1.01it/s][A
Finding best initial lr:  17%|█▋        | 17/100 [00:16<01:22,  1.01it/s][A
Finding best initial lr:  18%|█▊        | 18/100 [00:17<01:21,  1.01it/s][A
Finding best initial lr:  19%|█▉        | 19/100 [00:18<01:20,  1.01it/s][A
Finding best initial lr:  20%|██        | 20/100 [00:19<01:19,  1.00it/s][A
Finding best initial lr:  21%|██        | 21/100 [00:20<01:18,  1.00it/s][A

Epoch 1:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.155, v_num=7, val_loss=0.0714, val_acc=0.938, train_loss=0.274]  
Validating: 0it [00:00, ?it/s][A
Epoch 1:  97%|█████████▋| 1718/1770 [07:14<00:17,  3.04it/s, loss=0.155, v_num=7, val_loss=0.0714, val_acc=0.938, train_loss=0.274]
Validating:   4%|▎         | 2/54 [00:03<01:16,  1.47s/it][A
Epoch 1:  97%|█████████▋| 1720/1770 [07:15<00:16,  3.03it/s, loss=0.155, v_num=7, val_loss=0.0714, val_acc=0.938, train_loss=0.274]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 1:  97%|█████████▋| 1722/1770 [07:16<00:15,  3.03it/s, loss=0.155, v_num=7, val_loss=0.0714, val_acc=0.938, train_loss=0.274]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 1:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.155, v_num=7, val_loss=0.0714, val_acc=0.938, train_loss=0.274]
Validating:  15%|█▍        | 8/54 [00:06<00:29,  1.58it/s][A
Epoch 1:  98%|█████████▊| 1726/1770 [07:18<00:14,  3.02i

Epoch 1, global step 429: val_loss reached 0.05885 (best 0.05885), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.059_val_acc=0.953_fold1.ckpt" as top 1


Epoch 1: 100%|██████████| 1770/1770 [07:44<00:00,  2.95it/s, loss=0.155, v_num=7, val_loss=0.0588, val_acc=0.953, train_loss=0.327]
Epoch 2:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.169, v_num=7, val_loss=0.0588, val_acc=0.953, train_loss=0.261]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.169, v_num=7, val_loss=0.0588, val_acc=0.953, train_loss=0.261]
Validating:   4%|▎         | 2/54 [00:02<01:02,  1.21s/it][A
Epoch 2:  97%|█████████▋| 1720/1770 [07:13<00:16,  3.04it/s, loss=0.169, v_num=7, val_loss=0.0588, val_acc=0.953, train_loss=0.261]
Validating:   7%|▋         | 4/54 [00:03<00:41,  1.22it/s][A
Epoch 2:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.169, v_num=7, val_loss=0.0588, val_acc=0.953, train_loss=0.261]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.45it/s][A
Epoch 2:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.169, v_num=7, val_loss=0.0588, val_acc=0.953, tr

Epoch 2, global step 858: val_loss reached 0.05599 (best 0.05599), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.056_val_acc=0.955_fold1.ckpt" as top 1


Epoch 14:   3%|▎         | 56/1770 [17:51<-2:30:47, -0.32it/s, loss=0.167, v_num=7, val_loss=0.0577, val_acc=0.957, train_loss=0.226]

Finding best initial lr: 100%|██████████| 100/100 [17:11<00:00, 10.31s/it]







Epoch 2: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.169, v_num=7, val_loss=0.056, val_acc=0.955, train_loss=0.0448]
Epoch 3:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.955, train_loss=0.119]    
Validating: 0it [00:00, ?it/s][A
Epoch 3:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.955, train_loss=0.119]
Validating:   4%|▎         | 2/54 [00:03<01:11,  1.38s/it][A
Epoch 3:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.955, train_loss=0.119]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 3:  97%|█████████▋| 1722/1770 [07:16<00:15,  3.03it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.955, train_loss=0.119]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 3:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.955, train_l

Epoch 3, step 1287: val_loss was not in top 1


Epoch 3: 100%|██████████| 1770/1770 [07:44<00:00,  2.95it/s, loss=0.146, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=0.146]
Epoch 4:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.189, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=0.21]     
Validating: 0it [00:00, ?it/s][A
Epoch 4:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.189, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=0.21]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.33s/it][A
Epoch 4:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.189, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=0.21]
Validating:   7%|▋         | 4/54 [00:04<00:41,  1.19it/s][A
Epoch 4:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.189, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=0.21]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 4:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.189, v_num=7, val_loss=0.056, val_acc=0.957, train_loss=

Epoch 4, step 1716: val_loss was not in top 1


Epoch 4: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.189, v_num=7, val_loss=0.0578, val_acc=0.954, train_loss=0.444]
Epoch 5:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.172, v_num=7, val_loss=0.0578, val_acc=0.954, train_loss=0.241]    
Validating: 0it [00:00, ?it/s][A
Epoch 5:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.172, v_num=7, val_loss=0.0578, val_acc=0.954, train_loss=0.241]
Validating:   4%|▎         | 2/54 [00:02<01:06,  1.28s/it][A
Epoch 5:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0578, val_acc=0.954, train_loss=0.241]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.17it/s][A
Epoch 5:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0578, val_acc=0.954, train_loss=0.241]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 5:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.172, v_num=7, val_loss=0.0578, val_acc=0.954, tr

Epoch 5, step 2145: val_loss was not in top 1


Epoch 5: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.172, v_num=7, val_loss=0.059, val_acc=0.957, train_loss=0.279] 
Epoch 7:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.163, v_num=7, val_loss=0.057, val_acc=0.955, train_loss=0.151]    
Validating: 0it [00:00, ?it/s][A
Epoch 7:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.163, v_num=7, val_loss=0.057, val_acc=0.955, train_loss=0.151]
Validating:   4%|▎         | 2/54 [00:03<01:12,  1.39s/it][A
Epoch 7:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.163, v_num=7, val_loss=0.057, val_acc=0.955, train_loss=0.151]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.19it/s][A
Epoch 7:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.163, v_num=7, val_loss=0.057, val_acc=0.955, train_loss=0.151]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.45it/s][A
Epoch 7:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.163, v_num=7, val_loss=0.057, val_acc=0.955, train_l

Epoch 7, step 3003: val_loss was not in top 1


Epoch 7: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.163, v_num=7, val_loss=0.0629, val_acc=0.95, train_loss=0.288]
Epoch 8:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.157, v_num=7, val_loss=0.0629, val_acc=0.95, train_loss=0.284]    
Validating: 0it [00:00, ?it/s][A
Epoch 8:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.157, v_num=7, val_loss=0.0629, val_acc=0.95, train_loss=0.284]
Validating:   4%|▎         | 2/54 [00:03<01:13,  1.41s/it][A
Epoch 8:  97%|█████████▋| 1720/1770 [07:15<00:16,  3.04it/s, loss=0.157, v_num=7, val_loss=0.0629, val_acc=0.95, train_loss=0.284]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.17it/s][A
Epoch 8:  97%|█████████▋| 1722/1770 [07:16<00:15,  3.03it/s, loss=0.157, v_num=7, val_loss=0.0629, val_acc=0.95, train_loss=0.284]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 8:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.157, v_num=7, val_loss=0.0629, val_acc=0.95, train_lo

Epoch 8, global step 3432: val_loss reached 0.05466 (best 0.05466), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.055_val_acc=0.959_fold1.ckpt" as top 1


Epoch 8: 100%|██████████| 1770/1770 [07:44<00:00,  2.95it/s, loss=0.157, v_num=7, val_loss=0.0547, val_acc=0.959, train_loss=0.00386]
Epoch 9:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.175, v_num=7, val_loss=0.0547, val_acc=0.959, train_loss=0.0673]    
Validating: 0it [00:00, ?it/s][A
Epoch 9:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.175, v_num=7, val_loss=0.0547, val_acc=0.959, train_loss=0.0673]
Validating:   4%|▎         | 2/54 [00:02<01:07,  1.29s/it][A
Epoch 9:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.175, v_num=7, val_loss=0.0547, val_acc=0.959, train_loss=0.0673]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.16it/s][A
Epoch 9:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.175, v_num=7, val_loss=0.0547, val_acc=0.959, train_loss=0.0673]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 9:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.175, v_num=7, val_loss=0.0547, val_acc=0.9

Epoch 9, step 3861: val_loss was not in top 1


Epoch 9: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.175, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.308] 
Epoch 10:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.06it/s, loss=0.173, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.458]    
Validating: 0it [00:00, ?it/s][A
Epoch 10:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.173, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.458]
Validating:   4%|▎         | 2/54 [00:02<01:10,  1.35s/it][A
Epoch 10:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.173, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.458]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 10:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.173, v_num=7, val_loss=0.0577, val_acc=0.955, train_loss=0.458]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 10:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.173, v_num=7, val_loss=0.0577, val_acc=0.9

Epoch 10, step 4290: val_loss was not in top 1


Epoch 10: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.173, v_num=7, val_loss=0.0553, val_acc=0.957, train_loss=0.172]
Epoch 11:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.168, v_num=7, val_loss=0.0553, val_acc=0.957, train_loss=0.122]    
Validating: 0it [00:00, ?it/s][A
Epoch 11:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.168, v_num=7, val_loss=0.0553, val_acc=0.957, train_loss=0.122]
Validating:   4%|▎         | 2/54 [00:02<01:01,  1.19s/it][A
Epoch 11:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.168, v_num=7, val_loss=0.0553, val_acc=0.957, train_loss=0.122]
Validating:   7%|▋         | 4/54 [00:03<00:42,  1.18it/s][A
Epoch 11:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.168, v_num=7, val_loss=0.0553, val_acc=0.957, train_loss=0.122]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.45it/s][A
Epoch 11:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.168, v_num=7, val_loss=0.0553, val_acc=0.9

Epoch 11, step 4719: val_loss was not in top 1


Epoch 11: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.168, v_num=7, val_loss=0.0592, val_acc=0.953, train_loss=0.112]
Epoch 12:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.155, v_num=7, val_loss=0.0592, val_acc=0.953, train_loss=0.312]    
Validating: 0it [00:00, ?it/s][A
Epoch 12:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.155, v_num=7, val_loss=0.0592, val_acc=0.953, train_loss=0.312]
Validating:   4%|▎         | 2/54 [00:02<01:02,  1.21s/it][A
Epoch 12:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.155, v_num=7, val_loss=0.0592, val_acc=0.953, train_loss=0.312]
Validating:   7%|▋         | 4/54 [00:03<00:41,  1.21it/s][A
Epoch 12:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.155, v_num=7, val_loss=0.0592, val_acc=0.953, train_loss=0.312]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 12:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.155, v_num=7, val_loss=0.0592, val_acc=0.9

Epoch 12, step 5148: val_loss was not in top 1


Epoch 12: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.155, v_num=7, val_loss=0.0555, val_acc=0.957, train_loss=0.0841]
Epoch 13:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.158, v_num=7, val_loss=0.0555, val_acc=0.957, train_loss=0.319]     
Validating: 0it [00:00, ?it/s][A
Epoch 13:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.158, v_num=7, val_loss=0.0555, val_acc=0.957, train_loss=0.319]
Validating:   4%|▎         | 2/54 [00:02<01:00,  1.17s/it][A
Epoch 13:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0555, val_acc=0.957, train_loss=0.319]
Validating:   7%|▋         | 4/54 [00:03<00:40,  1.23it/s][A
Epoch 13:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0555, val_acc=0.957, train_loss=0.319]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.48it/s][A
Epoch 13:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0555, val_acc=0

Epoch 13, step 5577: val_loss was not in top 1


Epoch 13: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.158, v_num=7, val_loss=0.0568, val_acc=0.956, train_loss=0.0763]
Epoch 14:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.147, v_num=7, val_loss=0.0568, val_acc=0.956, train_loss=0.738]    
Validating: 0it [00:00, ?it/s][A
Epoch 14:  97%|█████████▋| 1718/1770 [07:14<00:17,  3.04it/s, loss=0.147, v_num=7, val_loss=0.0568, val_acc=0.956, train_loss=0.738]
Validating:   4%|▎         | 2/54 [00:03<01:13,  1.41s/it][A
Epoch 14:  97%|█████████▋| 1720/1770 [07:15<00:16,  3.03it/s, loss=0.147, v_num=7, val_loss=0.0568, val_acc=0.956, train_loss=0.738]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.14it/s][A
Epoch 14:  97%|█████████▋| 1722/1770 [07:16<00:15,  3.03it/s, loss=0.147, v_num=7, val_loss=0.0568, val_acc=0.956, train_loss=0.738]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 14:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.147, v_num=7, val_loss=0.0568, val_acc=0.

Epoch 14, step 6006: val_loss was not in top 1


Epoch 14: 100%|██████████| 1770/1770 [07:44<00:00,  2.95it/s, loss=0.147, v_num=7, val_loss=0.0586, val_acc=0.95, train_loss=0.379] 
                                                           [A

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training fold 2
Class sample counts [ 676 1381 1481 8725 1463]
After class sample counts [2028 2762 3406 8725 3950]
conv_stem.weight True
bn1.weight False
bn1.bias False
blocks.0.0.conv_dw.weight True
blocks.0.0.bn1.weight False
blocks.0.0.bn1.bias False
blocks.0.0.se.conv_reduce.weight True
blocks.0.0.se.conv_reduce.bias True
blocks.0.0.se.conv_expand.weight True
blocks.0.0.se.conv_expand.bias True
blocks.0.0.conv_pw.weight True
blocks.0.0.bn2.weight False
blocks.0.0.bn2.bias False
blocks.0.1.conv_dw.weight True
blocks.0.1.bn1.weight False
blocks.0.1.bn1.bias False
blocks.0.1.se.conv_reduce.weight True
blocks.0.1.se.conv_reduce.bias True
blocks.0.1.se.conv_expand.weight True
blocks.0.1.se.conv_expand.bias True
blocks.0.1.conv_pw.weight True
blocks.0.1.bn2.weight False
blocks.0.1.bn2.bias False
blocks.1.0.conv_pw.weight True
blocks.1.0.bn1.weight False
blocks.1.0.bn1.bias False
blocks.1.0.conv_dw.weight True
blocks.1.0.bn2.weight False
blocks.1.0.bn2.bias False
blocks.1.0.se.conv_reduc


  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:25,  3.82it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:07,  1.45it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:20,  1.21it/s][A

Epoch 14: 100%|██████████| 1770/1770 [07:55<00:00,  2.89it/s, loss=0.147, v_num=7, val_loss=0.0586, val_acc=0.95, train_loss=0.379]


Finding best initial lr:   4%|▍         | 4/100 [00:03<01:25,  1.12it/s][A
Finding best initial lr:   5%|▌         | 5/100 [00:04<01:28,  1.08it/s][A
Finding best initial lr:   6%|▌         | 6/100 [00:05<01:29,  1.05it/s][A
Finding best initial lr:   7%|▋         | 7/100 [00:06<01:29,  1.04it/s][A
Finding best initial lr:   8%|▊         | 8/100 [00:07<01:29,  1.03it/s][A
Finding best initial lr:   9%|▉         | 9/100 [00:08<01:28,  1.02it/s][A
Finding best initial lr:  10%|█         | 10/100 [00:09<01:29,  1.01it/s][A
Finding best initial lr:  11%|█         | 11/100 [00:10<01:28,  1.01it/s][A
Finding best initial lr:  12%|█▏        | 12/100 [00:11<01:27,  1.01it/s][A
Finding best initial lr:  13%|█▎        | 13/100 [00:12<01:26,  1.01it/s][A
Finding best initial lr:  14%|█▍        | 14/100 [00:13<01:25,  1.01it/s][A
Finding best initial lr:  15%|█▌        | 15/100 [00:14<01:24,  1.01it/s][A
Finding best initial lr:  16%|█▌        | 16/100 [00:15<01:23,  1.01it/s][A
Find

Epoch 14: 100%|██████████| 1770/1770 [08:37<00:00,  2.65it/s, loss=0.147, v_num=7, val_loss=0.0586, val_acc=0.95, train_loss=0.379]


Finding best initial lr:  46%|████▌     | 46/100 [00:44<00:53,  1.01it/s][A





Finding best initial lr:  47%|████▋     | 47/100 [00:45<00:52,  1.01it/s][A
Finding best initial lr:  48%|████▊     | 48/100 [00:46<00:51,  1.01it/s][A
Finding best initial lr:  49%|████▉     | 49/100 [00:47<00:50,  1.01it/s][A
Finding best initial lr:  50%|█████     | 50/100 [00:48<00:49,  1.00it/s][A
Finding best initial lr:  51%|█████     | 51/100 [00:49<00:48,  1.00it/s][A
Finding best initial lr:  52%|█████▏    | 52/100 [00:50<00:47,  1.00it/s][A
Finding best initial lr:  53%|█████▎    | 53/100 [00:51<00:46,  1.01it/s][A
Finding best initial lr:  54%|█████▍    | 54/100 [00:52<00:45,  1.01it/s][A
Finding best initial lr:  55%|█████▌    | 55/100 [00:53<00:44,  1.01it/s][A
Finding best initial lr:  56%|█████▌    | 56/100 [00:54<00:43,  1.01it/s][A
Finding best initial lr:  57%|█████▋    | 57/100 [00:55<00:42,  1.01it/s][A
Finding best initial lr:  58%|█████▊    | 58/100 [00:56<00:41,  1.01it/s][A
Finding best initial lr:  59%|█████▉    | 59/100 [00:57<00:40,  1.01it/s][

Epoch 1:  97%|█████████▋| 1716/1770 [07:13<00:17,  3.04it/s, loss=0.138, v_num=7, val_loss=0.0459, val_acc=0.953, train_loss=0.261]     
Validating: 0it [00:00, ?it/s][A
Epoch 1:  97%|█████████▋| 1718/1770 [07:15<00:17,  3.03it/s, loss=0.138, v_num=7, val_loss=0.0459, val_acc=0.953, train_loss=0.261]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.34s/it][A
Epoch 1:  97%|█████████▋| 1720/1770 [07:17<00:16,  3.02it/s, loss=0.138, v_num=7, val_loss=0.0459, val_acc=0.953, train_loss=0.261]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 1:  97%|█████████▋| 1722/1770 [07:18<00:15,  3.02it/s, loss=0.138, v_num=7, val_loss=0.0459, val_acc=0.953, train_loss=0.261]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 1:  97%|█████████▋| 1724/1770 [07:19<00:15,  3.01it/s, loss=0.138, v_num=7, val_loss=0.0459, val_acc=0.953, train_loss=0.261]
Validating:  15%|█▍        | 8/54 [00:06<00:29,  1.57it/s][A
Epoch 1:  98%|█████████▊| 1726/1770 [07:20<00:14,  3.

Epoch 1, global step 429: val_loss reached 0.05561 (best 0.05561), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.056_val_acc=0.953_fold2.ckpt" as top 1


Epoch 1: 100%|██████████| 1770/1770 [07:47<00:00,  2.93it/s, loss=0.138, v_num=7, val_loss=0.0556, val_acc=0.953, train_loss=0.253]
Epoch 2:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.133, v_num=7, val_loss=0.0556, val_acc=0.953, train_loss=0.369]    
Validating: 0it [00:00, ?it/s][A
Epoch 2:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.133, v_num=7, val_loss=0.0556, val_acc=0.953, train_loss=0.369]
Validating:   4%|▎         | 2/54 [00:03<01:13,  1.42s/it][A
Epoch 2:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.133, v_num=7, val_loss=0.0556, val_acc=0.953, train_loss=0.369]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.17it/s][A
Epoch 2:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.133, v_num=7, val_loss=0.0556, val_acc=0.953, train_loss=0.369]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 2:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.133, v_num=7, val_loss=0.0556, val_acc=0.953, tr

Epoch 2, global step 858: val_loss reached 0.05152 (best 0.05152), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.052_val_acc=0.959_fold2.ckpt" as top 1


Epoch 2: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.133, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.168]
                                                           [A

Finding best initial lr: 100%|██████████| 100/100 [17:14<00:00, 10.35s/it]

Epoch 3:   0%|          | 0/1770 [00:00<00:00, -493520.29it/s, loss=0.133, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.168] 




Epoch 3:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.129, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.0369]   
Validating: 0it [00:00, ?it/s][A
Epoch 3:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.05it/s, loss=0.129, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.0369]
Validating:   4%|▎         | 2/54 [00:02<01:07,  1.30s/it][A
Epoch 3:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.129, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.0369]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.14it/s][A
Epoch 3:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.129, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.0369]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.42it/s][A
Epoch 3:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.129, v_num=7, val_loss=0.0515, val_acc=0.959, train_loss=0.0369]
Validating:  15%|█▍        | 8/54 [00:06<00:29,  1.57it/s][A
Epoch 3:  98%|█████████▊| 1726/1770 [07:17<00:14, 

Epoch 3, step 1287: val_loss was not in top 1


Epoch 3: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.129, v_num=7, val_loss=0.0521, val_acc=0.958, train_loss=0.209] 
Epoch 4:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.119, v_num=7, val_loss=0.0521, val_acc=0.958, train_loss=0.0483]    
Validating: 0it [00:00, ?it/s][A
Epoch 4:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.119, v_num=7, val_loss=0.0521, val_acc=0.958, train_loss=0.0483]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.34s/it][A
Epoch 4:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.119, v_num=7, val_loss=0.0521, val_acc=0.958, train_loss=0.0483]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.17it/s][A
Epoch 4:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.119, v_num=7, val_loss=0.0521, val_acc=0.958, train_loss=0.0483]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 4:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.119, v_num=7, val_loss=0.0521, val_acc=0.95

Epoch 4, step 1716: val_loss was not in top 1


Epoch 4: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.119, v_num=7, val_loss=0.0545, val_acc=0.955, train_loss=0.0947]
Epoch 8:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.12, v_num=7, val_loss=0.0587, val_acc=0.95, train_loss=0.0706]      
Validating: 0it [00:00, ?it/s][A
Epoch 8:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.12, v_num=7, val_loss=0.0587, val_acc=0.95, train_loss=0.0706]
Validating:   4%|▎         | 2/54 [00:03<01:14,  1.44s/it][A
Epoch 8:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.12, v_num=7, val_loss=0.0587, val_acc=0.95, train_loss=0.0706]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 8:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.12, v_num=7, val_loss=0.0587, val_acc=0.95, train_loss=0.0706]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 8:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.12, v_num=7, val_loss=0.0587, val_acc=0.95, train

Epoch 8, step 3432: val_loss was not in top 1


Epoch 8: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.12, v_num=7, val_loss=0.0572, val_acc=0.95, train_loss=0.148] 
Epoch 9:  34%|███▍      | 598/1770 [02:30<14:43,  1.33it/s, loss=0.122, v_num=7, val_loss=0.0572, val_acc=0.95, train_loss=0.0307]     

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 12:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.104, v_num=7, val_loss=0.0643, val_acc=0.949, train_loss=0.00604] 
Validating: 0it [00:00, ?it/s][A
Epoch 12:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.104, v_num=7, val_loss=0.0643, val_acc=0.949, train_loss=0.00604]
Validating:   4%|▎         | 2/54 [00:03<01:13,  1.41s/it][A
Epoch 12:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.104, v_num=7, val_loss=0.0643, val_acc=0.949, train_loss=0.00604]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 12:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.104, v_num=7, val_loss=0.0643, val_acc=0.949, train_loss=0.00604]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 12:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.104, v_num=7, val_loss=0.0643, val_acc=0.949, train_loss=0.00604]
Validating:  15%|█▍        | 8/54 [00:06<00:28,  1.59it/s][A
Epoch 12:  98%|█████████▊| 1726/1770 [07:1

Epoch 12, step 5148: val_loss was not in top 1


Epoch 12: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214]  
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 13:   0%|          | 2/1770 [00:00<-1:59:58, -603.67it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<01:47,  2.04s/it][A
Epoch 13:   0%|          | 4/1770 [00:03<-1:59:46, -118.14it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214]
Validating:   6%|▌         | 3/54 [00:03<00:48,  1.05it/s][A
Epoch 13:   0%|          | 6/1770 [00:04<-1:59:40, -85.80it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214] 
Validating:   9%|▉         | 5/54 [00:04<00:35,  1.38it/s][A
Epoch 13:   0%|          | 8/1770 [00:05<-1:59:35, -68.25it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.56it/s][A
Epoch 13:   1%|          | 10/1770 [00:06<-1:59:29, -56.55it/s, loss=0.104, v_num=7, val_loss=0.065, val_acc=0.948, train_loss=0.0214]
Validating:  17%|█▋        | 9/54 [00:06<00

Epoch 13, step 5149: val_loss was not in top 1


Epoch 13:   3%|▎         | 56/1770 [00:33<-1:57:15, -10.35it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118]
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 14:   0%|          | 2/1770 [00:00<-1:59:58, -589.92it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118]  
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<01:59,  2.25s/it][A
Epoch 14:   0%|          | 4/1770 [00:03<-1:59:44, -109.77it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118]
Validating:   6%|▌         | 3/54 [00:03<00:50,  1.01it/s][A
Epoch 14:   0%|          | 6/1770 [00:04<-1:59:39, -82.09it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.35it/s][A
Epoch 14:   0%|          | 8/1770 [00:05<-1:59:34, -65.83it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.54it/s][A
Epoch 14:   1%|          | 10/1770 [00:07<-1:59:28, -54.87it/s, loss=0.104, v_num=7, val_loss=0.0668, val_acc=0.945, train_loss=0.118]
Validating:  17%|█▋        | 9/54 [00:06<0

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training fold 3
Class sample counts [ 675 1382 1482 8725 1462]
After class sample counts [2025 2764 3408 8725 3947]
conv_stem.weight True
bn1.weight False
bn1.bias False
blocks.0.0.conv_dw.weight True
blocks.0.0.bn1.weight False
blocks.0.0.bn1.bias False
blocks.0.0.se.conv_reduce.weight True
blocks.0.0.se.conv_reduce.bias True
blocks.0.0.se.conv_expand.weight True
blocks.0.0.se.conv_expand.bias True
blocks.0.0.conv_pw.weight True
blocks.0.0.bn2.weight False
blocks.0.0.bn2.bias False
blocks.0.1.conv_dw.weight True
blocks.0.1.bn1.weight False
blocks.0.1.bn1.bias False
blocks.0.1.se.conv_reduce.weight True
blocks.0.1.se.conv_reduce.bias True
blocks.0.1.se.conv_expand.weight True
blocks.0.1.se.conv_expand.bias True
blocks.0.1.conv_pw.weight True
blocks.0.1.bn2.weight False
blocks.0.1.bn2.bias False
blocks.1.0.conv_pw.weight True
blocks.1.0.bn1.weight False
blocks.1.0.bn1.bias False
blocks.1.0.conv_dw.weight True
blocks.1.0.bn2.weight False
blocks.1.0.bn2.bias False
blocks.1.0.se.conv_reduc


  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:26,  3.81it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:07,  1.45it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:20,  1.21it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:25,  1.12it/s][A
Finding best initial lr:   5%|▌         | 5/100 [00:04<01:28,  1.08it/s][A
Finding best initial lr:   6%|▌         | 6/100 [00:05<01:29,  1.06it/s][A
Finding best initial lr:   7%|▋         | 7/100 [00:06<01:29,  1.0

Epoch 14:   3%|▎         | 56/1770 [00:52<-1:55:36, -6.49it/s, loss=0.104, v_num=7, val_loss=0.0666, val_acc=0.946, train_loss=0.00989] 


Finding best initial lr:  14%|█▍        | 14/100 [00:13<01:25,  1.01it/s][A
Finding best initial lr:  15%|█▌        | 15/100 [00:14<01:24,  1.01it/s][A
Finding best initial lr:  16%|█▌        | 16/100 [00:15<01:23,  1.01it/s][A
Finding best initial lr:  17%|█▋        | 17/100 [00:16<01:22,  1.01it/s][A
Finding best initial lr:  18%|█▊        | 18/100 [00:17<01:21,  1.01it/s][A
Finding best initial lr:  19%|█▉        | 19/100 [00:18<01:20,  1.01it/s][A
Finding best initial lr:  20%|██        | 20/100 [00:19<01:19,  1.00it/s][A
Finding best initial lr:  21%|██        | 21/100 [00:20<01:18,  1.00it/s][A
Finding best initial lr:  22%|██▏       | 22/100 [00:21<01:17,  1.01it/s][A
Finding best initial lr:  23%|██▎       | 23/100 [00:22<01:16,  1.01it/s][A
Finding best initial lr:  24%|██▍       | 24/100 [00:23<01:15,  1.01it/s][A
Finding best initial lr:  25%|██▌       | 25/100 [00:24<01:14,  1.01it/s][A
Finding best initial lr:  26%|██▌       | 26/100 [00:25<01:13,  1.01it/s][

Epoch 1:  51%|█████     | 903/1770 [03:47<06:30,  2.22it/s, loss=0.154, v_num=7, val_loss=0.0561, val_acc=0.945, train_loss=0.109]     

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 4:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.171, v_num=7, val_loss=0.0445, val_acc=0.967, train_loss=0.0936] 
Validating: 0it [00:00, ?it/s][A
Epoch 4:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.171, v_num=7, val_loss=0.0445, val_acc=0.967, train_loss=0.0936]
Validating:   4%|▎         | 2/54 [00:03<01:11,  1.37s/it][A
Epoch 4:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.171, v_num=7, val_loss=0.0445, val_acc=0.967, train_loss=0.0936]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.15it/s][A
Epoch 4:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.171, v_num=7, val_loss=0.0445, val_acc=0.967, train_loss=0.0936]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.43it/s][A
Epoch 4:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.171, v_num=7, val_loss=0.0445, val_acc=0.967, train_loss=0.0936]
Validating:  15%|█▍        | 8/54 [00:06<00:28,  1.59it/s][A
Epoch 4:  98%|█████████▊| 1726/1770 [07:18<00:14,  3

Epoch 4, step 1716: val_loss was not in top 1


Epoch 4: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.171, v_num=7, val_loss=0.0464, val_acc=0.964, train_loss=0.182] 
Epoch 5:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.157, v_num=7, val_loss=0.0464, val_acc=0.964, train_loss=0.227]    
Validating: 0it [00:00, ?it/s][A
Epoch 5:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.157, v_num=7, val_loss=0.0464, val_acc=0.964, train_loss=0.227]
Validating:   4%|▎         | 2/54 [00:03<01:10,  1.36s/it][A
Epoch 5:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.157, v_num=7, val_loss=0.0464, val_acc=0.964, train_loss=0.227]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.16it/s][A
Epoch 5:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.157, v_num=7, val_loss=0.0464, val_acc=0.964, train_loss=0.227]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 5:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.157, v_num=7, val_loss=0.0464, val_acc=0.964, t

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 12:   3%|▎         | 52/1770 [00:31<-1:57:25, -11.06it/s, loss=0.163, v_num=7, val_loss=0.0468, val_acc=0.966, train_loss=0.253]
Validating:  94%|█████████▍| 51/54 [00:31<00:01,  1.79it/s][A
Epoch 12:   3%|▎         | 54/1770 [00:32<-1:57:19, -10.62it/s, loss=0.163, v_num=7, val_loss=0.0468, val_acc=0.966, train_loss=0.253]
Validating:  98%|█████████▊| 53/54 [00:32<00:00,  1.79it/s][A
Epoch 12:   3%|▎         | 56/1770 [00:33<-1:57:13, -10.24it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989]
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 13:   0%|          | 2/1770 [00:00<-1:59:58, -602.95it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:01<01:42,  1.93s/it][A
Epoch 13:   0%|          | 4/1770 [00:03<-1:59:46, -122.91it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989]
Validating:   6%|▌         | 3/54 [00:03<00:46,  1.09it/s][A
Epoch 13:   0%|          | 6/1770 [00:04<-1:59:40, -86.77it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.34it/s][A
Epoch 13:   0%|          | 8/1770 [00:05<-1:59:35, -68.41it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.53it/s][A
Epoch 13:   1%|          | 10/1770 [00:06<-1:59:29, -56.53it/s, loss=0.163, v_num=7, val_loss=0.0471, val_acc=0.967, train_loss=0.0989]
Validating:  17%|█▋        | 9/54 [00:

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 14:   0%|          | 2/1770 [00:00<-1:59:57, -585.84it/s, loss=0.163, v_num=7, val_loss=0.0462, val_acc=0.968, train_loss=0.0382] 
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<02:09,  2.44s/it][A
Epoch 14:   0%|          | 4/1770 [00:03<-1:59:44, -104.68it/s, loss=0.163, v_num=7, val_loss=0.0462, val_acc=0.968, train_loss=0.0382]
Validating:   6%|▌         | 3/54 [00:03<00:52,  1.02s/it][A
Epoch 14:   0%|          | 6/1770 [00:04<-1:59:38, -79.61it/s, loss=0.163, v_num=7, val_loss=0.0462, val_acc=0.968, train_loss=0.0382] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.33it/s][A
Epoch 14:   0%|          | 8/1770 [00:06<-1:59:33, -64.17it/s, loss=0.163, v_num=7, val_loss=0.0462, val_acc=0.968, train_loss=0.0382]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.53it/s][A
Epoch 14:   1%|          | 10/1770 [00:07<-1:59:28, -53.70it/s, loss=0.163, v_num=7, val_loss=0.0462, val_acc=0.968, train_loss=0.0382]
Validating:  17%|█▋        | 9/54 [00:

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Training fold 4
Class sample counts [ 675 1382 1482 8725 1462]
After class sample counts [2025 2764 3408 8725 3947]
conv_stem.weight True
bn1.weight False
bn1.bias False
blocks.0.0.conv_dw.weight True
blocks.0.0.bn1.weight False
blocks.0.0.bn1.bias False
blocks.0.0.se.conv_reduce.weight True
blocks.0.0.se.conv_reduce.bias True
blocks.0.0.se.conv_expand.weight True
blocks.0.0.se.conv_expand.bias True
blocks.0.0.conv_pw.weight True
blocks.0.0.bn2.weight False
blocks.0.0.bn2.bias False
blocks.0.1.conv_dw.weight True
blocks.0.1.bn1.weight False
blocks.0.1.bn1.bias False
blocks.0.1.se.conv_reduce.weight True
blocks.0.1.se.conv_reduce.bias True
blocks.0.1.se.conv_expand.weight True
blocks.0.1.se.conv_expand.bias True
blocks.0.1.conv_pw.weight True
blocks.0.1.bn2.weight False
blocks.0.1.bn2.bias False
blocks.1.0.conv_pw.weight True
blocks.1.0.bn1.weight False
blocks.1.0.bn1.bias False
blocks.1.0.conv_dw.weight True
blocks.1.0.bn2.weight False
blocks.1.0.bn2.bias False
blocks.1.0.se.conv_reduc


  | Name           | Type           | Params
--------------------------------------------------
0 | valid_accuracy | Accuracy       | 0     
1 | test_accuracy  | Accuracy       | 0     
2 | criterion      | BiTemperedLoss | 0     
3 | model          | EfficientNet   | 17.6 M
--------------------------------------------------
17.4 M    Trainable params
125 K     Non-trainable params
17.6 M    Total params

Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s][A
Finding best initial lr:   1%|          | 1/100 [00:00<00:25,  3.85it/s][A
Finding best initial lr:   2%|▏         | 2/100 [00:01<01:07,  1.45it/s][A
Finding best initial lr:   3%|▎         | 3/100 [00:02<01:20,  1.21it/s][A
Finding best initial lr:   4%|▍         | 4/100 [00:03<01:25,  1.12it/s][A
Finding best initial lr:   5%|▌         | 5/100 [00:04<01:27,  1.08it/s][A
Finding best initial lr:   6%|▌         | 6/100 [00:05<01:28,  1.06it/s][A
Finding best initial lr:   7%|▋         | 7/100 [00:06<01:29,  1.0

Epoch 14:   3%|▎         | 56/1770 [00:52<-1:55:38, -6.52it/s, loss=0.163, v_num=7, val_loss=0.0463, val_acc=0.968, train_loss=0.0952] 


Finding best initial lr:  13%|█▎        | 13/100 [00:12<01:26,  1.01it/s][A
Finding best initial lr:  14%|█▍        | 14/100 [00:13<01:25,  1.01it/s][A
Finding best initial lr:  15%|█▌        | 15/100 [00:14<01:24,  1.01it/s][A
Finding best initial lr:  16%|█▌        | 16/100 [00:15<01:23,  1.01it/s][A
Finding best initial lr:  17%|█▋        | 17/100 [00:16<01:22,  1.01it/s][A
Finding best initial lr:  18%|█▊        | 18/100 [00:17<01:21,  1.01it/s][A
Finding best initial lr:  19%|█▉        | 19/100 [00:18<01:20,  1.01it/s][A
Finding best initial lr:  20%|██        | 20/100 [00:19<01:19,  1.00it/s][A
Finding best initial lr:  21%|██        | 21/100 [00:20<01:18,  1.00it/s][A
Finding best initial lr:  22%|██▏       | 22/100 [00:21<01:17,  1.00it/s][A
Finding best initial lr:  23%|██▎       | 23/100 [00:22<01:16,  1.01it/s][A
Finding best initial lr:  24%|██▍       | 24/100 [00:23<01:15,  1.01it/s][A
Finding best initial lr:  25%|██▌       | 25/100 [00:24<01:14,  1.01it/s][

Epoch 1:  64%|██████▎   | 1128/1770 [04:44<04:10,  2.56it/s, loss=0.163, v_num=7, val_loss=0.0543, val_acc=0.977, train_loss=0.058]    

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 4:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.158, v_num=7, val_loss=0.0706, val_acc=0.948, train_loss=0.048]
Validating:   4%|▎         | 2/54 [00:02<01:07,  1.29s/it][A
Epoch 4:  97%|█████████▋| 1720/1770 [07:13<00:16,  3.05it/s, loss=0.158, v_num=7, val_loss=0.0706, val_acc=0.948, train_loss=0.048]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.19it/s][A
Epoch 4:  97%|█████████▋| 1722/1770 [07:14<00:15,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0706, val_acc=0.948, train_loss=0.048]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 4:  97%|█████████▋| 1724/1770 [07:15<00:15,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0706, val_acc=0.948, train_loss=0.048]
Validating:  15%|█▍        | 8/54 [00:06<00:28,  1.60it/s][A
Epoch 4:  98%|█████████▊| 1726/1770 [07:16<00:14,  3.04it/s, loss=0.158, v_num=7, val_loss=0.0706, val_acc=0.948, train_loss=0.048]
Validating:  19%|█▊        | 10/54 [00:07<00:26,  1.68it/s][A
Epoch 4:  98%|█████████▊| 172

Epoch 4, global step 1716: val_loss reached 0.06533 (best 0.06533), saving model to "/opt/favordata/AI/Felix/kaggle-cassava/trained-models/sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53_tune/tf_efficientnet_b4_ns_bitempered_smooth=0.05_val_loss=0.065_val_acc=0.951_fold4.ckpt" as top 1


Epoch 4: 100%|██████████| 1770/1770 [07:42<00:00,  2.96it/s, loss=0.158, v_num=7, val_loss=0.0653, val_acc=0.951, train_loss=0.202]
Epoch 5:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.191, v_num=7, val_loss=0.0653, val_acc=0.951, train_loss=0.014]    
Validating: 0it [00:00, ?it/s][A
Epoch 5:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.191, v_num=7, val_loss=0.0653, val_acc=0.951, train_loss=0.014]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.34s/it][A
Epoch 5:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.191, v_num=7, val_loss=0.0653, val_acc=0.951, train_loss=0.014]
Validating:   7%|▋         | 4/54 [00:04<00:41,  1.19it/s][A
Epoch 5:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.191, v_num=7, val_loss=0.0653, val_acc=0.951, train_loss=0.014]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 5:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.191, v_num=7, val_loss=0.0653, val_acc=0.951, tr

Epoch 5, step 2145: val_loss was not in top 1


Epoch 5: 100%|██████████| 1770/1770 [07:42<00:00,  2.96it/s, loss=0.191, v_num=7, val_loss=0.0763, val_acc=0.939, train_loss=0.0875]
Epoch 6:   1%|          | 22/1770 [00:05<-1:59:33, -62.90it/s, loss=0.191, v_num=7, val_loss=0.0763, val_acc=0.939, train_loss=0.225]  

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 9:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.2, v_num=7, val_loss=0.0677, val_acc=0.951, train_loss=0.0123]   
Validating: 0it [00:00, ?it/s][A
Epoch 9:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.2, v_num=7, val_loss=0.0677, val_acc=0.951, train_loss=0.0123]
Validating:   4%|▎         | 2/54 [00:02<01:10,  1.36s/it][A
Epoch 9:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.2, v_num=7, val_loss=0.0677, val_acc=0.951, train_loss=0.0123]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.16it/s][A
Epoch 9:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.2, v_num=7, val_loss=0.0677, val_acc=0.951, train_loss=0.0123]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 9:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.2, v_num=7, val_loss=0.0677, val_acc=0.951, train_loss=0.0123]
Validating:  15%|█▍        | 8/54 [00:06<00:28,  1.60it/s][A
Epoch 9:  98%|█████████▊| 1726/1770 [07:17<00:14,  3.03it/s,

Epoch 9, step 3861: val_loss was not in top 1


Epoch 9: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.2, v_num=7, val_loss=0.0655, val_acc=0.951, train_loss=0.238] 
Epoch 10:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.06it/s, loss=0.18, v_num=7, val_loss=0.0655, val_acc=0.951, train_loss=0.146]     
Validating: 0it [00:00, ?it/s][A
Epoch 10:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.05it/s, loss=0.18, v_num=7, val_loss=0.0655, val_acc=0.951, train_loss=0.146]
Validating:   4%|▎         | 2/54 [00:02<01:03,  1.22s/it][A
Epoch 10:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.18, v_num=7, val_loss=0.0655, val_acc=0.951, train_loss=0.146]
Validating:   7%|▋         | 4/54 [00:04<00:43,  1.16it/s][A
Epoch 10:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.18, v_num=7, val_loss=0.0655, val_acc=0.951, train_loss=0.146]
Validating:  11%|█         | 6/54 [00:05<00:33,  1.44it/s][A
Epoch 10:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.18, v_num=7, val_loss=0.0655, val_acc=0.951, tr

Epoch 10, step 4290: val_loss was not in top 1


Epoch 10: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.18, v_num=7, val_loss=0.0747, val_acc=0.942, train_loss=0.149]
Epoch 11:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.188, v_num=7, val_loss=0.0747, val_acc=0.942, train_loss=0.476]    
Validating: 0it [00:00, ?it/s][A
Epoch 11:  97%|█████████▋| 1718/1770 [07:12<00:17,  3.05it/s, loss=0.188, v_num=7, val_loss=0.0747, val_acc=0.942, train_loss=0.476]
Validating:   4%|▎         | 2/54 [00:02<01:08,  1.32s/it][A
Epoch 11:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0747, val_acc=0.942, train_loss=0.476]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.19it/s][A
Epoch 11:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0747, val_acc=0.942, train_loss=0.476]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 11:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.04it/s, loss=0.188, v_num=7, val_loss=0.0747, val_acc=0.94

Epoch 11, step 4719: val_loss was not in top 1


Epoch 11: 100%|██████████| 1770/1770 [07:42<00:00,  2.96it/s, loss=0.188, v_num=7, val_loss=0.0674, val_acc=0.949, train_loss=0.152]
Epoch 12:  97%|█████████▋| 1716/1770 [07:11<00:17,  3.05it/s, loss=0.181, v_num=7, val_loss=0.0674, val_acc=0.949, train_loss=0.298]    
Validating: 0it [00:00, ?it/s][A
Epoch 12:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.04it/s, loss=0.181, v_num=7, val_loss=0.0674, val_acc=0.949, train_loss=0.298]
Validating:   4%|▎         | 2/54 [00:03<01:10,  1.36s/it][A
Epoch 12:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.181, v_num=7, val_loss=0.0674, val_acc=0.949, train_loss=0.298]
Validating:   7%|▋         | 4/54 [00:04<00:41,  1.19it/s][A
Epoch 12:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.181, v_num=7, val_loss=0.0674, val_acc=0.949, train_loss=0.298]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 12:  97%|█████████▋| 1724/1770 [07:17<00:15,  3.03it/s, loss=0.181, v_num=7, val_loss=0.0674, val_acc=0.9

Epoch 12, step 5148: val_loss was not in top 1


Epoch 12: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.181, v_num=7, val_loss=0.075, val_acc=0.939, train_loss=0.789] 
Epoch 13:  97%|█████████▋| 1716/1770 [07:10<00:17,  3.06it/s, loss=0.186, v_num=7, val_loss=0.075, val_acc=0.939, train_loss=0.298]    
Validating: 0it [00:00, ?it/s][A
Epoch 13:  97%|█████████▋| 1718/1770 [07:13<00:17,  3.05it/s, loss=0.186, v_num=7, val_loss=0.075, val_acc=0.939, train_loss=0.298]
Validating:   4%|▎         | 2/54 [00:02<01:09,  1.33s/it][A
Epoch 13:  97%|█████████▋| 1720/1770 [07:14<00:16,  3.04it/s, loss=0.186, v_num=7, val_loss=0.075, val_acc=0.939, train_loss=0.298]
Validating:   7%|▋         | 4/54 [00:04<00:42,  1.18it/s][A
Epoch 13:  97%|█████████▋| 1722/1770 [07:15<00:15,  3.04it/s, loss=0.186, v_num=7, val_loss=0.075, val_acc=0.939, train_loss=0.298]
Validating:  11%|█         | 6/54 [00:05<00:32,  1.46it/s][A
Epoch 13:  97%|█████████▋| 1724/1770 [07:16<00:15,  3.03it/s, loss=0.186, v_num=7, val_loss=0.075, val_acc=0.939, t

Epoch 13, step 5577: val_loss was not in top 1


Epoch 13: 100%|██████████| 1770/1770 [07:43<00:00,  2.96it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467]
                                                           [A

Trainer was signaled to stop but required minimum epochs (15) or minimum steps (None) has not been met. Training will continue...


Epoch 14:   0%|          | 2/1770 [00:00<-1:59:58, -602.12it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467]  
Validating: 0it [00:00, ?it/s][A
Validating:   2%|▏         | 1/54 [00:02<02:02,  2.31s/it][A
Epoch 14:   0%|          | 4/1770 [00:03<-1:59:44, -107.19it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467]
Validating:   6%|▌         | 3/54 [00:03<00:51,  1.01s/it][A
Epoch 14:   0%|          | 6/1770 [00:04<-1:59:39, -81.16it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467] 
Validating:   9%|▉         | 5/54 [00:04<00:36,  1.35it/s][A
Epoch 14:   0%|          | 8/1770 [00:05<-1:59:34, -65.31it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467]
Validating:  13%|█▎        | 7/54 [00:05<00:30,  1.55it/s][A
Epoch 14:   1%|          | 10/1770 [00:07<-1:59:28, -54.57it/s, loss=0.186, v_num=7, val_loss=0.0743, val_acc=0.941, train_loss=0.0467]
Validating:  17%|█▋        | 9/54 [00

Epoch 14, step 5578: val_loss was not in top 1


Epoch 14:   3%|▎         | 56/1770 [00:33<-1:57:15, -10.33it/s, loss=0.186, v_num=7, val_loss=0.0736, val_acc=0.943, train_loss=0.394] 
                                                           [A