# TensorFlow ArcFace

### Ref https://github.com/peteryuX/arcface-tf2

In [1]:
from insightface.recognition.arcface_torch.dataset import MXFaceDataset, DataLoaderX
import torch.utils.data.distributed
import torch.distributed as dist
from easydict import EasyDict as edict
import time
import os

cfg = edict()
cfg.rec = "/raid/workspace/honghee/faces_emore/"
local_rank = 0
cfg.batch_size = 64

try:
    world_size = int(os.environ['WORLD_SIZE'])
    rank = int(os.environ['RANK'])
    dist_url = "tcp://{}:{}".format(os.environ["MASTER_ADDR"], os.environ["MASTER_PORT"])
except KeyError:
    world_size = 1
    rank = 0
    dist_url = "tcp://127.0.0.1:12584"

os.environ["CUDA_VISIBLE_DEVICES"] = "1"
dist.init_process_group(backend='nccl', init_method=dist_url, rank=rank, world_size=world_size)
torch.cuda.set_device(local_rank)    

A100-SXM4-40GB with CUDA capability sm_80 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70 sm_75.
If you want to use the A100-SXM4-40GB GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [2]:
!echo $CUDA_VISIBLE_DEVICES

1


In [3]:
train_set = MXFaceDataset(root_dir=cfg.rec, local_rank=local_rank)
train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_set, shuffle=True)
train_loader = DataLoaderX(
    local_rank=local_rank, dataset=train_set, batch_size=cfg.batch_size,
    sampler=train_sampler, num_workers=2, pin_memory=True, drop_last=True)

In [4]:
# for step, (x_batch_train, y_batch_train) in enumerate(train_loader):
#     x_batch_train = x_batch_train.cpu().numpy()
#     y_batch_train = y_batch_train.cpu().numpy()
#     print(y_batch_train)
#     time.sleep(1)

In [5]:
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from models import ArcFaceModel
# from losses import SoftmaxLoss
from losses import softmax_loss
import dataset
import tensorflow as tf
import os
import logging

tf.get_logger().setLevel(logging.ERROR)

In [6]:
### IJB-C Dataset
# batch_size = 128
# input_size = 112
# embd_shape = 512
# head_type = 'ArcHead'
# backbone_type = 'MobileNetV2'
# w_decay=5e-4
# num_classes = 3584 
# base_lr = 0.01
# dataset_len = 13033 
# epochs = 100
# steps_per_epoch = dataset_len // batch_size

### MS1M dataset
num_classes = 85742 
dataset_len = 5822653
batch_size = 64 # Initially 128
input_size = 112
embd_shape = 512
train_size = int(0.8 * dataset_len)
steps_per_epoch = train_size // batch_size
val_size = dataset_len - train_size
validation_steps = val_size // batch_size

w_decay=5e-4
epochs = 100

save_steps = 1000
steps = 1
is_ccrop=False
binary_img=True

is_Adam = False   # True
projection_head = False  # True
dgx = True

head_type = 'ArcHead' # ''ArcHead', CosHead', 'SphereHead', 'CurHead', 'AdaHead', CadHead'  
# Backbones w/ pretrained weights:
#     MobileNet, MobileNetV2, InceptionResNetV2, InceptionV3, ResNet50, ResNet50V2, ResNet101V2, NASNetLarge, NASNetMobile, Xception, MobileNetV3Large, MobileNetV3Small, EfficientNetLite0~6, EfficientNetB0~7
# Backbones w/o pretrained weights:
#      MnasNetA1, MnasNetB1, MnasNetSmall 
backbone_type = 'ResNet50' 

if head_type == 'SphereHead':
    base_lr = 0.01 
    margin = 1.35
    logist_scale = 30.0 
elif head_type == 'CosHead':
    base_lr = 0.01 
    margin=0.35
    logist_scale=64
elif head_type == 'ArcHead':
    base_lr = 0.1 
    margin=0.5
    logist_scale=64
elif head_type == 'CurHead': 
    base_lr = 0.1 
    margin=0.5
    logist_scale=64
elif head_type == 'CadHead': 
    base_lr = 0.1 
    margin=0.0
    logist_scale=64
elif head_type == 'AdaHead':
    base_lr = 0.1 
    margin=0.0
    logist_scale=64
else:
    base_lr = 0.01 # initially 0.01
    
print(head_type)
print(backbone_type)
print("projection head:", projection_head)
print("Adam:", is_Adam)
print("epoch:", epochs)
print("batch size:", batch_size)

ArcHead
ResNet50
projection head: False
Adam: False
epoch: 100
batch size: 64


In [7]:
model = ArcFaceModel(size=input_size,
                         backbone_type=backbone_type,
                         num_classes=num_classes,
                         margin=margin, 
                         logist_scale=logist_scale,
                         head_type=head_type,
                         embd_shape=embd_shape,
                         w_decay=w_decay,
                         training=True,
                         projection_head=projection_head)
model.summary()

learning_rate = tf.constant(base_lr)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(learning_rate,
                                                             decay_steps=300000, 
                                               decay_rate=0.1,staircase=True)
if is_Adam:
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr_schedule)
else:
#         optimizer = tf.keras.optimizers.SGD(
#             learning_rate=learning_rate, momentum=0.9)
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule, momentum=0.9)

# checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
# manager = tf.train.CheckpointManager(
#     checkpoint, directory="tmp/ckpt", max_to_keep=5)
# status = checkpoint.restore(manager.latest_checkpoint)

# Prepare the metrics.
train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()

#     model.compile(optimizer=optimizer, loss=loss_fn)

Model: "arcface_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_image (InputLayer)        [(None, 112, 112, 3) 0                                            
__________________________________________________________________________________________________
resnet50 (Functional)           (None, 4, 4, 2048)   23587712    input_image[0][0]                
__________________________________________________________________________________________________
OutputLayer (Functional)        (None, 512)          16787968    resnet50[0][0]                   
__________________________________________________________________________________________________
label (InputLayer)              [(None,)]            0                                            
______________________________________________________________________________________

In [None]:
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import os

import numpy as np
import tensorflow as tf
from glob import glob

from evaluations import get_val_data, perform_val
from models import ArcFaceModel
from utils import set_memory_growth, load_yaml


lfw, agedb_30, cfp_fp, lfw_issame, agedb_30_issame, cfp_fp_issame = get_val_data("/raid/workspace/honghee/")

loss_fn = softmax_loss
epochs = 25

best_val_acc = 0

for epoch in range(epochs):
    print("======================================================")
    print("================= Start of epoch %d ==================" % (epoch,))
    print("======================================================")
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_loader):
        x_batch_train = x_batch_train.cpu().numpy()
        y_batch_train = y_batch_train.cpu().numpy()
        with tf.GradientTape() as tape:
            logits = model((x_batch_train, y_batch_train), training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Update training metric.
        train_acc_metric.update_state(y_batch_train, logits)

        # Log every 5000 batches.
        if step % 5000 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %d samples" % ((step + 1) * batch_size))
            model.save_weights("tmp/ckpt-h5/ckpt-1.h5")
            
            # Run a validation loop at the end of each epoch.
            best_checkpoint = "tmp/ckpt-h5/ckpt-1.h5"
            vf_model = ArcFaceModel(size=input_size,
                                 backbone_type=backbone_type,               
                                 training=False)
            vf_model.load_weights(best_checkpoint, by_name=True, skip_mismatch= True)

            acc_lfw, best_th = perform_val(
                embd_shape, batch_size, vf_model, lfw, lfw_issame,
                is_ccrop=False)
            val_acc = acc_lfw
            print("Validation acc: %.4f" % (float(val_acc),))
            print("Time taken: %.2fs" % (time.time() - start_time))
            if best_val_acc < val_acc:
                best_val_acc = val_acc
                model.save_weights("tmp/ckpt-h5/ckpt-best.h5")               
            
            # Display metrics at the end of each epoch.
            train_acc = train_acc_metric.result()
            print("Training acc over epoch: %.4f" % (float(train_acc),))
            # Reset training metrics at the end of each epoch
            train_acc_metric.reset_states()
    model.save_weights(f'tmp/ckpt-h5/epoch-{epoch}.h5')

Training loss (for one batch) at step 0: 45.2470
Seen so far: 64 samples


100%|██████████| 188/188 [00:19<00:00,  9.48it/s]


Validation acc: 0.5000
Time taken: 39.46s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 5000: 45.5688
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.11it/s]


Validation acc: 0.5000
Time taken: 744.41s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 10000: 45.3880
Seen so far: 640064 samples


100%|██████████| 188/188 [00:19<00:00,  9.80it/s]


Validation acc: 0.5000
Time taken: 1446.08s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 15000: 42.9450
Seen so far: 960064 samples


100%|██████████| 188/188 [00:19<00:00,  9.57it/s]


Validation acc: 0.5000
Time taken: 2145.34s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 20000: 42.4492
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:19<00:00,  9.69it/s]


Validation acc: 0.5000
Time taken: 2845.91s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 25000: 41.5925
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00,  9.98it/s]


Validation acc: 0.5000
Time taken: 3548.40s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 30000: 40.8943
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.5000
Time taken: 4247.29s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 35000: 40.7633
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00, 10.12it/s]


Validation acc: 0.5000
Time taken: 4948.48s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 40000: 40.4220
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.5000
Time taken: 5654.17s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 45000: 39.8747
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00,  9.98it/s]


Validation acc: 0.5000
Time taken: 6356.26s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 50000: 39.7535
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.5000
Time taken: 7059.25s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 55000: 39.5637
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00,  9.94it/s]


Validation acc: 0.5000
Time taken: 7760.17s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 60000: 39.8332
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.5000
Time taken: 8462.59s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 65000: 39.4782
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.11it/s]


Validation acc: 0.5000
Time taken: 9165.17s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 70000: 39.1592
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.05it/s]


Validation acc: 0.5000
Time taken: 9874.52s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 75000: 39.1277
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00,  9.98it/s]


Validation acc: 0.5000
Time taken: 10580.37s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 80000: 39.4851
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:19<00:00,  9.71it/s]


Validation acc: 0.5000
Time taken: 11288.66s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 85000: 37.7680
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.05it/s]


Validation acc: 0.5000
Time taken: 11993.18s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 90000: 38.2663
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.5000
Time taken: 12698.59s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 0: 37.5715
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00,  9.93it/s]


Validation acc: 0.5000
Time taken: 29.74s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 5000: 38.1056
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00,  9.93it/s]


Validation acc: 0.5000
Time taken: 735.72s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 10000: 37.9646
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.5000
Time taken: 1443.36s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 15000: 37.1844
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00,  9.91it/s]


Validation acc: 0.5000
Time taken: 2146.61s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 20000: 37.6303
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:18<00:00, 10.26it/s]


Validation acc: 0.5000
Time taken: 2852.33s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 25000: 36.9671
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.08it/s]


Validation acc: 0.5000
Time taken: 3556.05s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 30000: 36.0979
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00,  9.93it/s]


Validation acc: 0.5000
Time taken: 4261.79s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 35000: 36.9750
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00, 10.32it/s]


Validation acc: 0.5000
Time taken: 4967.89s
Training acc over epoch: 0.0001
Training loss (for one batch) at step 40000: 36.3601
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.5000
Time taken: 5678.82s
Training acc over epoch: 0.0000
Training loss (for one batch) at step 45000: 35.2577
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.38it/s]


Validation acc: 0.5000
Time taken: 6382.96s
Training acc over epoch: 0.0001
Training loss (for one batch) at step 50000: 34.3282
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.23it/s]


Validation acc: 0.5000
Time taken: 7087.48s
Training acc over epoch: 0.0001
Training loss (for one batch) at step 55000: 34.3408
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.12it/s]


Validation acc: 0.5000
Time taken: 7793.66s
Training acc over epoch: 0.0001
Training loss (for one batch) at step 60000: 34.7112
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.35it/s]


Validation acc: 0.5000
Time taken: 8497.62s
Training acc over epoch: 0.0001
Training loss (for one batch) at step 65000: 33.4561
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:19<00:00,  9.89it/s]


Validation acc: 0.5000
Time taken: 9201.59s
Training acc over epoch: 0.0002
Training loss (for one batch) at step 70000: 33.0762
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.43it/s]


Validation acc: 0.5000
Time taken: 9902.94s
Training acc over epoch: 0.0002
Training loss (for one batch) at step 75000: 34.0663
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.5000
Time taken: 10605.84s
Training acc over epoch: 0.0004
Training loss (for one batch) at step 80000: 34.8502
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.38it/s]


Validation acc: 0.5000
Time taken: 11307.36s
Training acc over epoch: 0.0005
Training loss (for one batch) at step 85000: 31.3591
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.35it/s]


Validation acc: 0.5000
Time taken: 12009.12s
Training acc over epoch: 0.0005
Training loss (for one batch) at step 90000: 32.4133
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:17<00:00, 10.45it/s]


Validation acc: 0.5000
Time taken: 12717.46s
Training acc over epoch: 0.0005
Training loss (for one batch) at step 0: 31.6775
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.13it/s]


Validation acc: 0.5000
Time taken: 28.88s
Training acc over epoch: 0.0007
Training loss (for one batch) at step 5000: 32.3268
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00,  9.95it/s]


Validation acc: 0.5000
Time taken: 737.15s
Training acc over epoch: 0.0007
Training loss (for one batch) at step 10000: 31.7519
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.38it/s]


Validation acc: 0.5000
Time taken: 1444.96s
Training acc over epoch: 0.0009
Training loss (for one batch) at step 15000: 30.9365
Seen so far: 960064 samples


100%|██████████| 188/188 [00:19<00:00,  9.41it/s]


Validation acc: 0.5000
Time taken: 2152.14s
Training acc over epoch: 0.0010
Training loss (for one batch) at step 20000: 31.2309
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:18<00:00, 10.28it/s]


Validation acc: 0.5000
Time taken: 2857.37s
Training acc over epoch: 0.0012
Training loss (for one batch) at step 25000: 30.5850
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.05it/s]


Validation acc: 0.5000
Time taken: 3569.07s
Training acc over epoch: 0.0015
Training loss (for one batch) at step 30000: 29.6470
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00, 10.01it/s]


Validation acc: 0.5005
Time taken: 4288.14s
Training acc over epoch: 0.0020
Training loss (for one batch) at step 35000: 29.9953
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00,  9.93it/s]


Validation acc: 0.5022
Time taken: 5010.36s
Training acc over epoch: 0.0023
Training loss (for one batch) at step 40000: 28.9588
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.13it/s]


Validation acc: 0.5000
Time taken: 5728.53s
Training acc over epoch: 0.0024
Training loss (for one batch) at step 45000: 28.9621
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:19<00:00,  9.83it/s]


Validation acc: 0.5000
Time taken: 6446.43s
Training acc over epoch: 0.0032
Training loss (for one batch) at step 50000: 26.7856
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00,  9.92it/s]


Validation acc: 0.5112
Time taken: 7168.47s
Training acc over epoch: 0.0036
Training loss (for one batch) at step 55000: 26.4393
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.5118
Time taken: 7887.61s
Training acc over epoch: 0.0044
Training loss (for one batch) at step 60000: 27.1088
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.23it/s]


Validation acc: 0.5230
Time taken: 8610.76s
Training acc over epoch: 0.0049
Training loss (for one batch) at step 65000: 25.7578
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.00it/s]


Validation acc: 0.5420
Time taken: 9331.83s
Training acc over epoch: 0.0057
Training loss (for one batch) at step 70000: 24.7274
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:19<00:00,  9.60it/s]


Validation acc: 0.5530
Time taken: 10056.58s
Training acc over epoch: 0.0065
Training loss (for one batch) at step 75000: 26.8271
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.12it/s]


Validation acc: 0.5957
Time taken: 10765.17s
Training acc over epoch: 0.0076
Training loss (for one batch) at step 80000: 27.3730
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:19<00:00,  9.85it/s]


Validation acc: 0.6528
Time taken: 11469.10s
Training acc over epoch: 0.0091
Training loss (for one batch) at step 85000: 22.4449
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.6888
Time taken: 12183.83s
Training acc over epoch: 0.0100
Training loss (for one batch) at step 90000: 24.4663
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.23it/s]


Validation acc: 0.7915
Time taken: 12886.59s
Training acc over epoch: 0.0112
Training loss (for one batch) at step 0: 23.9307
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.03it/s]


Validation acc: 0.8642
Time taken: 28.90s
Training acc over epoch: 0.0122
Training loss (for one batch) at step 5000: 24.3979
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.7820
Time taken: 736.14s
Training acc over epoch: 0.0127
Training loss (for one batch) at step 10000: 23.9832
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00,  9.99it/s]


Validation acc: 0.8355
Time taken: 1435.89s
Training acc over epoch: 0.0145
Training loss (for one batch) at step 15000: 23.0517
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00, 10.06it/s]


Validation acc: 0.8632
Time taken: 2142.23s
Training acc over epoch: 0.0164
Training loss (for one batch) at step 20000: 23.7198
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:19<00:00,  9.87it/s]


Validation acc: 0.8530
Time taken: 2849.15s
Training acc over epoch: 0.0185
Training loss (for one batch) at step 25000: 22.5839
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:20<00:00,  9.35it/s]


Validation acc: 0.9358
Time taken: 3560.05s
Training acc over epoch: 0.0217
Training loss (for one batch) at step 30000: 21.5705
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:19<00:00,  9.64it/s]


Validation acc: 0.9510
Time taken: 4265.97s
Training acc over epoch: 0.0273
Training loss (for one batch) at step 35000: 22.1834
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:20<00:00,  9.38it/s]


Validation acc: 0.9538
Time taken: 4976.15s
Training acc over epoch: 0.0335
Training loss (for one batch) at step 40000: 21.4151
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:19<00:00,  9.76it/s]


Validation acc: 0.9530
Time taken: 5687.25s
Training acc over epoch: 0.0359
Training loss (for one batch) at step 45000: 20.6119
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.10it/s]


Validation acc: 0.9563
Time taken: 6393.65s
Training acc over epoch: 0.0374
Training loss (for one batch) at step 50000: 19.5147
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.9463
Time taken: 7097.76s
Training acc over epoch: 0.0384
Training loss (for one batch) at step 55000: 18.8420
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00,  9.97it/s]


Validation acc: 0.9388
Time taken: 7804.12s
Training acc over epoch: 0.0402
Training loss (for one batch) at step 60000: 20.3172
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00,  9.91it/s]


Validation acc: 0.9488
Time taken: 8508.77s
Training acc over epoch: 0.0411
Training loss (for one batch) at step 65000: 19.0058
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00,  9.97it/s]


Validation acc: 0.9232
Time taken: 9209.50s
Training acc over epoch: 0.0420
Training loss (for one batch) at step 70000: 18.9306
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.05it/s]


Validation acc: 0.9353
Time taken: 9914.37s
Training acc over epoch: 0.0431
Training loss (for one batch) at step 75000: 21.0873
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.20it/s]


Validation acc: 0.9220
Time taken: 10620.97s
Training acc over epoch: 0.0449
Training loss (for one batch) at step 80000: 22.4214
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.26it/s]


Validation acc: 0.9277
Time taken: 11321.72s
Training acc over epoch: 0.0462
Training loss (for one batch) at step 85000: 16.8803
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:17<00:00, 10.45it/s]


Validation acc: 0.9293
Time taken: 12022.76s
Training acc over epoch: 0.0474
Training loss (for one batch) at step 90000: 19.2082
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.9150
Time taken: 12722.12s
Training acc over epoch: 0.0487
Training loss (for one batch) at step 0: 19.2076
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.01it/s]


Validation acc: 0.9135
Time taken: 29.52s
Training acc over epoch: 0.0486
Training loss (for one batch) at step 5000: 19.5405
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.9030
Time taken: 731.39s
Training acc over epoch: 0.0495
Training loss (for one batch) at step 10000: 19.1154
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.8805
Time taken: 1429.57s
Training acc over epoch: 0.0523
Training loss (for one batch) at step 15000: 18.4961
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.8675
Time taken: 2129.45s
Training acc over epoch: 0.0538
Training loss (for one batch) at step 20000: 20.4063
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.8870
Time taken: 2827.46s
Training acc over epoch: 0.0556
Training loss (for one batch) at step 25000: 18.7115
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.11it/s]


Validation acc: 0.8785
Time taken: 3524.57s
Training acc over epoch: 0.0597
Training loss (for one batch) at step 30000: 20.0924
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00, 10.31it/s]


Validation acc: 0.8633
Time taken: 4223.83s
Training acc over epoch: 0.0550
Training loss (for one batch) at step 35000: 20.8436
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00, 10.24it/s]


Validation acc: 0.8673
Time taken: 4919.35s
Training acc over epoch: 0.0511
Training loss (for one batch) at step 40000: 20.4212
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00,  9.93it/s]


Validation acc: 0.8683
Time taken: 5615.53s
Training acc over epoch: 0.0520
Training loss (for one batch) at step 45000: 19.2706
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:19<00:00,  9.62it/s]


Validation acc: 0.8830
Time taken: 6316.70s
Training acc over epoch: 0.0529
Training loss (for one batch) at step 50000: 17.9416
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.8697
Time taken: 7014.16s
Training acc over epoch: 0.0533
Training loss (for one batch) at step 55000: 17.6803
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:19<00:00,  9.85it/s]


Validation acc: 0.8727
Time taken: 7710.04s
Training acc over epoch: 0.0546
Training loss (for one batch) at step 60000: 18.9944
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00,  9.96it/s]


Validation acc: 0.8973
Time taken: 8410.49s
Training acc over epoch: 0.0558
Training loss (for one batch) at step 65000: 17.7015
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.8560
Time taken: 9109.40s
Training acc over epoch: 0.0563
Training loss (for one batch) at step 70000: 17.3816
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.8567
Time taken: 9812.51s
Training acc over epoch: 0.0573
Training loss (for one batch) at step 75000: 19.8371
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.8572
Time taken: 10517.83s
Training acc over epoch: 0.0593
Training loss (for one batch) at step 80000: 21.1550
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.09it/s]


Validation acc: 0.8438
Time taken: 11229.57s
Training acc over epoch: 0.0600
Training loss (for one batch) at step 85000: 15.7859
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:19<00:00,  9.60it/s]


Validation acc: 0.8485
Time taken: 11944.42s
Training acc over epoch: 0.0617
Training loss (for one batch) at step 90000: 17.5944
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00,  9.90it/s]


Validation acc: 0.8293
Time taken: 12658.24s
Training acc over epoch: 0.0624
Training loss (for one batch) at step 0: 18.1923
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.13it/s]


Validation acc: 0.8525
Time taken: 28.95s
Training acc over epoch: 0.0627
Training loss (for one batch) at step 5000: 18.1395
Seen so far: 320064 samples


100%|██████████| 188/188 [00:19<00:00,  9.51it/s]


Validation acc: 0.8355
Time taken: 745.10s
Training acc over epoch: 0.0633
Training loss (for one batch) at step 10000: 17.9391
Seen so far: 640064 samples


100%|██████████| 188/188 [00:19<00:00,  9.49it/s]


Validation acc: 0.8033
Time taken: 1466.75s
Training acc over epoch: 0.0661
Training loss (for one batch) at step 15000: 17.4669
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00,  9.91it/s]


Validation acc: 0.8182
Time taken: 2186.30s
Training acc over epoch: 0.0678
Training loss (for one batch) at step 20000: 19.2513
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:19<00:00,  9.87it/s]


Validation acc: 0.8222
Time taken: 2910.64s
Training acc over epoch: 0.0692
Training loss (for one batch) at step 25000: 18.0390
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:19<00:00,  9.68it/s]


Validation acc: 0.8225
Time taken: 3618.32s
Training acc over epoch: 0.0732
Training loss (for one batch) at step 30000: 18.9208
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:19<00:00,  9.61it/s]


Validation acc: 0.8082
Time taken: 4323.64s
Training acc over epoch: 0.0684
Training loss (for one batch) at step 35000: 19.7276
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:19<00:00,  9.65it/s]


Validation acc: 0.8115
Time taken: 5038.86s
Training acc over epoch: 0.0643
Training loss (for one batch) at step 40000: 18.8579
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:19<00:00,  9.89it/s]


Validation acc: 0.8143
Time taken: 5755.00s
Training acc over epoch: 0.0662
Training loss (for one batch) at step 45000: 18.2526
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.36it/s]


Validation acc: 0.8465
Time taken: 6464.80s
Training acc over epoch: 0.0666
Training loss (for one batch) at step 50000: 16.7286
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.14it/s]


Validation acc: 0.8128
Time taken: 7170.51s
Training acc over epoch: 0.0674
Training loss (for one batch) at step 55000: 16.6853
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.8170
Time taken: 7880.94s
Training acc over epoch: 0.0686
Training loss (for one batch) at step 60000: 17.6859
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:19<00:00,  9.75it/s]


Validation acc: 0.8098
Time taken: 8590.14s
Training acc over epoch: 0.0698
Training loss (for one batch) at step 65000: 16.7110
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.06it/s]


Validation acc: 0.7915
Time taken: 9302.41s
Training acc over epoch: 0.0706
Training loss (for one batch) at step 70000: 16.1826
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:19<00:00,  9.84it/s]


Validation acc: 0.8227
Time taken: 10011.75s
Training acc over epoch: 0.0717
Training loss (for one batch) at step 75000: 18.7863
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:19<00:00,  9.72it/s]


Validation acc: 0.8002
Time taken: 10719.39s
Training acc over epoch: 0.0739
Training loss (for one batch) at step 80000: 20.2413
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.10it/s]


Validation acc: 0.8002
Time taken: 11431.25s
Training acc over epoch: 0.0747
Training loss (for one batch) at step 85000: 14.7655
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:19<00:00,  9.88it/s]


Validation acc: 0.8013
Time taken: 12138.05s
Training acc over epoch: 0.0756
Training loss (for one batch) at step 90000: 16.6590
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.26it/s]


Validation acc: 0.7852
Time taken: 12841.26s
Training acc over epoch: 0.0772
Training loss (for one batch) at step 0: 17.3441
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.10it/s]


Validation acc: 0.7922
Time taken: 28.59s
Training acc over epoch: 0.0763
Training loss (for one batch) at step 5000: 17.3900
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.7772
Time taken: 732.45s
Training acc over epoch: 0.0774
Training loss (for one batch) at step 10000: 17.2087
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.18it/s]


Validation acc: 0.7632
Time taken: 1439.67s
Training acc over epoch: 0.0806
Training loss (for one batch) at step 15000: 16.5684
Seen so far: 960064 samples


100%|██████████| 188/188 [00:19<00:00,  9.83it/s]


Validation acc: 0.7802
Time taken: 2157.71s
Training acc over epoch: 0.0823
Training loss (for one batch) at step 20000: 18.5802
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:19<00:00,  9.61it/s]


Validation acc: 0.7877
Time taken: 2877.73s
Training acc over epoch: 0.0840
Training loss (for one batch) at step 25000: 16.4690
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.00it/s]


Validation acc: 0.7773
Time taken: 3581.54s
Training acc over epoch: 0.0876
Training loss (for one batch) at step 30000: 17.9489
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.7938
Time taken: 4283.61s
Training acc over epoch: 0.0830
Training loss (for one batch) at step 35000: 19.0192
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00, 10.27it/s]


Validation acc: 0.8017
Time taken: 4985.90s
Training acc over epoch: 0.0798
Training loss (for one batch) at step 40000: 17.7520
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.13it/s]


Validation acc: 0.7827
Time taken: 5691.13s
Training acc over epoch: 0.0810
Training loss (for one batch) at step 45000: 17.4894
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.11it/s]


Validation acc: 0.8097
Time taken: 6395.37s
Training acc over epoch: 0.0819
Training loss (for one batch) at step 50000: 15.7360
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.27it/s]


Validation acc: 0.8002
Time taken: 7095.15s
Training acc over epoch: 0.0824
Training loss (for one batch) at step 55000: 15.2275
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.7808
Time taken: 7803.37s
Training acc over epoch: 0.0838
Training loss (for one batch) at step 60000: 16.9115
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.27it/s]


Validation acc: 0.7795
Time taken: 8506.20s
Training acc over epoch: 0.0875
Training loss (for one batch) at step 65000: 15.6303
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.7638
Time taken: 9208.53s
Training acc over epoch: 0.0888
Training loss (for one batch) at step 70000: 15.7068
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:19<00:00,  9.59it/s]


Validation acc: 0.7875
Time taken: 9921.52s
Training acc over epoch: 0.0893
Training loss (for one batch) at step 75000: 17.3404
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00,  9.99it/s]


Validation acc: 0.7825
Time taken: 10636.29s
Training acc over epoch: 0.0909
Training loss (for one batch) at step 80000: 19.0193
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:19<00:00,  9.43it/s]


Validation acc: 0.7867
Time taken: 11350.56s
Training acc over epoch: 0.0918
Training loss (for one batch) at step 85000: 14.0248
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.20it/s]


Validation acc: 0.7798
Time taken: 12059.19s
Training acc over epoch: 0.0922
Training loss (for one batch) at step 90000: 15.6137
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.7735
Time taken: 12766.84s
Training acc over epoch: 0.0937
Training loss (for one batch) at step 0: 16.2174
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00,  9.90it/s]


Validation acc: 0.7720
Time taken: 29.29s
Training acc over epoch: 0.0918
Training loss (for one batch) at step 5000: 16.3031
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.02it/s]


Validation acc: 0.7895
Time taken: 742.09s
Training acc over epoch: 0.0936
Training loss (for one batch) at step 10000: 16.2241
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.8022
Time taken: 1458.52s
Training acc over epoch: 0.0964
Training loss (for one batch) at step 15000: 15.7467
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00, 10.14it/s]


Validation acc: 0.7753
Time taken: 2176.68s
Training acc over epoch: 0.0981
Training loss (for one batch) at step 20000: 18.1607
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:18<00:00, 10.22it/s]


Validation acc: 0.7698
Time taken: 2890.36s
Training acc over epoch: 0.0987
Training loss (for one batch) at step 25000: 15.9813
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.7607
Time taken: 3601.23s
Training acc over epoch: 0.1025
Training loss (for one batch) at step 30000: 17.3096
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:19<00:00,  9.85it/s]


Validation acc: 0.7632
Time taken: 4314.66s
Training acc over epoch: 0.0978
Training loss (for one batch) at step 35000: 17.4403
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:19<00:00,  9.80it/s]


Validation acc: 0.7760
Time taken: 5027.91s
Training acc over epoch: 0.0939
Training loss (for one batch) at step 40000: 17.1908
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.13it/s]


Validation acc: 0.7740
Time taken: 5735.11s
Training acc over epoch: 0.0949
Training loss (for one batch) at step 45000: 16.3644
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.20it/s]


Validation acc: 0.7708
Time taken: 6445.37s
Training acc over epoch: 0.0958
Training loss (for one batch) at step 50000: 14.7732
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.7882
Time taken: 7153.18s
Training acc over epoch: 0.0959
Training loss (for one batch) at step 55000: 15.2511
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.00it/s]


Validation acc: 0.7710
Time taken: 7861.01s
Training acc over epoch: 0.0966
Training loss (for one batch) at step 60000: 16.8736
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.20it/s]


Validation acc: 0.7772
Time taken: 8562.32s
Training acc over epoch: 0.0928
Training loss (for one batch) at step 65000: 15.2798
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.07it/s]


Validation acc: 0.7673
Time taken: 9268.99s
Training acc over epoch: 0.0932
Training loss (for one batch) at step 70000: 15.5129
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.7737
Time taken: 9971.81s
Training acc over epoch: 0.0935
Training loss (for one batch) at step 75000: 17.6344
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.7667
Time taken: 10675.01s
Training acc over epoch: 0.0956
Training loss (for one batch) at step 80000: 19.3093
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.16it/s]


Validation acc: 0.7680
Time taken: 11382.16s
Training acc over epoch: 0.0956
Training loss (for one batch) at step 85000: 13.5760
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.24it/s]


Validation acc: 0.7477
Time taken: 12086.03s
Training acc over epoch: 0.0962
Training loss (for one batch) at step 90000: 15.5131
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.04it/s]


Validation acc: 0.7700
Time taken: 12787.05s
Training acc over epoch: 0.0965
Training loss (for one batch) at step 0: 16.1252
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.23it/s]


Validation acc: 0.7752
Time taken: 28.67s
Training acc over epoch: 0.0952
Training loss (for one batch) at step 5000: 15.8306
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.23it/s]


Validation acc: 0.7762
Time taken: 733.53s
Training acc over epoch: 0.0967
Training loss (for one batch) at step 10000: 16.1911
Seen so far: 640064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.7832
Time taken: 1435.30s
Training acc over epoch: 0.0998
Training loss (for one batch) at step 15000: 15.8140
Seen so far: 960064 samples


100%|██████████| 188/188 [00:18<00:00, 10.03it/s]


Validation acc: 0.7723
Time taken: 2135.55s
Training acc over epoch: 0.1010
Training loss (for one batch) at step 20000: 18.2766
Seen so far: 1280064 samples


100%|██████████| 188/188 [00:18<00:00,  9.94it/s]


Validation acc: 0.7645
Time taken: 2835.68s
Training acc over epoch: 0.1022
Training loss (for one batch) at step 25000: 15.8156
Seen so far: 1600064 samples


100%|██████████| 188/188 [00:18<00:00, 10.29it/s]


Validation acc: 0.7607
Time taken: 3533.77s
Training acc over epoch: 0.1058
Training loss (for one batch) at step 30000: 17.4959
Seen so far: 1920064 samples


100%|██████████| 188/188 [00:18<00:00, 10.19it/s]


Validation acc: 0.7497
Time taken: 4232.65s
Training acc over epoch: 0.1005
Training loss (for one batch) at step 35000: 17.3565
Seen so far: 2240064 samples


100%|██████████| 188/188 [00:18<00:00, 10.14it/s]


Validation acc: 0.7727
Time taken: 4933.67s
Training acc over epoch: 0.0964
Training loss (for one batch) at step 40000: 16.7686
Seen so far: 2560064 samples


100%|██████████| 188/188 [00:18<00:00, 10.07it/s]


Validation acc: 0.7680
Time taken: 5631.95s
Training acc over epoch: 0.0971
Training loss (for one batch) at step 45000: 16.2400
Seen so far: 2880064 samples


100%|██████████| 188/188 [00:18<00:00, 10.20it/s]


Validation acc: 0.7728
Time taken: 6335.24s
Training acc over epoch: 0.0983
Training loss (for one batch) at step 50000: 14.7420
Seen so far: 3200064 samples


100%|██████████| 188/188 [00:18<00:00, 10.15it/s]


Validation acc: 0.7602
Time taken: 7035.62s
Training acc over epoch: 0.0983
Training loss (for one batch) at step 55000: 15.1873
Seen so far: 3520064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.7877
Time taken: 7731.90s
Training acc over epoch: 0.0988
Training loss (for one batch) at step 60000: 16.7625
Seen so far: 3840064 samples


100%|██████████| 188/188 [00:18<00:00, 10.26it/s]


Validation acc: 0.7628
Time taken: 8433.26s
Training acc over epoch: 0.0955
Training loss (for one batch) at step 65000: 15.3704
Seen so far: 4160064 samples


100%|██████████| 188/188 [00:18<00:00, 10.17it/s]


Validation acc: 0.7627
Time taken: 9132.80s
Training acc over epoch: 0.0964
Training loss (for one batch) at step 70000: 15.0797
Seen so far: 4480064 samples


100%|██████████| 188/188 [00:18<00:00, 10.28it/s]


Validation acc: 0.7965
Time taken: 9834.93s
Training acc over epoch: 0.0957
Training loss (for one batch) at step 75000: 17.1901
Seen so far: 4800064 samples


100%|██████████| 188/188 [00:18<00:00, 10.31it/s]


Validation acc: 0.7672
Time taken: 10535.46s
Training acc over epoch: 0.0979
Training loss (for one batch) at step 80000: 18.9944
Seen so far: 5120064 samples


100%|██████████| 188/188 [00:18<00:00, 10.21it/s]


Validation acc: 0.7843
Time taken: 11234.46s
Training acc over epoch: 0.0979
Training loss (for one batch) at step 85000: 13.4730
Seen so far: 5440064 samples


100%|██████████| 188/188 [00:18<00:00, 10.18it/s]


Validation acc: 0.7545
Time taken: 11932.93s
Training acc over epoch: 0.0988
Training loss (for one batch) at step 90000: 15.3898
Seen so far: 5760064 samples


100%|██████████| 188/188 [00:18<00:00, 10.08it/s]


Validation acc: 0.7813
Time taken: 12631.63s
Training acc over epoch: 0.0992
Training loss (for one batch) at step 0: 15.7572
Seen so far: 64 samples


100%|██████████| 188/188 [00:18<00:00, 10.38it/s]


Validation acc: 0.7660
Time taken: 28.35s
Training acc over epoch: 0.0981
Training loss (for one batch) at step 5000: 16.1028
Seen so far: 320064 samples


100%|██████████| 188/188 [00:18<00:00, 10.29it/s]


Validation acc: 0.7768
Time taken: 730.19s
Training acc over epoch: 0.0988
