In [1]:
from __future__ import print_function

import os
import argparse
import socket
import time

import tensorboard_logger as tb_logger
import torch
import torch.optim as optim
import torch.nn as nn
import torch.backends.cudnn as cudnn


2024-02-27 16:56:10.624373: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-27 16:56:10.626372: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-27 16:56:10.666901: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-27 16:56:10.667749: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Leftover debug cell: only prints a marker string.
# NOTE(review): nothing below appears to depend on it -- looks safe to delete.
print("here")

here


In [3]:
import os

In [4]:
import sys
# Put the current working directory first on the module search path.
# NOTE(review): presumably needed so the project-local packages imported in the
# next cell resolve when the notebook is launched from the repository root -- confirm.
sys.path.insert(0,os.getcwd())

In [5]:
from models import model_dict

from dataset.cifar100 import get_cifar100_dataloaders
from helper.util import adjust_learning_rate, accuracy, AverageMeter
from helper.loops import train_vanilla as train, validate

In [6]:
def parse_option(args=()):
    """Build the training configuration used by this notebook.

    Args:
        args: Optional sequence of command-line style strings, e.g.
            ``['--model', 'resnet8', '--epochs', '10']``, overriding the
            defaults below.  The default is empty, so every option takes its
            default value and ``sys.argv`` (which belongs to the Jupyter
            kernel) is never parsed.

    Returns:
        argparse.Namespace with the parsed options plus these derived fields:
        ``lr_decay_epochs`` (converted from the comma-separated string to a
        list of ints), ``model_path``, ``tb_path``, ``model_name``,
        ``tb_folder`` and ``save_folder``.

    Raises:
        ValueError: if an item of ``--lr_decay_epochs`` is not an integer.

    Side effects:
        Creates ``tb_folder`` and ``save_folder`` on disk when they are missing.
    """
    hostname = socket.gethostname()

    parser = argparse.ArgumentParser('argument for training')

    parser.add_argument('--print_freq', type=int, default=100, help='print frequency')
    parser.add_argument('--tb_freq', type=int, default=500, help='tb frequency')
    parser.add_argument('--save_freq', type=int, default=40, help='save frequency')
    parser.add_argument('--batch_size', type=int, default=64, help='batch_size')
    parser.add_argument('--num_workers', type=int, default=8, help='num of workers to use')
    parser.add_argument('--epochs', type=int, default=240, help='number of training epochs')

    # optimization
    parser.add_argument('--learning_rate', type=float, default=0.05, help='learning rate')
    parser.add_argument('--lr_decay_epochs', type=str, default='150,180,210', help='where to decay lr, can be a list')
    parser.add_argument('--lr_decay_rate', type=float, default=0.1, help='decay rate for learning rate')
    parser.add_argument('--weight_decay', type=float, default=5e-4, help='weight decay')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')

    # model and dataset
    parser.add_argument('--model', type=str, default='resnet110',
                        choices=['resnet8', 'resnet14', 'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110',
                                 'resnet8x4', 'resnet32x4', 'wrn_16_1', 'wrn_16_2', 'wrn_40_1', 'wrn_40_2',
                                 'vgg8', 'vgg11', 'vgg13', 'vgg16', 'vgg19',
                                 'MobileNetV2', 'ShuffleV1', 'ShuffleV2', ])
    parser.add_argument('--dataset', type=str, default='cifar100', choices=['cifar100'], help='dataset')

    parser.add_argument('-t', '--trial', type=int, default=0, help='the experiment id')

    # Always parse an explicit list: passing nothing would make argparse read
    # the kernel's own sys.argv.
    opt = parser.parse_args(list(args))

    # these three models always train with a smaller learning rate
    # (this overrides any --learning_rate given in `args`)
    if opt.model in ['MobileNetV2', 'ShuffleV1', 'ShuffleV2']:
        opt.learning_rate = 0.01

    # set the path according to the environment
    if hostname.startswith('visiongpu'):
        opt.model_path = '/path/to/my/model'
        opt.tb_path = '/path/to/my/tensorboard'
    else:
        opt.model_path = './save/models'
        opt.tb_path = './save/tensorboard'

    # '150,180,210' -> [150, 180, 210]; empty items (e.g. a trailing comma)
    # are skipped instead of crashing in int('').
    opt.lr_decay_epochs = [int(it) for it in opt.lr_decay_epochs.split(',') if it.strip()]

    opt.model_name = '{}_{}_lr_{}_decay_{}_trial_{}'.format(opt.model, opt.dataset, opt.learning_rate,
                                                            opt.weight_decay, opt.trial)

    # exist_ok=True avoids the check-then-create race of isdir() + makedirs().
    opt.tb_folder = os.path.join(opt.tb_path, opt.model_name)
    os.makedirs(opt.tb_folder, exist_ok=True)

    opt.save_folder = os.path.join(opt.model_path, opt.model_name)
    os.makedirs(opt.save_folder, exist_ok=True)

    return opt

In [7]:
# Best test accuracy seen so far; the training loop raises it as it goes.
best_acc = 0

# Experiment configuration (parsing it also creates the output folders).
opt = parse_option()

# dataloader -- CIFAR-100 is the only dataset handled here, so bail out early
# on anything else.
if opt.dataset != 'cifar100':
    raise NotImplementedError(opt.dataset)

train_loader, val_loader = get_cifar100_dataloaders(
    batch_size=opt.batch_size,
    num_workers=opt.num_workers,
)
n_cls = 100

Files already downloaded and verified
Files already downloaded and verified


In [11]:
from tqdm import tqdm 

In [12]:
# Reset the running best here as well: this cell builds a fresh model, so a
# value left over from a previous (possibly interrupted) run of the cell would
# stop the new run from ever saving its best checkpoint.
best_acc = 0

# model
model = model_dict[opt.model](num_classes=n_cls)

criterion = nn.CrossEntropyLoss()

# Move the model to the GPU *before* building the optimizer, as the torch.optim
# documentation recommends, so the optimizer is created from the parameters
# that are actually trained.
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()
    cudnn.benchmark = True

# optimizer
optimizer = optim.SGD(model.parameters(),
                      lr=opt.learning_rate,
                      momentum=opt.momentum,
                      weight_decay=opt.weight_decay)

# tensorboard
logger = tb_logger.Logger(logdir=opt.tb_folder, flush_secs=2)


def _save_checkpoint(filename, **extra):
    """Save `extra` plus the current model/optimizer state dicts under opt.save_folder.

    Returns the path of the file that was written.
    """
    state = dict(extra)
    state['model'] = model.state_dict()
    state['optimizer'] = optimizer.state_dict()
    save_file = os.path.join(opt.save_folder, filename)
    torch.save(state, save_file)
    return save_file


# routine
for epoch in tqdm(range(1, opt.epochs + 1)):

    # NOTE(review): presumably applies the step schedule from
    # opt.lr_decay_epochs / opt.lr_decay_rate -- see helper.util.
    adjust_learning_rate(epoch, opt, optimizer)
    print("==> training...")

    time1 = time.time()
    train_acc, train_loss = train(epoch, train_loader, model, criterion, optimizer, opt)
    time2 = time.time()
    print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

    logger.log_value('train_acc', train_acc, epoch)
    logger.log_value('train_loss', train_loss, epoch)

    test_acc, test_acc_top5, test_loss = validate(val_loader, model, criterion, opt)

    logger.log_value('test_acc', test_acc, epoch)
    logger.log_value('test_acc_top5', test_acc_top5, epoch)
    logger.log_value('test_loss', test_loss, epoch)

    # save the best model
    if test_acc > best_acc:
        best_acc = test_acc
        print('saving the best model!')
        _save_checkpoint('{}_best.pth'.format(opt.model), epoch=epoch, best_acc=best_acc)

    # regular saving
    if epoch % opt.save_freq == 0:
        print('==> Saving...')
        _save_checkpoint('ckpt_epoch_{epoch}.pth'.format(epoch=epoch), epoch=epoch, accuracy=test_acc)

# This best accuracy is for printing purposes only.
# The results reported in the paper/README are from the last epoch.
print('best accuracy:', best_acc)

# save the last-epoch model together with the options it was trained with
_save_checkpoint('{}_last.pth'.format(opt.model), opt=opt)


  0%|          | 0/240 [00:00<?, ?it/s][A

==> training...
Epoch: [1][0/782]	Time 24.188 (24.188)	Data 0.229 (0.229)	Loss 6.7635 (6.7635)	Acc@1 1.562 (1.562)	Acc@5 3.125 (3.125)
Epoch: [1][100/782]	Time 0.036 (0.278)	Data 0.001 (0.003)	Loss 4.6107 (4.7839)	Acc@1 1.562 (1.006)	Acc@5 12.500 (4.796)
Epoch: [1][200/782]	Time 0.036 (0.158)	Data 0.001 (0.002)	Loss 4.5692 (4.6928)	Acc@1 3.125 (1.112)	Acc@5 4.688 (5.061)
Epoch: [1][300/782]	Time 0.036 (0.118)	Data 0.001 (0.002)	Loss 4.5104 (4.6548)	Acc@1 4.688 (1.199)	Acc@5 7.812 (5.580)
Epoch: [1][400/782]	Time 0.036 (0.098)	Data 0.001 (0.001)	Loss 4.5160 (4.6275)	Acc@1 3.125 (1.321)	Acc@5 9.375 (5.938)
Epoch: [1][500/782]	Time 0.036 (0.086)	Data 0.001 (0.001)	Loss 4.4544 (4.6049)	Acc@1 3.125 (1.422)	Acc@5 9.375 (6.443)
Epoch: [1][600/782]	Time 0.036 (0.078)	Data 0.001 (0.001)	Loss 4.3909 (4.5794)	Acc@1 6.250 (1.552)	Acc@5 10.938 (7.248)
Epoch: [1][700/782]	Time 0.036 (0.072)	Data 0.001 (0.001)	Loss 4.4584 (4.5483)	Acc@1 0.000 (1.743)	Acc@5 9.375 (8.165)
 * Acc@1 1.916 Acc@5 8.856
epo


  0%|          | 1/240 [00:57<3:49:57, 57.73s/it][A

==> training...
Epoch: [2][0/782]	Time 0.277 (0.277)	Data 0.221 (0.221)	Loss 4.2402 (4.2402)	Acc@1 6.250 (6.250)	Acc@5 14.062 (14.062)
Epoch: [2][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 4.2497 (4.2788)	Acc@1 7.812 (4.069)	Acc@5 20.312 (16.368)
Epoch: [2][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 4.3598 (4.2564)	Acc@1 1.562 (4.291)	Acc@5 9.375 (17.055)
Epoch: [2][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 4.1653 (4.2258)	Acc@1 9.375 (4.537)	Acc@5 21.875 (18.205)
Epoch: [2][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.9589 (4.1872)	Acc@1 10.938 (5.054)	Acc@5 25.000 (19.428)
Epoch: [2][500/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.9653 (4.1575)	Acc@1 4.688 (5.392)	Acc@5 23.438 (20.459)
Epoch: [2][600/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 4.0065 (4.1242)	Acc@1 7.812 (5.837)	Acc@5 21.875 (21.456)
Epoch: [2][700/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 3.8420 (4.0962)	Acc@1 4.688 (6.199)	Acc@5 26.562 (22.361)
 * Acc@1 6.530 Acc


  1%|          | 2/240 [01:29<2:49:14, 42.67s/it][A

saving the best model!
==> training...
Epoch: [3][0/782]	Time 0.280 (0.280)	Data 0.228 (0.228)	Loss 4.0881 (4.0881)	Acc@1 3.125 (3.125)	Acc@5 25.000 (25.000)
Epoch: [3][100/782]	Time 0.036 (0.039)	Data 0.001 (0.003)	Loss 3.8999 (3.8411)	Acc@1 4.688 (9.545)	Acc@5 34.375 (30.956)
Epoch: [3][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 3.7317 (3.8016)	Acc@1 10.938 (10.199)	Acc@5 40.625 (31.988)
Epoch: [3][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 3.9705 (3.7667)	Acc@1 7.812 (10.777)	Acc@5 32.812 (33.197)
Epoch: [3][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.5255 (3.7399)	Acc@1 4.688 (11.183)	Acc@5 39.062 (33.942)
Epoch: [3][500/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.3739 (3.7092)	Acc@1 18.750 (11.886)	Acc@5 39.062 (34.924)
Epoch: [3][600/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.5784 (3.6775)	Acc@1 7.812 (12.284)	Acc@5 40.625 (35.989)
Epoch: [3][700/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.5432 (3.6467)	Acc@1 15.625 (12.730)	Acc@5 42


  1%|▏         | 3/240 [02:02<2:29:34, 37.87s/it][A

==> training...
Epoch: [4][0/782]	Time 0.263 (0.263)	Data 0.222 (0.222)	Loss 3.4678 (3.4678)	Acc@1 17.188 (17.188)	Acc@5 42.188 (42.188)
Epoch: [4][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 3.1561 (3.3680)	Acc@1 14.062 (17.450)	Acc@5 54.688 (45.220)
Epoch: [4][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 3.4601 (3.3141)	Acc@1 9.375 (18.276)	Acc@5 39.062 (46.844)
Epoch: [4][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 3.0546 (3.2844)	Acc@1 15.625 (18.999)	Acc@5 53.125 (47.773)
Epoch: [4][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.0008 (3.2563)	Acc@1 25.000 (19.685)	Acc@5 57.812 (48.465)
Epoch: [4][500/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.0776 (3.2294)	Acc@1 20.312 (20.029)	Acc@5 54.688 (49.145)
Epoch: [4][600/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.6410 (3.1993)	Acc@1 23.438 (20.523)	Acc@5 57.812 (50.060)
Epoch: [4][700/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.0358 (3.1738)	Acc@1 25.000 (20.941)	Acc@5 50.000 (50.727)
 * 


  2%|▏         | 4/240 [02:34<2:20:20, 35.68s/it][A

==> training...
Epoch: [5][0/782]	Time 0.292 (0.292)	Data 0.252 (0.252)	Loss 3.1254 (3.1254)	Acc@1 20.312 (20.312)	Acc@5 51.562 (51.562)
Epoch: [5][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 2.5322 (2.8818)	Acc@1 29.688 (25.758)	Acc@5 70.312 (58.540)
Epoch: [5][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.9024 (2.8649)	Acc@1 18.750 (26.516)	Acc@5 56.250 (58.963)
Epoch: [5][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.7053 (2.8563)	Acc@1 35.938 (26.827)	Acc@5 64.062 (58.908)
Epoch: [5][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 3.0244 (2.8448)	Acc@1 25.000 (27.182)	Acc@5 60.938 (59.196)
Epoch: [5][500/782]	Time 0.035 (0.036)	Data 0.001 (0.001)	Loss 2.7513 (2.8300)	Acc@1 32.812 (27.511)	Acc@5 60.938 (59.534)
Epoch: [5][600/782]	Time 0.037 (0.036)	Data 0.001 (0.001)	Loss 3.0151 (2.8122)	Acc@1 18.750 (27.875)	Acc@5 57.812 (59.918)
Epoch: [5][700/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 2.7408 (2.7912)	Acc@1 31.250 (28.268)	Acc@5 62.500 (60.512)
 *


  2%|▏         | 5/240 [03:06<2:14:30, 34.34s/it][A

==> training...
Epoch: [6][0/782]	Time 0.256 (0.256)	Data 0.216 (0.216)	Loss 2.8986 (2.8986)	Acc@1 23.438 (23.438)	Acc@5 60.938 (60.938)
Epoch: [6][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 2.8880 (2.6392)	Acc@1 28.125 (30.569)	Acc@5 64.062 (64.217)
Epoch: [6][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.5741 (2.5974)	Acc@1 31.250 (31.716)	Acc@5 64.062 (65.197)
Epoch: [6][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.6882 (2.5702)	Acc@1 32.812 (32.641)	Acc@5 64.062 (65.692)
Epoch: [6][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.5437 (2.5491)	Acc@1 32.812 (33.070)	Acc@5 65.625 (65.956)
Epoch: [6][500/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 2.5067 (2.5321)	Acc@1 26.562 (33.442)	Acc@5 70.312 (66.405)
Epoch: [6][600/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 2.8055 (2.5153)	Acc@1 21.875 (33.738)	Acc@5 62.500 (66.764)
Epoch: [6][700/782]	Time 0.035 (0.036)	Data 0.001 (0.001)	Loss 2.1781 (2.5024)	Acc@1 43.750 (34.081)	Acc@5 78.125 (67.076)
 *


  2%|▎         | 6/240 [03:38<2:10:50, 33.55s/it][A

==> training...
Epoch: [7][0/782]	Time 0.275 (0.275)	Data 0.231 (0.231)	Loss 2.8655 (2.8655)	Acc@1 32.812 (32.812)	Acc@5 56.250 (56.250)
Epoch: [7][100/782]	Time 0.037 (0.040)	Data 0.001 (0.003)	Loss 2.5052 (2.3316)	Acc@1 31.250 (37.624)	Acc@5 65.625 (71.101)
Epoch: [7][200/782]	Time 0.036 (0.039)	Data 0.001 (0.002)	Loss 2.4362 (2.3312)	Acc@1 40.625 (37.694)	Acc@5 70.312 (71.160)
Epoch: [7][300/782]	Time 0.036 (0.038)	Data 0.001 (0.002)	Loss 2.5412 (2.3253)	Acc@1 40.625 (38.040)	Acc@5 57.812 (71.247)
Epoch: [7][400/782]	Time 0.037 (0.037)	Data 0.001 (0.001)	Loss 2.3026 (2.3173)	Acc@1 35.938 (38.451)	Acc@5 71.875 (71.384)
Epoch: [7][500/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.0843 (2.3079)	Acc@1 40.625 (38.433)	Acc@5 79.688 (71.594)
Epoch: [7][600/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.3574 (2.2985)	Acc@1 32.812 (38.618)	Acc@5 65.625 (71.761)
Epoch: [7][700/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.0019 (2.2906)	Acc@1 45.312 (38.764)	Acc@5 78.125 (71.944)
 *


  3%|▎         | 7/240 [04:10<2:08:47, 33.16s/it][A

==> training...
Epoch: [8][0/782]	Time 0.302 (0.302)	Data 0.250 (0.250)	Loss 2.0139 (2.0139)	Acc@1 46.875 (46.875)	Acc@5 70.312 (70.312)
Epoch: [8][100/782]	Time 0.037 (0.040)	Data 0.001 (0.003)	Loss 2.1456 (2.1442)	Acc@1 39.062 (41.383)	Acc@5 79.688 (75.232)
Epoch: [8][200/782]	Time 0.036 (0.039)	Data 0.001 (0.002)	Loss 2.4025 (2.1462)	Acc@1 32.812 (41.853)	Acc@5 70.312 (74.977)
Epoch: [8][300/782]	Time 0.036 (0.038)	Data 0.001 (0.002)	Loss 1.9459 (2.1424)	Acc@1 46.875 (41.886)	Acc@5 75.000 (75.021)
Epoch: [8][400/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.1373 (2.1423)	Acc@1 42.188 (41.778)	Acc@5 73.438 (75.051)
Epoch: [8][500/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.4453 (2.1389)	Acc@1 42.188 (41.879)	Acc@5 68.750 (75.106)
Epoch: [8][600/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.0733 (2.1337)	Acc@1 43.750 (42.084)	Acc@5 78.125 (75.179)
Epoch: [8][700/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 2.3804 (2.1314)	Acc@1 42.188 (42.194)	Acc@5 70.312 (75.325)
 *


  3%|▎         | 8/240 [04:42<2:07:02, 32.86s/it][A

==> training...
Epoch: [9][0/782]	Time 0.256 (0.256)	Data 0.216 (0.216)	Loss 1.9905 (1.9905)	Acc@1 42.188 (42.188)	Acc@5 76.562 (76.562)
Epoch: [9][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 2.0352 (1.9950)	Acc@1 40.625 (44.787)	Acc@5 78.125 (77.754)
Epoch: [9][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.1634 (2.0137)	Acc@1 48.438 (44.660)	Acc@5 76.562 (77.682)
Epoch: [9][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 1.9785 (2.0116)	Acc@1 48.438 (44.804)	Acc@5 76.562 (77.658)
Epoch: [9][400/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.9332 (2.0156)	Acc@1 50.000 (44.759)	Acc@5 79.688 (77.369)
Epoch: [9][500/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.9563 (2.0163)	Acc@1 42.188 (44.829)	Acc@5 76.562 (77.436)
Epoch: [9][600/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 2.2003 (2.0190)	Acc@1 42.188 (44.738)	Acc@5 73.438 (77.366)
Epoch: [9][700/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 2.0414 (2.0168)	Acc@1 40.625 (44.836)	Acc@5 76.562 (77.443)
 *


  4%|▍         | 9/240 [05:14<2:05:25, 32.58s/it][A

==> training...
Epoch: [10][0/782]	Time 0.268 (0.268)	Data 0.227 (0.227)	Loss 1.7393 (1.7393)	Acc@1 51.562 (51.562)	Acc@5 82.812 (82.812)
Epoch: [10][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 2.4269 (1.9557)	Acc@1 39.062 (46.194)	Acc@5 67.188 (78.527)
Epoch: [10][200/782]	Time 0.037 (0.037)	Data 0.001 (0.002)	Loss 2.0424 (1.9413)	Acc@1 45.312 (46.541)	Acc@5 79.688 (79.174)
Epoch: [10][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 1.6071 (1.9268)	Acc@1 51.562 (46.724)	Acc@5 82.812 (79.335)
Epoch: [10][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 1.6820 (1.9271)	Acc@1 48.438 (46.926)	Acc@5 82.812 (79.329)
Epoch: [10][500/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.6560 (1.9371)	Acc@1 51.562 (46.644)	Acc@5 85.938 (79.207)
Epoch: [10][600/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.7261 (1.9273)	Acc@1 50.000 (46.701)	Acc@5 81.250 (79.370)
Epoch: [10][700/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.9539 (1.9288)	Acc@1 43.750 (46.748)	Acc@5 82.812 (79


  4%|▍         | 10/240 [05:46<2:04:09, 32.39s/it][A

==> training...
Epoch: [11][0/782]	Time 0.258 (0.258)	Data 0.219 (0.219)	Loss 1.7647 (1.7647)	Acc@1 48.438 (48.438)	Acc@5 84.375 (84.375)
Epoch: [11][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 1.1156 (1.8098)	Acc@1 71.875 (49.907)	Acc@5 92.188 (81.467)
Epoch: [11][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.1046 (1.8472)	Acc@1 45.312 (49.153)	Acc@5 70.312 (80.589)
Epoch: [11][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 1.7678 (1.8518)	Acc@1 48.438 (48.765)	Acc@5 85.938 (80.601)
Epoch: [11][400/782]	Time 0.037 (0.036)	Data 0.001 (0.001)	Loss 1.8410 (1.8519)	Acc@1 56.250 (48.886)	Acc@5 76.562 (80.537)
Epoch: [11][500/782]	Time 0.035 (0.036)	Data 0.001 (0.001)	Loss 2.0163 (1.8539)	Acc@1 42.188 (48.752)	Acc@5 75.000 (80.473)
Epoch: [11][600/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.8063 (1.8489)	Acc@1 54.688 (48.859)	Acc@5 81.250 (80.558)
Epoch: [11][700/782]	Time 0.038 (0.036)	Data 0.001 (0.001)	Loss 1.9400 (1.8522)	Acc@1 45.312 (48.772)	Acc@5 75.000 (80


  5%|▍         | 11/240 [06:18<2:03:06, 32.25s/it][A

==> training...
Epoch: [12][0/782]	Time 0.264 (0.264)	Data 0.224 (0.224)	Loss 1.6299 (1.6299)	Acc@1 50.000 (50.000)	Acc@5 84.375 (84.375)
Epoch: [12][100/782]	Time 0.036 (0.038)	Data 0.001 (0.003)	Loss 1.7143 (1.7878)	Acc@1 45.312 (50.263)	Acc@5 84.375 (81.776)
Epoch: [12][200/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 1.4997 (1.7828)	Acc@1 51.562 (50.381)	Acc@5 87.500 (81.887)
Epoch: [12][300/782]	Time 0.036 (0.037)	Data 0.001 (0.002)	Loss 2.0416 (1.8029)	Acc@1 46.875 (49.875)	Acc@5 75.000 (81.645)
Epoch: [12][400/782]	Time 0.036 (0.037)	Data 0.001 (0.001)	Loss 1.7291 (1.7974)	Acc@1 48.438 (49.914)	Acc@5 85.938 (81.749)
Epoch: [12][500/782]	Time 0.036 (0.036)	Data 0.001 (0.001)	Loss 1.7859 (1.7959)	Acc@1 43.750 (50.190)	Acc@5 85.938 (81.718)


  5%|▍         | 11/240 [06:39<2:18:45, 36.36s/it]


KeyboardInterrupt: 