## 0 Import Modules

In [1]:
import os

In [2]:
import torch
import torch.nn as nn

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import wandb

In [3]:
from tqdm import tqdm

In [4]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    AverageMeter, 
)

## 1 Functions

### Accuracy

In [5]:
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

### Training Loop Function

In [6]:
# Function to train the model for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, epoch_idx, args=None, device='cpu'):
    # Set the model to training mode
    model.train()

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    top1 = AverageMeter()
    
    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        
        # Compute output and loss
        predict = model(x)
        loss = criterion(predict, target) 
        
        # Measure accuracy and record loss
        prec1 = accuracy(predict.data, target, topk=(1,))[0].item()
        losses.update(loss.item(), x.size(0))
        top1.update(prec1, x.size(0))

        # Compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss = losses.avg
        train_acc = top1.avg

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the losses and learning rate to wandb
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Training Loss": train_loss,
                "Training Accuracy": train_acc
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {train_loss:.4f}, Top-1 Accuracy = {train_acc:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")
    
    return train_loss

### 1.2 Validation function

In [7]:
def validate(model, val_loader, criterion, args=None, device='cpu'):
    # Set the model to training mode
    model.eval()

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    top1 = AverageMeter()
    
    with torch.no_grad():
        # Iterate over the training data
        for x, target in tqdm(val_loader):
            # Preprocess input
            x, target = x.to(device), target.to(device)
            predict = model(x)
            
            loss = criterion(predict, target) 
            
            # Measure accuracy and record loss
            prec1 = accuracy(predict.data, target, topk=(1,))[0].item()
            losses.update(loss.item(), x.size(0))
            top1.update(prec1, x.size(0))

    return losses.avg, top1.avg

### 1.4 Adjust learning rate

In [8]:
def adjust_learning_rate(optimizer, epoch, init_lr):
    """Sets the learning rate to the initial LR decayed by 10 after 150 and 225 epochs"""
    if epoch < 150:
        lr = init_lr
    elif epoch < 225:
        lr = init_lr * 0.1
    else:
        lr = init_lr * 0.01
    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

### 1.5 Initialize wandb

In [9]:
def initialize_wandb(config):
    import time
    """
    Initializes Weights and Biases (wandb) with the given configuration.
    
    Args:
        configuration (dict): Configuration parameters for the run.
    """
    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"
    
    wandb.init(project="densenet-trial", name=run_name, config=dict(config), group='densenet')

## 2 Training LeNet-dim_32

### 2.0 Device

In [10]:
device = "cuda" if torch.cuda.is_available() else "cpu"

### Parsing arguments for input

In [14]:
CONFIG_PATH = 'neumeta/config/densenet/densenet_baseline.yaml'
RATIO = '1.0'
CHECKPOINT_PATH = 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth'

In [15]:
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]
argv_train

['--config',
 'neumeta/config/densenet/densenet_baseline.yaml',
 '--ratio',
 '1.0',
 '--resume_from',
 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth']

In [16]:
args = parse_args(argv_train)
print_omegaconf(args)

+--------------------------+----------------------------------------------------------------------------------------+
|           Key            |                                         Value                                          |
+--------------------------+----------------------------------------------------------------------------------------+
|     experiment.name      |                               densenet_bc_40_12_baseline                               |
|  experiment.num_epochs   |                                          300                                           |
| experiment.log_interval  |                                          100                                           |
| experiment.eval_interval |                                           1                                            |
|     experiment.seed      |                                           42                                           |
|        model.type        |                            

In [17]:
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2.2 Data Preparations

In [18]:
normalize = transforms.Normalize(
    mean=[x/255.0 for x in [125.3, 123.0, 113.9]], 
    std=[x/255.0 for x in [63.0, 62.1, 66.7]]
    )

transforms_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

In [19]:
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transforms_train, download=True)
val_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform_test)

In [20]:
train_loader = DataLoader(train_dataset, batch_size=args.training.batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=args.training.batch_size, shuffle=False)

### 2.3 Create Model

In [21]:
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate).to(device)

In [22]:
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(48, 12, kern

In [23]:
for i, k in enumerate(model.parameters()):
    print(k)

Parameter containing:
tensor([[[[-7.6929e-02,  3.8479e-02,  1.1810e-01],
          [ 3.7327e-02, -2.5631e-01,  5.8351e-03],
          [-3.9329e-02, -5.9862e-02, -6.0881e-02]],

         [[-1.1163e-01,  1.0882e-01,  9.3454e-02],
          [-1.1987e-03,  3.1157e-02,  1.7140e-01],
          [ 8.1337e-02, -7.8931e-02,  4.9267e-02]],

         [[ 1.5046e-02, -8.8851e-02,  3.3003e-01],
          [-4.2694e-02, -1.0119e-01,  9.3057e-02],
          [ 9.0782e-02, -1.6318e-01,  2.9001e-02]]],


        [[[ 3.7010e-02, -1.8797e-02, -1.9568e-02],
          [-8.2715e-02,  1.7037e-02,  4.7069e-03],
          [ 5.3735e-02, -3.9003e-04,  1.5702e-02]],

         [[ 1.2933e-01,  1.2755e-01, -1.3224e-01],
          [ 1.7877e-02, -5.8162e-02,  1.5731e-01],
          [-5.9760e-02,  9.6540e-02, -1.0338e-01]],

         [[ 3.8407e-02, -4.9309e-02,  3.8090e-02],
          [-1.8498e-01, -7.6223e-02, -5.0379e-02],
          [ 6.7753e-02, -7.2016e-02,  2.2102e-02]]],


        [[[-1.9581e-03, -6.2443e-02, -1.1872

In [24]:
for param in model.parameters():
    print(param.data.shape)

torch.Size([24, 3, 3, 3])
torch.Size([24])
torch.Size([24])
torch.Size([48, 24, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([36])
torch.Size([36])
torch.Size([48, 36, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([48])
torch.Size([48])
torch.Size([48, 48, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([60])
torch.Size([60])
torch.Size([48, 60, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([72])
torch.Size([72])
torch.Size([48, 72, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([84])
torch.Size([84])
torch.Size([48, 84, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([96])
torch.Size([96])
torch.Size([48, 96, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([48, 48, 1, 1])
torch.Size([48])
torch.Size([48])
torch.Size([12, 48, 3, 3])
torch.Size([60])
torch.Size([60])
torch.Size([48, 60, 1, 1]

In [25]:
print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

Number of model parameters: 176122


### 2.4 Optimizer and Criterion

In [26]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=args.training.learning_rate, momentum=args.training.momentum, nesterov=True, weight_decay=args.training.weight_decay)

In [27]:
print(f'Criterion: {criterion}\nOptimizer: {optimizer}')

Criterion: CrossEntropyLoss()
Optimizer: SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.1
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0001
)


### 2.5 Training and Validation Loop

In [28]:
start_epoch = 0
best_acc = 0.0

In [29]:
args.training.save_model_path

'toy/experiments/densenet_bc_40_12_baseline'

In [30]:
os.makedirs(args.training.save_model_path, exist_ok=True)

In [34]:
# args.resume_from = False
args.resume_from

'toy/experiment/densenet_bc_40_12_baseline/densenet_bc_40_12.pth'

In [31]:
if args.resume_from:
        print(f"Resuming from checkpoint: {args.resume_from}")
        checkpoint_info = load_checkpoint(args.resume_from, model, optimizer, None)
        start_epoch = checkpoint_info['epoch']
        best_acc = checkpoint_info['best_acc']
        print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")

Resuming from checkpoint: toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Resuming from epoch: 244, best accuracy: 9325.00%


In [32]:
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [33]:
for epoch in range(start_epoch, args.experiment.num_epochs):
    print('\n')

    # Adjust learning rate
    adjust_learning_rate(optimizer, epoch, args.training.learning_rate)
    
    # Train for one epoch
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, epoch_idx=epoch, args=args, device=device)
    
    # Print the training loss and learning rate
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")

    if (epoch + 1) % 1 == 0:
        val_loss, acc = validate(model, val_loader, criterion, device=device)

        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })

        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc:.2f}%")

        if acc > best_acc:
            best_acc = acc
            save_checkpoint(f"{args.training.save_model_path}/densenet_bc_40_12_cifar10_baseline_best.pth",model,optimizer,None,epoch,best_acc)
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc:.2f}%")

print("Training finished.")



Iteration 0: Loss = 0.0208, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0155, Top-1 Accuracy = 99.5668, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0173, Top-1 Accuracy = 99.5180, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0170, Top-1 Accuracy = 99.5224, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0173, Top-1 Accuracy = 99.5129, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0174, Top-1 Accuracy = 99.5259, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0176, Top-1 Accuracy = 99.5268, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0177, Top-1 Accuracy = 99.5275, Learning rate = 1.0000e-03
Epoch [245/300], Training Loss: 0.0176, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 42.72it/s]


Epoch [245/300], Validation Loss: 0.2768, Validation Accuracy: 93.11%


Iteration 0: Loss = 0.0216, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0157, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0168, Top-1 Accuracy = 99.6502, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0175, Top-1 Accuracy = 99.6211, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0178, Top-1 Accuracy = 99.5909, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0176, Top-1 Accuracy = 99.6039, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0179, Top-1 Accuracy = 99.5840, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0181, Top-1 Accuracy = 99.5676, Learning rate = 1.0000e-03
Epoch [246/300], Training Loss: 0.0177, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.18it/s]


Epoch [246/300], Validation Loss: 0.2800, Validation Accuracy: 93.09%


Iteration 0: Loss = 0.0153, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0147, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0151, Top-1 Accuracy = 99.6502, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0153, Top-1 Accuracy = 99.6833, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0167, Top-1 Accuracy = 99.6065, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0170, Top-1 Accuracy = 99.5665, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0170, Top-1 Accuracy = 99.5814, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0167, Top-1 Accuracy = 99.6032, Learning rate = 1.0000e-03
Epoch [247/300], Training Loss: 0.0167, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.31it/s]


Epoch [247/300], Validation Loss: 0.2756, Validation Accuracy: 93.05%


Iteration 0: Loss = 0.0092, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0162, Top-1 Accuracy = 99.5823, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0171, Top-1 Accuracy = 99.5258, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0168, Top-1 Accuracy = 99.5588, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0169, Top-1 Accuracy = 99.5636, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0167, Top-1 Accuracy = 99.5821, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0167, Top-1 Accuracy = 99.5840, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0165, Top-1 Accuracy = 99.5899, Learning rate = 1.0000e-03
Epoch [248/300], Training Loss: 0.0166, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.63it/s]


Epoch [248/300], Validation Loss: 0.2775, Validation Accuracy: 93.13%


Iteration 0: Loss = 0.0076, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0151, Top-1 Accuracy = 99.7215, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0153, Top-1 Accuracy = 99.6735, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0150, Top-1 Accuracy = 99.6937, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0161, Top-1 Accuracy = 99.6103, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0161, Top-1 Accuracy = 99.6070, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0160, Top-1 Accuracy = 99.6230, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0164, Top-1 Accuracy = 99.5988, Learning rate = 1.0000e-03
Epoch [249/300], Training Loss: 0.0164, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.63it/s]


Epoch [249/300], Validation Loss: 0.2794, Validation Accuracy: 93.11%


Iteration 0: Loss = 0.0164, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0178, Top-1 Accuracy = 99.5668, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0162, Top-1 Accuracy = 99.6657, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0165, Top-1 Accuracy = 99.6211, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0167, Top-1 Accuracy = 99.6065, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0166, Top-1 Accuracy = 99.6070, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0163, Top-1 Accuracy = 99.6230, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0164, Top-1 Accuracy = 99.6188, Learning rate = 1.0000e-03
Epoch [250/300], Training Loss: 0.0166, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.62it/s]


Epoch [250/300], Validation Loss: 0.2819, Validation Accuracy: 93.01%


Iteration 0: Loss = 0.0573, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0159, Top-1 Accuracy = 99.6751, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0153, Top-1 Accuracy = 99.6735, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0156, Top-1 Accuracy = 99.6470, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0158, Top-1 Accuracy = 99.6337, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0159, Top-1 Accuracy = 99.6164, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0158, Top-1 Accuracy = 99.6204, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0162, Top-1 Accuracy = 99.6055, Learning rate = 1.0000e-03
Epoch [251/300], Training Loss: 0.0163, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.69it/s]


Epoch [251/300], Validation Loss: 0.2807, Validation Accuracy: 93.05%


Iteration 0: Loss = 0.0094, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0144, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0154, Top-1 Accuracy = 99.6424, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0154, Top-1 Accuracy = 99.6314, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0156, Top-1 Accuracy = 99.6415, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0160, Top-1 Accuracy = 99.6164, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0158, Top-1 Accuracy = 99.6126, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0160, Top-1 Accuracy = 99.5988, Learning rate = 1.0000e-03
Epoch [252/300], Training Loss: 0.0161, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.64it/s]


Epoch [252/300], Validation Loss: 0.2841, Validation Accuracy: 92.99%


Iteration 0: Loss = 0.0333, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0179, Top-1 Accuracy = 99.5823, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0162, Top-1 Accuracy = 99.6346, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0158, Top-1 Accuracy = 99.6418, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0157, Top-1 Accuracy = 99.6649, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0158, Top-1 Accuracy = 99.6569, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0153, Top-1 Accuracy = 99.6802, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0156, Top-1 Accuracy = 99.6723, Learning rate = 1.0000e-03
Epoch [253/300], Training Loss: 0.0156, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.93it/s]


Epoch [253/300], Validation Loss: 0.2800, Validation Accuracy: 93.18%


Iteration 0: Loss = 0.0056, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0164, Top-1 Accuracy = 99.6132, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0159, Top-1 Accuracy = 99.6269, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0155, Top-1 Accuracy = 99.6522, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0155, Top-1 Accuracy = 99.6493, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0153, Top-1 Accuracy = 99.6601, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0154, Top-1 Accuracy = 99.6412, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0153, Top-1 Accuracy = 99.6434, Learning rate = 1.0000e-03
Epoch [254/300], Training Loss: 0.0154, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.95it/s]


Epoch [254/300], Validation Loss: 0.2810, Validation Accuracy: 93.07%


Iteration 0: Loss = 0.0065, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0150, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0150, Top-1 Accuracy = 99.6346, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0153, Top-1 Accuracy = 99.6366, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0148, Top-1 Accuracy = 99.6649, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0149, Top-1 Accuracy = 99.6632, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0153, Top-1 Accuracy = 99.6490, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0153, Top-1 Accuracy = 99.6634, Learning rate = 1.0000e-03
Epoch [255/300], Training Loss: 0.0155, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.57it/s]


Epoch [255/300], Validation Loss: 0.2799, Validation Accuracy: 93.23%


Iteration 0: Loss = 0.0048, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0162, Top-1 Accuracy = 99.5514, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0160, Top-1 Accuracy = 99.6191, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0166, Top-1 Accuracy = 99.5847, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0165, Top-1 Accuracy = 99.6065, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0163, Top-1 Accuracy = 99.6008, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0160, Top-1 Accuracy = 99.6178, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0160, Top-1 Accuracy = 99.6077, Learning rate = 1.0000e-03
Epoch [256/300], Training Loss: 0.0160, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.04it/s]


Epoch [256/300], Validation Loss: 0.2888, Validation Accuracy: 92.98%


Iteration 0: Loss = 0.0028, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0140, Top-1 Accuracy = 99.7525, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0160, Top-1 Accuracy = 99.6502, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0159, Top-1 Accuracy = 99.6107, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0150, Top-1 Accuracy = 99.6493, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0151, Top-1 Accuracy = 99.6538, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0147, Top-1 Accuracy = 99.6698, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0147, Top-1 Accuracy = 99.6634, Learning rate = 1.0000e-03
Epoch [257/300], Training Loss: 0.0148, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.16it/s]


Epoch [257/300], Validation Loss: 0.2839, Validation Accuracy: 93.06%


Iteration 0: Loss = 0.0144, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0138, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0149, Top-1 Accuracy = 99.6580, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0152, Top-1 Accuracy = 99.6626, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0149, Top-1 Accuracy = 99.6649, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0149, Top-1 Accuracy = 99.6663, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0150, Top-1 Accuracy = 99.6594, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0150, Top-1 Accuracy = 99.6567, Learning rate = 1.0000e-03
Epoch [258/300], Training Loss: 0.0151, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.05it/s]


Epoch [258/300], Validation Loss: 0.2834, Validation Accuracy: 93.07%


Iteration 0: Loss = 0.0119, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0147, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0142, Top-1 Accuracy = 99.6657, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0140, Top-1 Accuracy = 99.6574, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0140, Top-1 Accuracy = 99.6844, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0141, Top-1 Accuracy = 99.6663, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0142, Top-1 Accuracy = 99.6516, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0141, Top-1 Accuracy = 99.6612, Learning rate = 1.0000e-03
Epoch [259/300], Training Loss: 0.0142, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.12it/s]


Epoch [259/300], Validation Loss: 0.2816, Validation Accuracy: 93.21%


Iteration 0: Loss = 0.0038, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0127, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0128, Top-1 Accuracy = 99.7357, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0128, Top-1 Accuracy = 99.7404, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0131, Top-1 Accuracy = 99.7350, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0140, Top-1 Accuracy = 99.7006, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0143, Top-1 Accuracy = 99.6750, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0143, Top-1 Accuracy = 99.6790, Learning rate = 1.0000e-03
Epoch [260/300], Training Loss: 0.0143, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.93it/s]


Epoch [260/300], Validation Loss: 0.2802, Validation Accuracy: 92.98%


Iteration 0: Loss = 0.0069, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0134, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0149, Top-1 Accuracy = 99.6813, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0141, Top-1 Accuracy = 99.7301, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0142, Top-1 Accuracy = 99.7039, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0140, Top-1 Accuracy = 99.7037, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0142, Top-1 Accuracy = 99.6958, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0143, Top-1 Accuracy = 99.6857, Learning rate = 1.0000e-03
Epoch [261/300], Training Loss: 0.0143, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.52it/s]


Epoch [261/300], Validation Loss: 0.2860, Validation Accuracy: 93.13%


Iteration 0: Loss = 0.0137, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0157, Top-1 Accuracy = 99.5514, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0141, Top-1 Accuracy = 99.6813, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0145, Top-1 Accuracy = 99.6730, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0145, Top-1 Accuracy = 99.6805, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0141, Top-1 Accuracy = 99.7037, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0143, Top-1 Accuracy = 99.6802, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0142, Top-1 Accuracy = 99.6768, Learning rate = 1.0000e-03
Epoch [262/300], Training Loss: 0.0146, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.57it/s]


Epoch [262/300], Validation Loss: 0.2912, Validation Accuracy: 93.02%


Iteration 0: Loss = 0.0101, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0138, Top-1 Accuracy = 99.6132, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0141, Top-1 Accuracy = 99.6191, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0137, Top-1 Accuracy = 99.6626, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0143, Top-1 Accuracy = 99.6454, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0141, Top-1 Accuracy = 99.6663, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0141, Top-1 Accuracy = 99.6698, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0143, Top-1 Accuracy = 99.6657, Learning rate = 1.0000e-03
Epoch [263/300], Training Loss: 0.0144, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.54it/s]


Epoch [263/300], Validation Loss: 0.2836, Validation Accuracy: 93.21%


Iteration 0: Loss = 0.0095, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0135, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0141, Top-1 Accuracy = 99.6968, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0138, Top-1 Accuracy = 99.6885, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0134, Top-1 Accuracy = 99.7233, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0134, Top-1 Accuracy = 99.7162, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0137, Top-1 Accuracy = 99.6984, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0137, Top-1 Accuracy = 99.7013, Learning rate = 1.0000e-03
Epoch [264/300], Training Loss: 0.0138, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.56it/s]


Epoch [264/300], Validation Loss: 0.2870, Validation Accuracy: 93.07%


Iteration 0: Loss = 0.0065, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0139, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0144, Top-1 Accuracy = 99.7124, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0144, Top-1 Accuracy = 99.6782, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0148, Top-1 Accuracy = 99.6376, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0145, Top-1 Accuracy = 99.6445, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0142, Top-1 Accuracy = 99.6542, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0142, Top-1 Accuracy = 99.6456, Learning rate = 1.0000e-03
Epoch [265/300], Training Loss: 0.0143, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.37it/s]


Epoch [265/300], Validation Loss: 0.2870, Validation Accuracy: 93.01%


Iteration 0: Loss = 0.0161, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0148, Top-1 Accuracy = 99.5978, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0144, Top-1 Accuracy = 99.6346, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0135, Top-1 Accuracy = 99.6730, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0132, Top-1 Accuracy = 99.6961, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0134, Top-1 Accuracy = 99.7037, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0139, Top-1 Accuracy = 99.6594, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0140, Top-1 Accuracy = 99.6590, Learning rate = 1.0000e-03
Epoch [266/300], Training Loss: 0.0140, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.61it/s]


Epoch [266/300], Validation Loss: 0.2897, Validation Accuracy: 92.99%


Iteration 0: Loss = 0.0174, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0141, Top-1 Accuracy = 99.6442, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0132, Top-1 Accuracy = 99.7279, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0136, Top-1 Accuracy = 99.7093, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0140, Top-1 Accuracy = 99.6805, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0138, Top-1 Accuracy = 99.6912, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0143, Top-1 Accuracy = 99.6750, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0142, Top-1 Accuracy = 99.6679, Learning rate = 1.0000e-03
Epoch [267/300], Training Loss: 0.0143, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.61it/s]


Epoch [267/300], Validation Loss: 0.2844, Validation Accuracy: 93.22%


Iteration 0: Loss = 0.0078, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0148, Top-1 Accuracy = 99.5978, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0155, Top-1 Accuracy = 99.6191, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0149, Top-1 Accuracy = 99.6418, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0151, Top-1 Accuracy = 99.6415, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0146, Top-1 Accuracy = 99.6538, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0144, Top-1 Accuracy = 99.6724, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0142, Top-1 Accuracy = 99.6768, Learning rate = 1.0000e-03
Epoch [268/300], Training Loss: 0.0140, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.50it/s]


Epoch [268/300], Validation Loss: 0.2897, Validation Accuracy: 93.13%


Iteration 0: Loss = 0.0052, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0122, Top-1 Accuracy = 99.7525, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0124, Top-1 Accuracy = 99.7512, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0127, Top-1 Accuracy = 99.7093, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0129, Top-1 Accuracy = 99.6883, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0132, Top-1 Accuracy = 99.6944, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0132, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0131, Top-1 Accuracy = 99.7013, Learning rate = 1.0000e-03
Epoch [269/300], Training Loss: 0.0129, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.58it/s]


Epoch [269/300], Validation Loss: 0.2825, Validation Accuracy: 93.10%


Iteration 0: Loss = 0.0007, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0128, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0130, Top-1 Accuracy = 99.7201, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0125, Top-1 Accuracy = 99.7145, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0129, Top-1 Accuracy = 99.7195, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0133, Top-1 Accuracy = 99.7100, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0136, Top-1 Accuracy = 99.6854, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0138, Top-1 Accuracy = 99.6679, Learning rate = 1.0000e-03
Epoch [270/300], Training Loss: 0.0140, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.47it/s]


Epoch [270/300], Validation Loss: 0.2857, Validation Accuracy: 93.24%


Iteration 0: Loss = 0.0023, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0132, Top-1 Accuracy = 99.6597, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0138, Top-1 Accuracy = 99.6813, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0141, Top-1 Accuracy = 99.6678, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0140, Top-1 Accuracy = 99.6688, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0141, Top-1 Accuracy = 99.6788, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0139, Top-1 Accuracy = 99.6880, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0140, Top-1 Accuracy = 99.6835, Learning rate = 1.0000e-03
Epoch [271/300], Training Loss: 0.0142, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.45it/s]


Epoch [271/300], Validation Loss: 0.2874, Validation Accuracy: 93.14%


Iteration 0: Loss = 0.0034, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0117, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0134, Top-1 Accuracy = 99.7279, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0134, Top-1 Accuracy = 99.7353, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0134, Top-1 Accuracy = 99.7233, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0137, Top-1 Accuracy = 99.7131, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0137, Top-1 Accuracy = 99.7036, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0139, Top-1 Accuracy = 99.6879, Learning rate = 1.0000e-03
Epoch [272/300], Training Loss: 0.0140, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.89it/s]


Epoch [272/300], Validation Loss: 0.2870, Validation Accuracy: 93.13%


Iteration 0: Loss = 0.0824, Top-1 Accuracy = 95.3125, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0129, Top-1 Accuracy = 99.6597, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0128, Top-1 Accuracy = 99.6891, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0124, Top-1 Accuracy = 99.7041, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0121, Top-1 Accuracy = 99.7311, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0122, Top-1 Accuracy = 99.7255, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0124, Top-1 Accuracy = 99.7166, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0125, Top-1 Accuracy = 99.7192, Learning rate = 1.0000e-03
Epoch [273/300], Training Loss: 0.0125, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.31it/s]


Epoch [273/300], Validation Loss: 0.2845, Validation Accuracy: 93.18%


Iteration 0: Loss = 0.0205, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0116, Top-1 Accuracy = 99.7525, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0128, Top-1 Accuracy = 99.7124, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0130, Top-1 Accuracy = 99.6833, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0128, Top-1 Accuracy = 99.7233, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0129, Top-1 Accuracy = 99.7224, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0128, Top-1 Accuracy = 99.7296, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0127, Top-1 Accuracy = 99.7192, Learning rate = 1.0000e-03
Epoch [274/300], Training Loss: 0.0128, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.62it/s]


Epoch [274/300], Validation Loss: 0.2895, Validation Accuracy: 93.22%


Iteration 0: Loss = 0.0098, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0123, Top-1 Accuracy = 99.8453, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0126, Top-1 Accuracy = 99.7435, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0124, Top-1 Accuracy = 99.7508, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0127, Top-1 Accuracy = 99.7311, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0129, Top-1 Accuracy = 99.7131, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0129, Top-1 Accuracy = 99.7218, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0126, Top-1 Accuracy = 99.7303, Learning rate = 1.0000e-03
Epoch [275/300], Training Loss: 0.0128, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.10it/s]


Epoch [275/300], Validation Loss: 0.2902, Validation Accuracy: 93.27%
Checkpoint saved at epoch 274 with accuracy: 93.27%


Iteration 0: Loss = 0.0078, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0124, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0145, Top-1 Accuracy = 99.6424, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0141, Top-1 Accuracy = 99.6522, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0145, Top-1 Accuracy = 99.6259, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0144, Top-1 Accuracy = 99.6445, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0140, Top-1 Accuracy = 99.6646, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0140, Top-1 Accuracy = 99.6634, Learning rate = 1.0000e-03
Epoch [276/300], Training Loss: 0.0139, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.47it/s]


Epoch [276/300], Validation Loss: 0.2869, Validation Accuracy: 93.16%


Iteration 0: Loss = 0.0042, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0124, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0127, Top-1 Accuracy = 99.7823, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0128, Top-1 Accuracy = 99.7508, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0128, Top-1 Accuracy = 99.7506, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0130, Top-1 Accuracy = 99.7411, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0129, Top-1 Accuracy = 99.7426, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0128, Top-1 Accuracy = 99.7459, Learning rate = 1.0000e-03
Epoch [277/300], Training Loss: 0.0127, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 34.41it/s]


Epoch [277/300], Validation Loss: 0.2860, Validation Accuracy: 93.30%
Checkpoint saved at epoch 276 with accuracy: 93.30%


Iteration 0: Loss = 0.0057, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0098, Top-1 Accuracy = 99.8453, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0119, Top-1 Accuracy = 99.7357, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0121, Top-1 Accuracy = 99.7197, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0120, Top-1 Accuracy = 99.7272, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0121, Top-1 Accuracy = 99.7255, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0123, Top-1 Accuracy = 99.7218, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0124, Top-1 Accuracy = 99.7102, Learning rate = 1.0000e-03
Epoch [278/300], Training Loss: 0.0125, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 34.09it/s]


Epoch [278/300], Validation Loss: 0.2844, Validation Accuracy: 93.12%


Iteration 0: Loss = 0.0025, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0124, Top-1 Accuracy = 99.7215, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0134, Top-1 Accuracy = 99.6891, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0138, Top-1 Accuracy = 99.6366, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0136, Top-1 Accuracy = 99.6376, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0132, Top-1 Accuracy = 99.6601, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0132, Top-1 Accuracy = 99.6724, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0132, Top-1 Accuracy = 99.6746, Learning rate = 1.0000e-03
Epoch [279/300], Training Loss: 0.0134, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 34.28it/s]


Epoch [279/300], Validation Loss: 0.2902, Validation Accuracy: 93.16%


Iteration 0: Loss = 0.0073, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0127, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0125, Top-1 Accuracy = 99.6813, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0124, Top-1 Accuracy = 99.7041, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0124, Top-1 Accuracy = 99.7117, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0122, Top-1 Accuracy = 99.7224, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0126, Top-1 Accuracy = 99.7062, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0129, Top-1 Accuracy = 99.6991, Learning rate = 1.0000e-03
Epoch [280/300], Training Loss: 0.0128, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.25it/s]


Epoch [280/300], Validation Loss: 0.2837, Validation Accuracy: 93.20%


Iteration 0: Loss = 0.0269, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0147, Top-1 Accuracy = 99.6132, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0136, Top-1 Accuracy = 99.6657, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0132, Top-1 Accuracy = 99.6885, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0132, Top-1 Accuracy = 99.6961, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0133, Top-1 Accuracy = 99.6944, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0131, Top-1 Accuracy = 99.7062, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0133, Top-1 Accuracy = 99.6991, Learning rate = 1.0000e-03
Epoch [281/300], Training Loss: 0.0134, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.27it/s]


Epoch [281/300], Validation Loss: 0.2855, Validation Accuracy: 93.27%


Iteration 0: Loss = 0.0092, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0116, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0120, Top-1 Accuracy = 99.7357, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0115, Top-1 Accuracy = 99.7716, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0112, Top-1 Accuracy = 99.7779, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0111, Top-1 Accuracy = 99.7786, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0112, Top-1 Accuracy = 99.7816, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0115, Top-1 Accuracy = 99.7771, Learning rate = 1.0000e-03
Epoch [282/300], Training Loss: 0.0118, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.42it/s]


Epoch [282/300], Validation Loss: 0.2987, Validation Accuracy: 93.24%


Iteration 0: Loss = 0.0060, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0123, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0120, Top-1 Accuracy = 99.7668, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0125, Top-1 Accuracy = 99.7249, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0126, Top-1 Accuracy = 99.7311, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0125, Top-1 Accuracy = 99.7100, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0130, Top-1 Accuracy = 99.6854, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0127, Top-1 Accuracy = 99.6946, Learning rate = 1.0000e-03
Epoch [283/300], Training Loss: 0.0132, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.33it/s]


Epoch [283/300], Validation Loss: 0.2903, Validation Accuracy: 93.21%


Iteration 0: Loss = 0.0542, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0154, Top-1 Accuracy = 99.5514, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0152, Top-1 Accuracy = 99.5958, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0147, Top-1 Accuracy = 99.6055, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0147, Top-1 Accuracy = 99.6103, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0141, Top-1 Accuracy = 99.6257, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0139, Top-1 Accuracy = 99.6308, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0137, Top-1 Accuracy = 99.6411, Learning rate = 1.0000e-03
Epoch [284/300], Training Loss: 0.0137, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.46it/s]


Epoch [284/300], Validation Loss: 0.2873, Validation Accuracy: 93.18%


Iteration 0: Loss = 0.0078, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0110, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0115, Top-1 Accuracy = 99.7357, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0112, Top-1 Accuracy = 99.7612, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0116, Top-1 Accuracy = 99.7428, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0119, Top-1 Accuracy = 99.7380, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0121, Top-1 Accuracy = 99.7244, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0122, Top-1 Accuracy = 99.7258, Learning rate = 1.0000e-03
Epoch [285/300], Training Loss: 0.0125, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.56it/s]


Epoch [285/300], Validation Loss: 0.2916, Validation Accuracy: 93.14%


Iteration 0: Loss = 0.0106, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0154, Top-1 Accuracy = 99.5978, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0128, Top-1 Accuracy = 99.6813, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0126, Top-1 Accuracy = 99.6782, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0128, Top-1 Accuracy = 99.6727, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0131, Top-1 Accuracy = 99.6632, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0129, Top-1 Accuracy = 99.6646, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0127, Top-1 Accuracy = 99.6746, Learning rate = 1.0000e-03
Epoch [286/300], Training Loss: 0.0127, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.30it/s]


Epoch [286/300], Validation Loss: 0.2863, Validation Accuracy: 93.36%
Checkpoint saved at epoch 285 with accuracy: 93.36%


Iteration 0: Loss = 0.0058, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0127, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0125, Top-1 Accuracy = 99.7201, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0126, Top-1 Accuracy = 99.7145, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0122, Top-1 Accuracy = 99.7195, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0123, Top-1 Accuracy = 99.7162, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0125, Top-1 Accuracy = 99.7114, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0124, Top-1 Accuracy = 99.7214, Learning rate = 1.0000e-03
Epoch [287/300], Training Loss: 0.0125, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.49it/s]


Epoch [287/300], Validation Loss: 0.2861, Validation Accuracy: 93.29%


Iteration 0: Loss = 0.0192, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0109, Top-1 Accuracy = 99.7989, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0119, Top-1 Accuracy = 99.7746, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0113, Top-1 Accuracy = 99.7924, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0116, Top-1 Accuracy = 99.7701, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0117, Top-1 Accuracy = 99.7443, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0117, Top-1 Accuracy = 99.7582, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0114, Top-1 Accuracy = 99.7704, Learning rate = 1.0000e-03
Epoch [288/300], Training Loss: 0.0115, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.66it/s]


Epoch [288/300], Validation Loss: 0.2890, Validation Accuracy: 93.27%


Iteration 0: Loss = 0.0046, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0131, Top-1 Accuracy = 99.7215, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0127, Top-1 Accuracy = 99.7201, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0126, Top-1 Accuracy = 99.7093, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0126, Top-1 Accuracy = 99.7117, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0128, Top-1 Accuracy = 99.6944, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0127, Top-1 Accuracy = 99.6958, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0127, Top-1 Accuracy = 99.7013, Learning rate = 1.0000e-03
Epoch [289/300], Training Loss: 0.0126, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.35it/s]


Epoch [289/300], Validation Loss: 0.2845, Validation Accuracy: 93.33%


Iteration 0: Loss = 0.0065, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0133, Top-1 Accuracy = 99.7525, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0123, Top-1 Accuracy = 99.7668, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0121, Top-1 Accuracy = 99.7664, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0119, Top-1 Accuracy = 99.7779, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0116, Top-1 Accuracy = 99.7786, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0118, Top-1 Accuracy = 99.7712, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0118, Top-1 Accuracy = 99.7593, Learning rate = 1.0000e-03
Epoch [290/300], Training Loss: 0.0118, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.39it/s]


Epoch [290/300], Validation Loss: 0.2919, Validation Accuracy: 93.17%


Iteration 0: Loss = 0.0083, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0107, Top-1 Accuracy = 99.7525, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0113, Top-1 Accuracy = 99.7590, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0120, Top-1 Accuracy = 99.7145, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0122, Top-1 Accuracy = 99.6883, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0123, Top-1 Accuracy = 99.6788, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0121, Top-1 Accuracy = 99.6906, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0121, Top-1 Accuracy = 99.6924, Learning rate = 1.0000e-03
Epoch [291/300], Training Loss: 0.0121, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.33it/s]


Epoch [291/300], Validation Loss: 0.2885, Validation Accuracy: 93.16%


Iteration 0: Loss = 0.0118, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0103, Top-1 Accuracy = 99.7989, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0112, Top-1 Accuracy = 99.7823, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0110, Top-1 Accuracy = 99.7924, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0110, Top-1 Accuracy = 99.7857, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0113, Top-1 Accuracy = 99.7723, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0117, Top-1 Accuracy = 99.7556, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0118, Top-1 Accuracy = 99.7481, Learning rate = 1.0000e-03
Epoch [292/300], Training Loss: 0.0120, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.46it/s]


Epoch [292/300], Validation Loss: 0.2929, Validation Accuracy: 93.16%


Iteration 0: Loss = 0.0052, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0119, Top-1 Accuracy = 99.8144, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0112, Top-1 Accuracy = 99.8290, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0120, Top-1 Accuracy = 99.7612, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0117, Top-1 Accuracy = 99.7779, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0125, Top-1 Accuracy = 99.7380, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0123, Top-1 Accuracy = 99.7348, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0123, Top-1 Accuracy = 99.7414, Learning rate = 1.0000e-03
Epoch [293/300], Training Loss: 0.0121, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.39it/s]


Epoch [293/300], Validation Loss: 0.2950, Validation Accuracy: 93.18%


Iteration 0: Loss = 0.0088, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0113, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0126, Top-1 Accuracy = 99.7124, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0125, Top-1 Accuracy = 99.7249, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0121, Top-1 Accuracy = 99.7506, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0119, Top-1 Accuracy = 99.7505, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0118, Top-1 Accuracy = 99.7530, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0119, Top-1 Accuracy = 99.7504, Learning rate = 1.0000e-03
Epoch [294/300], Training Loss: 0.0121, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 32.97it/s]


Epoch [294/300], Validation Loss: 0.2923, Validation Accuracy: 93.08%


Iteration 0: Loss = 0.0026, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0100, Top-1 Accuracy = 99.8298, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0118, Top-1 Accuracy = 99.7668, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0121, Top-1 Accuracy = 99.7249, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0125, Top-1 Accuracy = 99.7117, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0123, Top-1 Accuracy = 99.7255, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0121, Top-1 Accuracy = 99.7296, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0119, Top-1 Accuracy = 99.7437, Learning rate = 1.0000e-03
Epoch [295/300], Training Loss: 0.0120, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 34.08it/s]


Epoch [295/300], Validation Loss: 0.2907, Validation Accuracy: 93.09%


Iteration 0: Loss = 0.0038, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0126, Top-1 Accuracy = 99.7061, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0119, Top-1 Accuracy = 99.7201, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0112, Top-1 Accuracy = 99.7664, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0117, Top-1 Accuracy = 99.7428, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0114, Top-1 Accuracy = 99.7505, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0115, Top-1 Accuracy = 99.7478, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0115, Top-1 Accuracy = 99.7593, Learning rate = 1.0000e-03
Epoch [296/300], Training Loss: 0.0115, Learning Rate: 0.001000


100%|██████████| 157/157 [00:04<00:00, 34.15it/s]


Epoch [296/300], Validation Loss: 0.2914, Validation Accuracy: 93.25%


Iteration 0: Loss = 0.0058, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0120, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0109, Top-1 Accuracy = 99.8057, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0118, Top-1 Accuracy = 99.7924, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0117, Top-1 Accuracy = 99.7896, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0125, Top-1 Accuracy = 99.7599, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0123, Top-1 Accuracy = 99.7556, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0121, Top-1 Accuracy = 99.7570, Learning rate = 1.0000e-03
Epoch [297/300], Training Loss: 0.0120, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.87it/s]


Epoch [297/300], Validation Loss: 0.2931, Validation Accuracy: 93.12%


Iteration 0: Loss = 0.0475, Top-1 Accuracy = 98.4375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0112, Top-1 Accuracy = 99.7834, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0108, Top-1 Accuracy = 99.7823, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0108, Top-1 Accuracy = 99.7820, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0108, Top-1 Accuracy = 99.7818, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0115, Top-1 Accuracy = 99.7630, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0114, Top-1 Accuracy = 99.7582, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0115, Top-1 Accuracy = 99.7615, Learning rate = 1.0000e-03
Epoch [298/300], Training Loss: 0.0115, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.32it/s]


Epoch [298/300], Validation Loss: 0.2916, Validation Accuracy: 93.03%


Iteration 0: Loss = 0.0257, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0117, Top-1 Accuracy = 99.7679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0109, Top-1 Accuracy = 99.7979, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0113, Top-1 Accuracy = 99.7664, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0113, Top-1 Accuracy = 99.7662, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0116, Top-1 Accuracy = 99.7661, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0117, Top-1 Accuracy = 99.7712, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0119, Top-1 Accuracy = 99.7615, Learning rate = 1.0000e-03
Epoch [299/300], Training Loss: 0.0120, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 43.70it/s]


Epoch [299/300], Validation Loss: 0.2915, Validation Accuracy: 93.13%


Iteration 0: Loss = 0.0128, Top-1 Accuracy = 100.0000, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0126, Top-1 Accuracy = 99.6597, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0117, Top-1 Accuracy = 99.7279, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0120, Top-1 Accuracy = 99.7197, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0115, Top-1 Accuracy = 99.7506, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0114, Top-1 Accuracy = 99.7536, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0115, Top-1 Accuracy = 99.7530, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0114, Top-1 Accuracy = 99.7526, Learning rate = 1.0000e-03
Epoch [300/300], Training Loss: 0.0115, Learning Rate: 0.001000


100%|██████████| 157/157 [00:03<00:00, 44.05it/s]

Epoch [300/300], Validation Loss: 0.2934, Validation Accuracy: 93.04%
Training finished.





In [34]:
optimizer.param_groups[0]['lr']

0.001

In [35]:
wandb.finish()

0,1
Training Accuracy,▆▆▇▇▇▇▇▁▇▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇▇▇▇▇█▇▇▇██▇████
Training Loss,██▃█▃▇▇▇▅▆▆▇▆▆▇▅▆▆▆▅▅▅▅▆▆▅▅▇▅▄▅▅▅▆▅▅▁▅▁▅
Validation Accuracy,▄▃▂▄▄▂▁▅▃▆▃▃▆▁▄▆▄▃▆▄▅▆▅▇▄▅▆▆▅▄▇▇█▅▅▃▃▄▂▂
Validation Loss,▁▂▁▂▃▃▂▅▄▃▂▄▆▃▄▄▅▃▄▅▅▅▄▄▄▃▄█▅▅▄▅▄▆▇▆▆▆▆▆

0,1
Training Accuracy,99.75259
Training Loss,0.01143
Validation Accuracy,93.04
Validation Loss,0.29341


## 3. Testing

In [36]:
saved_checkpoint_path = args.training.save_model_path + '/densenet_bc_40_12_cifar10_baseline_best.pth'
saved_checkpoint_path

'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth'

In [37]:
model_test = create_model(args.model.type,
                          layers=args.model.layers,
                          growth=args.model.growth,
                          compression=args.model.compression,
                          bottleneck=args.model.bottleneck,
                          drop_rate=args.model.drop_rate).to(device)

In [38]:
checkpoint = torch.load(saved_checkpoint_path, map_location=device)
model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [43]:
val_loss, acc = validate(model, val_loader, criterion, device=device)
print(f"Test on DenseNet-BC-40-12, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc:.2f}%")

100%|██████████| 157/157 [00:03<00:00, 44.20it/s]

Test on DenseNet-BC-40-12, Validation Loss: 0.2934, Validation Accuracy: 93.04%



