## Import

In [1]:
import os
import random

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import wandb

In [3]:
from neumeta.models import create_model_cifar10 as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_cifar10, 
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer, 
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_single_model, sample_merge_model,
)

## Functions

### 1 Find maximum dimension of the model

In [4]:
def find_max_dim(model_cls):
    """Find the maximum dimension of the model.

    The result is the largest of:
      * the number of entries in ``model_cls.learnable_parameter``,
      * the first two dimensions of every 4-D tensor in it,
      * the length of every 1-D tensor in it.
    Tensors of any other rank do not contribute.

    Args:
        model_cls: Object exposing ``learnable_parameter``, a mapping from
            parameter names to tensors.

    Returns:
        The maximum value described above.
    """
    params = model_cls.learnable_parameter

    # Seed the candidates with the parameter count so the result is never below it
    candidates = [len(params)]

    for weight in params.values():
        rank = len(weight.shape)
        if rank == 4:
            # 4-D tensors: only the two leading dimensions count, the trailing two are ignored
            candidates.extend(weight.shape[:2])
        elif rank == 1:
            # 1-D tensors (e.g., biases)
            candidates.append(weight.shape[0])

    return max(candidates)

### 2 Initialize wandb

In [5]:
def initialize_wandb(config):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    The run is named ``<YYYYmmddHHMMSS>-<config.experiment.name>`` and is started in the
    "ninr-trial" project under the 'cifar10' group.

    Args:
        config: Configuration for the run. Must expose ``config.experiment.name`` and be
            convertible with ``dict(config)``; that dict is passed to wandb as the run config.
    """
    # Kept as a local import, but placed after the docstring: a string literal is only
    # treated as the function's docstring when it is the first statement in the body.
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project="ninr-trial", name=run_name, config=dict(config), group='cifar10')

### 3 Initialize model dictionary

In [6]:
def init_model_dict(args, device):
    """
    Build one target model per configured dimension, plus a ground truth model for the starting dimension.

    Args:
        args: Configuration object. Reads ``args.dimensions.range``, ``args.dimensions.start``,
            ``args.model.type``, ``args.model.pretrained_path`` and ``args.model.smooth``.
        device: Device every created model is moved to.

    Returns:
        dim_dict: Maps ``f"{dim}"`` to the 6-tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``;
            the last slot is the key mask, which is left as ``None`` here.
        gt_model_dict: Maps ``f"{dim}"`` to an eval-mode model; only the starting dimension gets an entry.
    """
    gt_model_dict = {}
    dim_dict = {}

    for dim in args.dimensions.range:
        dim_key = f"{dim}"

        # Target model for this dimension, moved to the requested device
        target_model = create_model(
            args.model.type,
            hidden_dim=dim,
            path=args.model.pretrained_path,
            smooth=args.model.smooth,
        ).to(device)

        # Coordinates, keys, indices and sizes describing this model's weights
        coords, keys, indices, sizes = sample_coordinates(target_model)
        dim_dict[dim_key] = (target_model, coords, keys, indices, sizes, None)

        # Blank lines to visually separate the console output of consecutive models
        print('\n')

        # Only the starting dimension (the dimension of the pretrained model) gets a ground truth model
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        # NOTE(review): `smooth=True` is hard-coded here, whereas the model above
        # uses `args.model.smooth` — confirm this difference is intended.
        reference_model = create_model(
            args.model.type,
            hidden_dim=dim,
            path=args.model.pretrained_path,
            smooth=True,
        ).to(device)
        reference_model.eval()
        gt_model_dict[dim_key] = reference_model

    return dim_dict, gt_model_dict

### 4 Train the hypernetwork for one epoch, using a target model of a random dimension for each batch

In [7]:
# Function to train the hypernetwork for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """
    Train the hypernetwork ``model`` for one pass over ``train_loader``.

    For every batch a hidden dimension is drawn at random from ``args.dimensions.range``.
    Weights for the matching target network in ``dim_dict`` are produced through
    ``sample_weights`` and the target network is run on the batch. The loss is a weighted
    sum of a classification term, an L2-norm regularization term over the generated
    weights and, when ``gt_model_dict`` has an entry for the drawn dimension, a
    reconstruction term.

    Args:
        model: Hypernetwork being optimized (switched to train mode here).
        train_loader: Iterable of ``(x, target)`` batches; must support ``len()``.
        optimizer: Optimizer; zeroed and stepped once per batch.
        criterion: Classification loss, called as ``criterion(predict, target)``.
        dim_dict: Maps ``f"{dim}"`` to
            ``(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``f"{dim}"`` to a ground truth model exposing ``learnable_parameter``.
        epoch_idx: Epoch index; only used to compute the wandb logging step.
        ema: Optional object with an ``update()`` method, called after every optimizer step.
        args: Configuration object. Required in practice: its attributes are read
            unconditionally, so the ``None`` default cannot be used.
        device: Device the batch is moved to.

    Returns:
        Tuple ``(losses.avg, dim_dict, gt_model_dict)``: the average tracked by the total-loss
        meter, followed by the two dictionaries that were passed in.
    """
    # Set the model to training mode
    model.train()
    # NOTE: accumulated per batch below but never read; the returned value comes from `losses`
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys.
        # The local names are rebound here; the tuple stored in dim_dict is not reassigned.
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add noise to the coordinates if specified:
        # uniform noise in [-0.5, 0.5) scaled by args.training.coordinate_noise
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass of the target network on the batch
        predict = model_cls(x)


        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target) 
        # Compute regularization loss: sum of the L2 norms of the generated weight tensors
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Ground truth tensors for the selected keys, in the iteration order of learnable_parameter
            # NOTE(review): assumes this order lines up with reconstructed_weights — confirm
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # Zero placeholder so the sum and the `.item()` call below still work.
            # NOTE(review): created without `device=`, i.e. as a CPU scalar tensor.
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss as the weighted sum of the three terms
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear any gradients left on the target network's parameters so that, after the
        # backward pass below, `w.grad` holds only this batch's gradient
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # First backward: gradients of the total loss. The graph is retained because a
        # second backward call follows.
        loss.backward(retain_graph=True)
        # Second backward: pass the gradients now stored on the selected target-network
        # parameters into `reconstructed_weights` as upstream gradients.
        # NOTE(review): presumably sample_weights copies the generated weights into model_cls
        # as separate leaf parameters, so this call is what carries the classification
        # gradient back into the hypernetwork — confirm in sample_weights.
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])
        
        # Clip the hypernetwork's gradients element-wise if `clip_grad` is set to a positive value
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)
            
        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())


        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the meters' `.avg` values and the learning rate to wandb every `log_interval` batches.
        # The step is a global batch counter: batch index plus epoch index times batches per epoch.
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")
    
    # Return the average training loss together with the (unreassigned) per-dimension and ground truth dictionaries
    return losses.avg, dim_dict, gt_model_dict

## Main

### 0 Select the device (GPU if available, otherwise CPU)

In [8]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### 1 Parsing arguments for inputs

In [9]:
# Experiment configuration file, passed to parse_args as --config
CONFIG_PATH = 'neumeta/config/resnet20_cifar10_32-64v3_200e-coordnoise-resmlpv2_smooth_5_256_16_smooth_in_code.yaml'
# Value for --ratio; kept as a string because it is placed in an argv-style list
RATIO = '1.0'
# Checkpoint to resume from, passed as --resume_from
CHECKPOINT_PATH = 'toy/experiments/resnet20_cifar10_32-64-4layer-200e-noisecoord-resmlpv2_smooth_5_256_16_smooth_in_code/cifar10_nerf_best.pth'

In [10]:
# Argv-style argument list handed to parse_args in place of real command-line arguments
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [11]:
args = parse_args(argv_train)  # Parse the argv-style list into the run configuration
print_omegaconf(args)  # Print the resolved configuration as a key/value table

Loading base config from toy/experiments/base_config.yaml
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                                Value                                                                 |
+--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
|           experiment.name            |                      ninr_resnet20_cifar10_32-64-4layer-200e-noisecoord-resmlpv2_smooth_5_256_16_smooth_in_code                      |
|         experiment.recononly         |                                                                  0                                                                   |
|        experiment.num_epochs         |                      

In [12]:
# Seed the run with the value from the configuration, for reproducibility
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation data (in dataloader format)

In [13]:
# Build the training and validation loaders; `strong_aug` is read from the config when present, otherwise None is passed
train_loader, val_loader = get_cifar10(args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

### 3 Create target model

#### 3.0 Create the model

In [14]:
# Target model at the starting hidden dimension, created with the pretrained-weights path from the config
model = create_model(args.model.type,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path,
                     smooth=args.model.smooth).to(device)

Replace the last 2 block of layer3 with new block with hidden dim 64
Loading pretrained weights for resnet20


#### 3.1 Print the structure and shape of the model

In [15]:
# Show the module structure of the target model
model

CifarResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): Identity()
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): Identity()
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): Identity()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): Identity()
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): Identity()
    )
    (2): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): Identity()
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): Identity()
    )
  )
  (layer2): Sequential(


In [16]:
# List the name and shape of every learnable parameter of the target model
for i, (k, tensor) in enumerate(model.learnable_parameter.items()):
    print(k, tensor.shape)

layer3.2.conv1.weight torch.Size([64, 64, 3, 3])
layer3.2.conv1.bias torch.Size([64])
layer3.2.conv2.weight torch.Size([64, 64, 3, 3])
layer3.2.conv2.bias torch.Size([64])


#### 3.2 The maximum dimension of the target model

In [17]:
# Print the maximum dimension of the target model, as computed by find_max_dim
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 64


#### 3.3 Validate the accuracy of the pretrained network

In [18]:
# Validate the model for the starting dimension (its pretrained form)
# `acc` is multiplied by 100 in the message below to display it as a percentage
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:02<00:00, 72.52it/s]

Initial Permutated model Validation Loss: 0.2825, Validation Accuracy: 92.60%





In [19]:
# Get the learnable parameters of the model
# (despite its name, `checkpoint` is the model's `learnable_parameter` mapping, not a loaded checkpoint file)
checkpoint = model.learnable_parameter
# Get the number of parameters
# (the number of entries in that mapping, i.e. parameter tensors, not individual scalar weights)
number_param = len(checkpoint)

In [20]:
# Print the keys of the learnable parameters and how many parameter tensors there are
# (`model.keys` is read as an attribute here, not called)
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['layer3.2.conv1.weight', 'layer3.2.conv1.bias', 'layer3.2.conv2.weight', 'layer3.2.conv2.bias']
Number of parameters to be learned: 4


### 4 Create the hypernetwork

#### 4.0 Create the model

In [21]:
# Build the hypernetwork from the config; it is given the number of learnable parameter tensors of the target model
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [22]:
# Show the module structure of the hypernetwork
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-3): 4 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=256, bias=True)
      (residual_blocks): ModuleList(
        (0-3): 4 x Linear(in_features=256, out_features=256, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
          (3): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=256, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize an EMA that tracks a smoothed version of the hypernetwork weights

In [23]:
# Initialize the EMA over the hypernetwork; the decay rate comes from args.hyper_model.ema_decay
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get Loss function, Optimizer, and Scheduler

In [24]:
# Build the training criterion, validation criterion, optimizer and LR scheduler for the hypernetwork from the config
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [25]:
# Show the objects returned by get_optimizer
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x0000020BBF087450>


### 6 Training loop

#### 6.1 Initialize training parameters

In [26]:
# Initialize the starting epoch and best accuracy
# (defaults for a fresh run; both are overwritten in the resume cell when a checkpoint is loaded)
start_epoch = 0
best_acc = 0.0

#### 6.2 Directory to save the model

In [27]:
# Create the directory to save the model (no error if it already exists)
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume from a checkpoint (if one is given)

In [28]:
# Continue from a saved checkpoint when `resume_from` is set in the arguments
if args.resume_from:
    print(f"Resuming from checkpoint: {args.resume_from}")
    # NOTE(review): the scheduler is not passed to load_checkpoint and is not advanced to
    # `start_epoch` in this cell — confirm the LR schedule after resuming is as intended.
    checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
    # NOTE(review): the training loop passes its zero-based `epoch` to save_checkpoint; if that
    # value is stored unchanged, starting from it repeats the epoch that was saved — confirm.
    start_epoch = checkpoint_info['epoch']
    best_acc = checkpoint_info['best_acc']
    print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
    # Note: If there are more elements to retrieve, do so here.

Resuming from checkpoint: toy/experiments/resnet20_cifar10_32-64-4layer-200e-noisecoord-resmlpv2_smooth_5_256_16_smooth_in_code/cifar10_nerf_best.pth
Resuming from epoch: 88, best accuracy: 91.35%


#### 6.4 Initialize wandb for plotting

In [29]:
# Start the wandb run; project, group and run name are set inside initialize_wandb
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.5 Initialize model dictionary for each dimension and shuffle it

In [30]:
# Initialize model dictionary: one target model per configured dimension,
# plus the ground truth model for the starting dimension
dim_dict, gt_model_dict = init_model_dict(args, device)

Replace the last 2 block of layer3 with new block with hidden dim 32
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 33
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 34
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 35
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 36
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 37
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 38
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 39
Loading pretrained weights for resnet20


Replace the last 2 block of layer3 with new block with hidden dim 40
Loading pretrained weights for resnet20


R

In [31]:
# Inspect the ground truth models (prints the full module structure of each entry)
gt_model_dict

{'64': CifarResNet(
   (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (bn1): Identity()
   (relu): ReLU(inplace=True)
   (layer1): Sequential(
     (0): BasicBlock(
       (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn1): Identity()
       (relu): ReLU(inplace=True)
       (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn2): Identity()
     )
     (1): BasicBlock(
       (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn1): Identity()
       (relu): ReLU(inplace=True)
       (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn2): Identity()
     )
     (2): BasicBlock(
       (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn1): Identity()
       (relu): ReLU(inplace=True)
       (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
       (bn2): Identity()
   

In [32]:
# Inspect the per-dimension dictionary before shuffling (prints every stored tuple in full)
dim_dict

{'32': (CifarResNet(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): Identity()
    (relu): ReLU(inplace=True)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
      )
      (1): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
      )
      (2): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    

In [33]:
# NOTE: this rebinds `dim_dict` to the shuffled result, so re-running the cell
# applies shuffle_coordinates_all to the already-shuffled dictionary
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'32': (CifarResNet(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): Identity()
    (relu): ReLU(inplace=True)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
      )
      (1): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
      )
      (2): BasicBlock(
        (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn1): Identity()
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    

#### 6.6 Hypernetwork training loop

In [34]:
# Total number of epochs; used as the upper bound of the training loop
args.experiment.num_epochs

200

In [35]:
# Iterate over the epochs, starting at `start_epoch` (0 for a fresh run, or the value read from the checkpoint)
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch; each batch uses a target model of a randomly chosen dimension
    train_loss, dim_dict, gt_model_dict = train_one_epoch(hyper_model, train_loader, optimizer, criterion, 
                                                          dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema, 
                                                          args=args, device=device)
    # Step the scheduler once per epoch
    scheduler.step()

    # Print the training loss and learning rate (the epoch is shown one-based)
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # Evaluate every `eval_interval` epochs
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Apply EMA if it is specified
        if ema:
            # presumably swaps the EMA-averaged weights into hyper_model for evaluation — confirm against EMA.apply
            ema.apply()
        
        # Sample the merged model: the hypernetwork generates weights for a model with the structure of `model`,
        # which tests how well the hypernetwork generates weights.
        # NOTE: this rebinds the notebook-level `model` to the returned model.
        model = sample_merge_model(hyper_model, model, args) 
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # If EMA is specified, undo the apply() above
        if ema:
            # presumably puts the hypernetwork's training weights back — confirm against EMA.restore
            ema.restore()

        # Log the validation loss and accuracy to wandb
        # (no explicit `step` is passed here, unlike the training logs in train_one_epoch)
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        # NOTE(review): the zero-based `epoch` is passed to save_checkpoint and printed below,
        # while the other messages show `epoch + 1` — confirm which numbering is intended.
        if acc > best_acc:
            best_acc = acc
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.0920, Reg Loss = 69.5818, Reconstruct Loss = 0.0000, Cls Loss = 0.0851, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0906, Reg Loss = 71.7286, Reconstruct Loss = 0.0008, Cls Loss = 0.0825, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0936, Reg Loss = 72.1495, Reconstruct Loss = 0.0013, Cls Loss = 0.0851, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0938, Reg Loss = 72.2022, Reconstruct Loss = 0.0014, Cls Loss = 0.0852, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0935, Reg Loss = 72.3943, Reconstruct Loss = 0.0013, Cls Loss = 0.0849, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0942, Reg Loss = 72.4242, Reconstruct Loss = 0.0013, Cls Loss = 0.0856, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0933, Reg Loss = 72.3629, Reconstruct Loss = 0.0011, Cls Loss = 0.0849, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0930, Reg Loss = 72.2855, Reconstruct Loss = 0.0010, Cls Loss = 0.0848, Learning rate = 1.0000e-03
Epoch [89/200], Tr

100%|██████████| 157/157 [00:02<00:00, 72.47it/s]


Epoch [89/200], Validation Loss: 0.3774, Validation Accuracy: 91.00%



Iteration 0: Loss = 0.0394, Reg Loss = 74.8093, Reconstruct Loss = 0.0000, Cls Loss = 0.0319, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0877, Reg Loss = 73.3926, Reconstruct Loss = 0.0017, Cls Loss = 0.0786, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0887, Reg Loss = 73.8328, Reconstruct Loss = 0.0011, Cls Loss = 0.0803, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0860, Reg Loss = 74.1228, Reconstruct Loss = 0.0010, Cls Loss = 0.0776, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0880, Reg Loss = 74.0819, Reconstruct Loss = 0.0011, Cls Loss = 0.0794, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0894, Reg Loss = 73.4136, Reconstruct Loss = 0.0012, Cls Loss = 0.0808, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0893, Reg Loss = 72.8440, Reconstruct Loss = 0.0014, Cls Loss = 0.0806, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0876, Reg Loss = 72.7685, Reconstruct Loss = 

100%|██████████| 157/157 [00:02<00:00, 72.74it/s]


Epoch [90/200], Validation Loss: 0.4001, Validation Accuracy: 91.25%



Iteration 0: Loss = 0.0720, Reg Loss = 70.1427, Reconstruct Loss = 0.0000, Cls Loss = 0.0650, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0988, Reg Loss = 70.8984, Reconstruct Loss = 0.0011, Cls Loss = 0.0906, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0897, Reg Loss = 71.5393, Reconstruct Loss = 0.0021, Cls Loss = 0.0805, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0915, Reg Loss = 71.4452, Reconstruct Loss = 0.0018, Cls Loss = 0.0826, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0918, Reg Loss = 71.3091, Reconstruct Loss = 0.0018, Cls Loss = 0.0829, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0911, Reg Loss = 71.4304, Reconstruct Loss = 0.0017, Cls Loss = 0.0823, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0901, Reg Loss = 71.8699, Reconstruct Loss = 0.0016, Cls Loss = 0.0814, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0904, Reg Loss = 71.7993, Reconstruct Loss = 

100%|██████████| 157/157 [00:02<00:00, 73.97it/s]


Epoch [91/200], Validation Loss: 0.3891, Validation Accuracy: 91.00%



Iteration 0: Loss = 0.0695, Reg Loss = 71.0154, Reconstruct Loss = 0.0000, Cls Loss = 0.0624, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0842, Reg Loss = 72.4114, Reconstruct Loss = 0.0014, Cls Loss = 0.0756, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0954, Reg Loss = 72.6058, Reconstruct Loss = 0.0013, Cls Loss = 0.0868, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0874, Reg Loss = 72.6093, Reconstruct Loss = 0.0012, Cls Loss = 0.0790, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0864, Reg Loss = 72.1131, Reconstruct Loss = 0.0013, Cls Loss = 0.0779, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0884, Reg Loss = 72.1351, Reconstruct Loss = 0.0014, Cls Loss = 0.0799, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0883, Reg Loss = 72.1937, Reconstruct Loss = 0.0012, Cls Loss = 0.0799, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0898, Reg Loss = 72.0953, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 40.51it/s]


Epoch [92/200], Validation Loss: 0.4083, Validation Accuracy: 90.96%



Iteration 0: Loss = 0.0266, Reg Loss = 68.1176, Reconstruct Loss = 0.0000, Cls Loss = 0.0198, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0753, Reg Loss = 72.5300, Reconstruct Loss = 0.0004, Cls Loss = 0.0676, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0833, Reg Loss = 72.5853, Reconstruct Loss = 0.0012, Cls Loss = 0.0748, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0912, Reg Loss = 72.9726, Reconstruct Loss = 0.0012, Cls Loss = 0.0827, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0900, Reg Loss = 72.8635, Reconstruct Loss = 0.0012, Cls Loss = 0.0815, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0888, Reg Loss = 72.8937, Reconstruct Loss = 0.0012, Cls Loss = 0.0804, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0875, Reg Loss = 72.8860, Reconstruct Loss = 0.0014, Cls Loss = 0.0788, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0873, Reg Loss = 72.8726, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 41.10it/s]


Epoch [93/200], Validation Loss: 0.3929, Validation Accuracy: 91.16%



Iteration 0: Loss = 0.0481, Reg Loss = 70.6698, Reconstruct Loss = 0.0000, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0826, Reg Loss = 74.4449, Reconstruct Loss = 0.0023, Cls Loss = 0.0728, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0892, Reg Loss = 74.3323, Reconstruct Loss = 0.0022, Cls Loss = 0.0796, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0850, Reg Loss = 74.1667, Reconstruct Loss = 0.0022, Cls Loss = 0.0754, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0825, Reg Loss = 74.1464, Reconstruct Loss = 0.0019, Cls Loss = 0.0732, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0838, Reg Loss = 74.0736, Reconstruct Loss = 0.0016, Cls Loss = 0.0748, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0849, Reg Loss = 74.1108, Reconstruct Loss = 0.0013, Cls Loss = 0.0761, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0842, Reg Loss = 74.1697, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 41.78it/s]


Epoch [94/200], Validation Loss: 0.4024, Validation Accuracy: 91.34%



Iteration 0: Loss = 0.0921, Reg Loss = 68.0845, Reconstruct Loss = 0.0000, Cls Loss = 0.0853, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0764, Reg Loss = 74.2231, Reconstruct Loss = 0.0017, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0825, Reg Loss = 73.6319, Reconstruct Loss = 0.0015, Cls Loss = 0.0737, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0822, Reg Loss = 73.3543, Reconstruct Loss = 0.0013, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0836, Reg Loss = 73.3459, Reconstruct Loss = 0.0011, Cls Loss = 0.0752, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0869, Reg Loss = 73.4923, Reconstruct Loss = 0.0011, Cls Loss = 0.0784, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0852, Reg Loss = 73.7583, Reconstruct Loss = 0.0011, Cls Loss = 0.0768, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0868, Reg Loss = 73.9536, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 40.36it/s]


Epoch [95/200], Validation Loss: 0.4086, Validation Accuracy: 90.81%



Iteration 0: Loss = 0.1036, Reg Loss = 76.6279, Reconstruct Loss = 0.0000, Cls Loss = 0.0959, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0834, Reg Loss = 73.0191, Reconstruct Loss = 0.0000, Cls Loss = 0.0761, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0897, Reg Loss = 72.8141, Reconstruct Loss = 0.0002, Cls Loss = 0.0822, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0890, Reg Loss = 72.3098, Reconstruct Loss = 0.0004, Cls Loss = 0.0813, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0886, Reg Loss = 72.1714, Reconstruct Loss = 0.0004, Cls Loss = 0.0810, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0873, Reg Loss = 72.4295, Reconstruct Loss = 0.0006, Cls Loss = 0.0795, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0873, Reg Loss = 72.5297, Reconstruct Loss = 0.0007, Cls Loss = 0.0794, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0869, Reg Loss = 72.8850, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 40.68it/s]


Epoch [96/200], Validation Loss: 0.3994, Validation Accuracy: 90.87%



Iteration 0: Loss = 0.0264, Reg Loss = 76.2689, Reconstruct Loss = 0.0000, Cls Loss = 0.0187, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0792, Reg Loss = 76.0172, Reconstruct Loss = 0.0009, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0786, Reg Loss = 75.9481, Reconstruct Loss = 0.0015, Cls Loss = 0.0695, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0803, Reg Loss = 75.2608, Reconstruct Loss = 0.0011, Cls Loss = 0.0716, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0830, Reg Loss = 74.5071, Reconstruct Loss = 0.0009, Cls Loss = 0.0746, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0850, Reg Loss = 73.9146, Reconstruct Loss = 0.0009, Cls Loss = 0.0767, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0868, Reg Loss = 73.9628, Reconstruct Loss = 0.0010, Cls Loss = 0.0784, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0867, Reg Loss = 73.7005, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 41.00it/s]


Epoch [97/200], Validation Loss: 0.4122, Validation Accuracy: 90.88%



Iteration 0: Loss = 0.1717, Reg Loss = 70.9547, Reconstruct Loss = 0.0000, Cls Loss = 0.1646, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0937, Reg Loss = 73.0752, Reconstruct Loss = 0.0019, Cls Loss = 0.0846, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0947, Reg Loss = 73.2785, Reconstruct Loss = 0.0015, Cls Loss = 0.0859, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0925, Reg Loss = 72.6102, Reconstruct Loss = 0.0015, Cls Loss = 0.0837, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0906, Reg Loss = 72.0960, Reconstruct Loss = 0.0014, Cls Loss = 0.0819, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0895, Reg Loss = 71.9053, Reconstruct Loss = 0.0012, Cls Loss = 0.0811, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0892, Reg Loss = 71.9590, Reconstruct Loss = 0.0011, Cls Loss = 0.0809, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0884, Reg Loss = 72.1806, Reconstruct Loss = 

100%|██████████| 157/157 [00:03<00:00, 40.53it/s]


Epoch [98/200], Validation Loss: 0.3807, Validation Accuracy: 91.43%



Checkpoint saved at epoch 97 with accuracy: 91.43%
Iteration 0: Loss = 0.0283, Reg Loss = 72.0029, Reconstruct Loss = 0.0000, Cls Loss = 0.0211, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0651, Reg Loss = 72.3521, Reconstruct Loss = 0.0014, Cls Loss = 0.0565, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0730, Reg Loss = 72.5035, Reconstruct Loss = 0.0011, Cls Loss = 0.0647, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0762, Reg Loss = 72.9945, Reconstruct Loss = 0.0011, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0792, Reg Loss = 73.4183, Reconstruct Loss = 0.0013, Cls Loss = 0.0706, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0805, Reg Loss = 73.4996, Reconstruct Loss = 0.0012, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0803, Reg Loss = 73.6261, Reconstruct Loss = 0.0011, Cls Loss = 0.0719, Learning rate = 1.0000e-03
Iteration 700: Los

100%|██████████| 157/157 [00:03<00:00, 39.99it/s]


Epoch [99/200], Validation Loss: 0.3673, Validation Accuracy: 91.50%



Checkpoint saved at epoch 98 with accuracy: 91.50%
Iteration 0: Loss = 0.0238, Reg Loss = 73.2421, Reconstruct Loss = 0.0000, Cls Loss = 0.0165, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0791, Reg Loss = 73.2847, Reconstruct Loss = 0.0023, Cls Loss = 0.0694, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0820, Reg Loss = 72.8218, Reconstruct Loss = 0.0019, Cls Loss = 0.0729, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0811, Reg Loss = 73.0765, Reconstruct Loss = 0.0017, Cls Loss = 0.0721, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0796, Reg Loss = 72.8580, Reconstruct Loss = 0.0015, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0808, Reg Loss = 72.9609, Reconstruct Loss = 0.0013, Cls Loss = 0.0723, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0835, Reg Loss = 72.9875, Reconstruct Loss = 0.0012, Cls Loss = 0.0750, Learning rate = 1.0000e-03
Iteration 700: Los

100%|██████████| 157/157 [00:02<00:00, 73.08it/s]


Epoch [100/200], Validation Loss: 0.3903, Validation Accuracy: 91.18%



Iteration 0: Loss = 0.0636, Reg Loss = 76.0131, Reconstruct Loss = 0.0000, Cls Loss = 0.0560, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0854, Reg Loss = 73.0556, Reconstruct Loss = 0.0011, Cls Loss = 0.0770, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0827, Reg Loss = 73.2340, Reconstruct Loss = 0.0008, Cls Loss = 0.0747, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0807, Reg Loss = 73.6995, Reconstruct Loss = 0.0009, Cls Loss = 0.0724, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0816, Reg Loss = 73.5301, Reconstruct Loss = 0.0007, Cls Loss = 0.0736, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0837, Reg Loss = 73.1845, Reconstruct Loss = 0.0009, Cls Loss = 0.0755, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0859, Reg Loss = 73.2974, Reconstruct Loss = 0.0009, Cls Loss = 0.0777, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0853, Reg Loss = 73.3721, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.34it/s]


Epoch [101/200], Validation Loss: 0.3740, Validation Accuracy: 91.11%



Iteration 0: Loss = 0.0753, Reg Loss = 68.3673, Reconstruct Loss = 0.0000, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0959, Reg Loss = 73.0440, Reconstruct Loss = 0.0018, Cls Loss = 0.0868, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0913, Reg Loss = 73.3695, Reconstruct Loss = 0.0013, Cls Loss = 0.0826, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0871, Reg Loss = 73.2085, Reconstruct Loss = 0.0010, Cls Loss = 0.0788, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0855, Reg Loss = 73.0413, Reconstruct Loss = 0.0013, Cls Loss = 0.0769, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0846, Reg Loss = 72.8862, Reconstruct Loss = 0.0015, Cls Loss = 0.0758, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0853, Reg Loss = 72.7501, Reconstruct Loss = 0.0014, Cls Loss = 0.0766, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0835, Reg Loss = 72.7866, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.05it/s]


Epoch [102/200], Validation Loss: 0.3928, Validation Accuracy: 90.94%



Iteration 0: Loss = 0.1097, Reg Loss = 73.8192, Reconstruct Loss = 0.0000, Cls Loss = 0.1023, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0828, Reg Loss = 73.4403, Reconstruct Loss = 0.0004, Cls Loss = 0.0751, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0847, Reg Loss = 73.6153, Reconstruct Loss = 0.0009, Cls Loss = 0.0764, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0847, Reg Loss = 73.4574, Reconstruct Loss = 0.0011, Cls Loss = 0.0763, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0830, Reg Loss = 73.3816, Reconstruct Loss = 0.0010, Cls Loss = 0.0746, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0844, Reg Loss = 72.9459, Reconstruct Loss = 0.0012, Cls Loss = 0.0759, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0819, Reg Loss = 72.5908, Reconstruct Loss = 0.0012, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0810, Reg Loss = 72.6525, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.40it/s]


Epoch [103/200], Validation Loss: 0.3982, Validation Accuracy: 91.10%



Iteration 0: Loss = 0.0989, Reg Loss = 72.9621, Reconstruct Loss = 0.0000, Cls Loss = 0.0916, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0815, Reg Loss = 73.0817, Reconstruct Loss = 0.0020, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0801, Reg Loss = 72.9429, Reconstruct Loss = 0.0016, Cls Loss = 0.0713, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0810, Reg Loss = 72.7109, Reconstruct Loss = 0.0015, Cls Loss = 0.0723, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0841, Reg Loss = 72.5869, Reconstruct Loss = 0.0014, Cls Loss = 0.0754, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0853, Reg Loss = 72.5475, Reconstruct Loss = 0.0013, Cls Loss = 0.0768, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0834, Reg Loss = 72.4509, Reconstruct Loss = 0.0012, Cls Loss = 0.0749, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0838, Reg Loss = 72.5603, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.55it/s]


Epoch [104/200], Validation Loss: 0.4058, Validation Accuracy: 90.92%



Iteration 0: Loss = 0.0450, Reg Loss = 69.6520, Reconstruct Loss = 0.0000, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0873, Reg Loss = 71.0359, Reconstruct Loss = 0.0004, Cls Loss = 0.0797, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0838, Reg Loss = 71.9281, Reconstruct Loss = 0.0008, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0812, Reg Loss = 72.4904, Reconstruct Loss = 0.0008, Cls Loss = 0.0731, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0801, Reg Loss = 72.8351, Reconstruct Loss = 0.0006, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0804, Reg Loss = 72.9374, Reconstruct Loss = 0.0006, Cls Loss = 0.0726, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0807, Reg Loss = 73.0887, Reconstruct Loss = 0.0007, Cls Loss = 0.0727, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0801, Reg Loss = 73.0918, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.75it/s]


Epoch [105/200], Validation Loss: 0.3800, Validation Accuracy: 91.35%



Iteration 0: Loss = 0.0720, Reg Loss = 70.4144, Reconstruct Loss = 0.0000, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0741, Reg Loss = 73.0615, Reconstruct Loss = 0.0012, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0779, Reg Loss = 73.0516, Reconstruct Loss = 0.0017, Cls Loss = 0.0689, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0807, Reg Loss = 72.4769, Reconstruct Loss = 0.0012, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0811, Reg Loss = 72.2547, Reconstruct Loss = 0.0012, Cls Loss = 0.0727, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0796, Reg Loss = 72.4997, Reconstruct Loss = 0.0012, Cls Loss = 0.0711, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0805, Reg Loss = 72.5664, Reconstruct Loss = 0.0014, Cls Loss = 0.0719, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0799, Reg Loss = 72.3718, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.36it/s]


Epoch [106/200], Validation Loss: 0.3986, Validation Accuracy: 90.96%



Iteration 0: Loss = 0.1256, Reg Loss = 69.7680, Reconstruct Loss = 0.0000, Cls Loss = 0.1187, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0949, Reg Loss = 70.9966, Reconstruct Loss = 0.0016, Cls Loss = 0.0863, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0881, Reg Loss = 71.3186, Reconstruct Loss = 0.0017, Cls Loss = 0.0792, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0853, Reg Loss = 71.8005, Reconstruct Loss = 0.0015, Cls Loss = 0.0766, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0856, Reg Loss = 71.7498, Reconstruct Loss = 0.0014, Cls Loss = 0.0770, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0836, Reg Loss = 71.6508, Reconstruct Loss = 0.0013, Cls Loss = 0.0751, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0836, Reg Loss = 71.4165, Reconstruct Loss = 0.0013, Cls Loss = 0.0751, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0838, Reg Loss = 71.2965, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.41it/s]


Epoch [107/200], Validation Loss: 0.3793, Validation Accuracy: 91.44%



Iteration 0: Loss = 0.1229, Reg Loss = 66.9192, Reconstruct Loss = 0.0000, Cls Loss = 0.1162, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0881, Reg Loss = 71.6037, Reconstruct Loss = 0.0004, Cls Loss = 0.0806, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0864, Reg Loss = 71.3509, Reconstruct Loss = 0.0004, Cls Loss = 0.0788, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0836, Reg Loss = 71.2024, Reconstruct Loss = 0.0009, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0851, Reg Loss = 71.2861, Reconstruct Loss = 0.0010, Cls Loss = 0.0770, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0846, Reg Loss = 71.5102, Reconstruct Loss = 0.0010, Cls Loss = 0.0765, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0834, Reg Loss = 71.4616, Reconstruct Loss = 0.0010, Cls Loss = 0.0752, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0838, Reg Loss = 71.5163, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.89it/s]


Epoch [108/200], Validation Loss: 0.3927, Validation Accuracy: 91.16%



Iteration 0: Loss = 0.0637, Reg Loss = 77.3560, Reconstruct Loss = 0.0419, Cls Loss = 0.0140, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0858, Reg Loss = 73.9652, Reconstruct Loss = 0.0012, Cls Loss = 0.0773, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0820, Reg Loss = 74.1880, Reconstruct Loss = 0.0014, Cls Loss = 0.0733, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0796, Reg Loss = 74.3777, Reconstruct Loss = 0.0015, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0817, Reg Loss = 74.1230, Reconstruct Loss = 0.0014, Cls Loss = 0.0729, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0817, Reg Loss = 73.9008, Reconstruct Loss = 0.0012, Cls Loss = 0.0731, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0825, Reg Loss = 73.5897, Reconstruct Loss = 0.0012, Cls Loss = 0.0739, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0837, Reg Loss = 73.4248, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.11it/s]


Epoch [109/200], Validation Loss: 0.3764, Validation Accuracy: 91.23%



Iteration 0: Loss = 0.0893, Reg Loss = 75.3310, Reconstruct Loss = 0.0000, Cls Loss = 0.0817, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0753, Reg Loss = 74.1878, Reconstruct Loss = 0.0008, Cls Loss = 0.0671, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0790, Reg Loss = 74.4312, Reconstruct Loss = 0.0021, Cls Loss = 0.0695, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0810, Reg Loss = 74.0781, Reconstruct Loss = 0.0019, Cls Loss = 0.0717, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0796, Reg Loss = 73.5903, Reconstruct Loss = 0.0016, Cls Loss = 0.0706, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0802, Reg Loss = 73.1547, Reconstruct Loss = 0.0015, Cls Loss = 0.0713, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0811, Reg Loss = 73.0311, Reconstruct Loss = 0.0016, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0812, Reg Loss = 72.9244, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.20it/s]


Epoch [110/200], Validation Loss: 0.3950, Validation Accuracy: 91.09%



Iteration 0: Loss = 0.1450, Reg Loss = 71.8037, Reconstruct Loss = 0.0000, Cls Loss = 0.1378, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0836, Reg Loss = 71.9335, Reconstruct Loss = 0.0016, Cls Loss = 0.0748, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0861, Reg Loss = 71.8033, Reconstruct Loss = 0.0014, Cls Loss = 0.0775, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0849, Reg Loss = 71.8019, Reconstruct Loss = 0.0011, Cls Loss = 0.0766, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0829, Reg Loss = 71.9467, Reconstruct Loss = 0.0011, Cls Loss = 0.0745, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0817, Reg Loss = 72.1513, Reconstruct Loss = 0.0012, Cls Loss = 0.0732, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0839, Reg Loss = 72.3813, Reconstruct Loss = 0.0011, Cls Loss = 0.0755, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0828, Reg Loss = 72.4473, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.65it/s]


Epoch [111/200], Validation Loss: 0.3818, Validation Accuracy: 91.44%



Iteration 0: Loss = 0.1781, Reg Loss = 71.3084, Reconstruct Loss = 0.0000, Cls Loss = 0.1710, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0820, Reg Loss = 72.1858, Reconstruct Loss = 0.0004, Cls Loss = 0.0744, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0819, Reg Loss = 72.4960, Reconstruct Loss = 0.0012, Cls Loss = 0.0734, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0805, Reg Loss = 72.5861, Reconstruct Loss = 0.0009, Cls Loss = 0.0723, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0807, Reg Loss = 73.1166, Reconstruct Loss = 0.0007, Cls Loss = 0.0727, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0804, Reg Loss = 72.9826, Reconstruct Loss = 0.0006, Cls Loss = 0.0725, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0786, Reg Loss = 72.7971, Reconstruct Loss = 0.0006, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0800, Reg Loss = 72.6156, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.97it/s]


Epoch [112/200], Validation Loss: 0.3869, Validation Accuracy: 91.31%



Iteration 0: Loss = 0.1212, Reg Loss = 72.8057, Reconstruct Loss = 0.0000, Cls Loss = 0.1139, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0870, Reg Loss = 72.1690, Reconstruct Loss = 0.0015, Cls Loss = 0.0784, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0846, Reg Loss = 72.0818, Reconstruct Loss = 0.0017, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0774, Reg Loss = 72.0156, Reconstruct Loss = 0.0018, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0771, Reg Loss = 72.3117, Reconstruct Loss = 0.0014, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0793, Reg Loss = 72.2535, Reconstruct Loss = 0.0012, Cls Loss = 0.0709, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0812, Reg Loss = 72.2617, Reconstruct Loss = 0.0013, Cls Loss = 0.0726, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0800, Reg Loss = 72.2183, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.54it/s]


Epoch [113/200], Validation Loss: 0.4012, Validation Accuracy: 91.20%



Iteration 0: Loss = 0.0758, Reg Loss = 67.3648, Reconstruct Loss = 0.0000, Cls Loss = 0.0691, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0851, Reg Loss = 71.1200, Reconstruct Loss = 0.0023, Cls Loss = 0.0756, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0843, Reg Loss = 70.7163, Reconstruct Loss = 0.0012, Cls Loss = 0.0761, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0837, Reg Loss = 70.5579, Reconstruct Loss = 0.0009, Cls Loss = 0.0758, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0817, Reg Loss = 70.6501, Reconstruct Loss = 0.0012, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0846, Reg Loss = 70.7633, Reconstruct Loss = 0.0013, Cls Loss = 0.0762, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0840, Reg Loss = 71.0415, Reconstruct Loss = 0.0011, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0825, Reg Loss = 71.1809, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.55it/s]


Epoch [114/200], Validation Loss: 0.3980, Validation Accuracy: 91.07%



Iteration 0: Loss = 0.0235, Reg Loss = 71.4670, Reconstruct Loss = 0.0000, Cls Loss = 0.0163, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0869, Reg Loss = 70.5312, Reconstruct Loss = 0.0008, Cls Loss = 0.0791, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0842, Reg Loss = 71.1574, Reconstruct Loss = 0.0011, Cls Loss = 0.0759, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0814, Reg Loss = 71.6727, Reconstruct Loss = 0.0010, Cls Loss = 0.0733, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0813, Reg Loss = 71.6133, Reconstruct Loss = 0.0007, Cls Loss = 0.0734, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0815, Reg Loss = 71.4721, Reconstruct Loss = 0.0007, Cls Loss = 0.0736, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0795, Reg Loss = 71.8117, Reconstruct Loss = 0.0007, Cls Loss = 0.0716, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0800, Reg Loss = 71.9755, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.10it/s]


Epoch [115/200], Validation Loss: 0.3817, Validation Accuracy: 91.42%



Iteration 0: Loss = 0.0328, Reg Loss = 74.8081, Reconstruct Loss = 0.0000, Cls Loss = 0.0253, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0836, Reg Loss = 71.7762, Reconstruct Loss = 0.0011, Cls Loss = 0.0753, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0812, Reg Loss = 71.9379, Reconstruct Loss = 0.0020, Cls Loss = 0.0719, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0816, Reg Loss = 71.8284, Reconstruct Loss = 0.0020, Cls Loss = 0.0725, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0812, Reg Loss = 71.5205, Reconstruct Loss = 0.0017, Cls Loss = 0.0723, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0789, Reg Loss = 71.5234, Reconstruct Loss = 0.0014, Cls Loss = 0.0703, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0792, Reg Loss = 71.7305, Reconstruct Loss = 0.0012, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0793, Reg Loss = 71.7393, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.88it/s]


Epoch [116/200], Validation Loss: 0.3935, Validation Accuracy: 91.38%



Iteration 0: Loss = 0.0970, Reg Loss = 74.4565, Reconstruct Loss = 0.0000, Cls Loss = 0.0896, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0805, Reg Loss = 71.4018, Reconstruct Loss = 0.0004, Cls Loss = 0.0730, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0797, Reg Loss = 71.8776, Reconstruct Loss = 0.0004, Cls Loss = 0.0721, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0813, Reg Loss = 71.8304, Reconstruct Loss = 0.0005, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0790, Reg Loss = 72.0617, Reconstruct Loss = 0.0010, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0792, Reg Loss = 72.1952, Reconstruct Loss = 0.0011, Cls Loss = 0.0710, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0803, Reg Loss = 72.0848, Reconstruct Loss = 0.0012, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0791, Reg Loss = 72.2146, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 69.89it/s]


Epoch [117/200], Validation Loss: 0.4227, Validation Accuracy: 90.70%



Iteration 0: Loss = 0.0204, Reg Loss = 71.9897, Reconstruct Loss = 0.0000, Cls Loss = 0.0132, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0850, Reg Loss = 72.6047, Reconstruct Loss = 0.0014, Cls Loss = 0.0763, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0866, Reg Loss = 71.3355, Reconstruct Loss = 0.0009, Cls Loss = 0.0786, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0828, Reg Loss = 70.8583, Reconstruct Loss = 0.0008, Cls Loss = 0.0749, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0812, Reg Loss = 70.2275, Reconstruct Loss = 0.0007, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0793, Reg Loss = 70.2812, Reconstruct Loss = 0.0008, Cls Loss = 0.0715, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0791, Reg Loss = 70.5533, Reconstruct Loss = 0.0009, Cls Loss = 0.0711, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0804, Reg Loss = 70.5461, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 83.03it/s]


Epoch [118/200], Validation Loss: 0.3774, Validation Accuracy: 91.30%



Iteration 0: Loss = 0.1122, Reg Loss = 73.4629, Reconstruct Loss = 0.0000, Cls Loss = 0.1049, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0752, Reg Loss = 70.7097, Reconstruct Loss = 0.0015, Cls Loss = 0.0666, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0805, Reg Loss = 71.3170, Reconstruct Loss = 0.0011, Cls Loss = 0.0723, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0824, Reg Loss = 71.3721, Reconstruct Loss = 0.0010, Cls Loss = 0.0742, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0793, Reg Loss = 71.4702, Reconstruct Loss = 0.0009, Cls Loss = 0.0712, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0801, Reg Loss = 71.3848, Reconstruct Loss = 0.0009, Cls Loss = 0.0721, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0804, Reg Loss = 71.0425, Reconstruct Loss = 0.0008, Cls Loss = 0.0725, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0812, Reg Loss = 70.8814, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.08it/s]


Epoch [119/200], Validation Loss: 0.3820, Validation Accuracy: 91.37%



Iteration 0: Loss = 0.0307, Reg Loss = 72.5738, Reconstruct Loss = 0.0000, Cls Loss = 0.0235, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0735, Reg Loss = 72.0100, Reconstruct Loss = 0.0008, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0813, Reg Loss = 71.6550, Reconstruct Loss = 0.0012, Cls Loss = 0.0730, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0798, Reg Loss = 71.5019, Reconstruct Loss = 0.0012, Cls Loss = 0.0715, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0805, Reg Loss = 70.9145, Reconstruct Loss = 0.0012, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0789, Reg Loss = 70.8441, Reconstruct Loss = 0.0012, Cls Loss = 0.0706, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0786, Reg Loss = 70.7511, Reconstruct Loss = 0.0010, Cls Loss = 0.0705, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0791, Reg Loss = 70.9814, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.50it/s]


Epoch [120/200], Validation Loss: 0.3883, Validation Accuracy: 91.25%



Iteration 0: Loss = 0.0353, Reg Loss = 76.4699, Reconstruct Loss = 0.0000, Cls Loss = 0.0277, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0937, Reg Loss = 70.8667, Reconstruct Loss = 0.0008, Cls Loss = 0.0857, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0839, Reg Loss = 70.9834, Reconstruct Loss = 0.0012, Cls Loss = 0.0755, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0835, Reg Loss = 70.8911, Reconstruct Loss = 0.0011, Cls Loss = 0.0753, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0849, Reg Loss = 70.9123, Reconstruct Loss = 0.0010, Cls Loss = 0.0768, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0841, Reg Loss = 70.9652, Reconstruct Loss = 0.0009, Cls Loss = 0.0760, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0818, Reg Loss = 71.2033, Reconstruct Loss = 0.0010, Cls Loss = 0.0737, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0804, Reg Loss = 71.3203, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 69.23it/s]


Epoch [121/200], Validation Loss: 0.3922, Validation Accuracy: 91.43%



Iteration 0: Loss = 0.0619, Reg Loss = 67.2592, Reconstruct Loss = 0.0000, Cls Loss = 0.0552, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0789, Reg Loss = 70.2802, Reconstruct Loss = 0.0018, Cls Loss = 0.0700, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0730, Reg Loss = 70.4959, Reconstruct Loss = 0.0016, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0735, Reg Loss = 70.7275, Reconstruct Loss = 0.0013, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0753, Reg Loss = 70.7699, Reconstruct Loss = 0.0016, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0768, Reg Loss = 71.1312, Reconstruct Loss = 0.0017, Cls Loss = 0.0680, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0782, Reg Loss = 71.1589, Reconstruct Loss = 0.0016, Cls Loss = 0.0696, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0770, Reg Loss = 71.3527, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.21it/s]


Epoch [122/200], Validation Loss: 0.3934, Validation Accuracy: 91.26%



Iteration 0: Loss = 0.0360, Reg Loss = 70.2976, Reconstruct Loss = 0.0000, Cls Loss = 0.0290, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0722, Reg Loss = 71.7557, Reconstruct Loss = 0.0003, Cls Loss = 0.0647, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0733, Reg Loss = 71.3438, Reconstruct Loss = 0.0008, Cls Loss = 0.0653, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0734, Reg Loss = 71.3901, Reconstruct Loss = 0.0010, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0759, Reg Loss = 71.2429, Reconstruct Loss = 0.0010, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0763, Reg Loss = 71.4394, Reconstruct Loss = 0.0010, Cls Loss = 0.0682, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0782, Reg Loss = 71.6127, Reconstruct Loss = 0.0010, Cls Loss = 0.0701, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0783, Reg Loss = 71.7380, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.05it/s]


Epoch [123/200], Validation Loss: 0.3826, Validation Accuracy: 91.39%



Iteration 0: Loss = 0.1346, Reg Loss = 75.3828, Reconstruct Loss = 0.0000, Cls Loss = 0.1271, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0672, Reg Loss = 73.9450, Reconstruct Loss = 0.0004, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0709, Reg Loss = 73.3723, Reconstruct Loss = 0.0008, Cls Loss = 0.0627, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0768, Reg Loss = 73.2476, Reconstruct Loss = 0.0013, Cls Loss = 0.0682, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0793, Reg Loss = 72.8542, Reconstruct Loss = 0.0012, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0798, Reg Loss = 72.7714, Reconstruct Loss = 0.0012, Cls Loss = 0.0713, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0810, Reg Loss = 72.7183, Reconstruct Loss = 0.0012, Cls Loss = 0.0726, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0804, Reg Loss = 72.6471, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.11it/s]


Epoch [124/200], Validation Loss: 0.3670, Validation Accuracy: 91.43%



Iteration 0: Loss = 0.0276, Reg Loss = 75.0877, Reconstruct Loss = 0.0000, Cls Loss = 0.0201, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0732, Reg Loss = 71.6006, Reconstruct Loss = 0.0008, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0771, Reg Loss = 71.8763, Reconstruct Loss = 0.0007, Cls Loss = 0.0692, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0801, Reg Loss = 71.4629, Reconstruct Loss = 0.0009, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0836, Reg Loss = 71.3343, Reconstruct Loss = 0.0008, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0811, Reg Loss = 71.3235, Reconstruct Loss = 0.0009, Cls Loss = 0.0732, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0804, Reg Loss = 71.6341, Reconstruct Loss = 0.0012, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0796, Reg Loss = 71.8339, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 70.30it/s]


Epoch [125/200], Validation Loss: 0.4066, Validation Accuracy: 90.71%



Iteration 0: Loss = 0.0430, Reg Loss = 67.9564, Reconstruct Loss = 0.0000, Cls Loss = 0.0362, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0691, Reg Loss = 73.1346, Reconstruct Loss = 0.0018, Cls Loss = 0.0599, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0778, Reg Loss = 72.1783, Reconstruct Loss = 0.0013, Cls Loss = 0.0693, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0758, Reg Loss = 72.0857, Reconstruct Loss = 0.0013, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0770, Reg Loss = 71.9233, Reconstruct Loss = 0.0013, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0773, Reg Loss = 72.0656, Reconstruct Loss = 0.0013, Cls Loss = 0.0688, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0781, Reg Loss = 72.0828, Reconstruct Loss = 0.0011, Cls Loss = 0.0698, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0784, Reg Loss = 72.1541, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.53it/s]


Epoch [126/200], Validation Loss: 0.3896, Validation Accuracy: 91.01%



Iteration 0: Loss = 0.2061, Reg Loss = 70.6812, Reconstruct Loss = 0.0000, Cls Loss = 0.1990, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0748, Reg Loss = 70.9080, Reconstruct Loss = 0.0015, Cls Loss = 0.0662, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0744, Reg Loss = 72.0723, Reconstruct Loss = 0.0009, Cls Loss = 0.0663, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0760, Reg Loss = 71.8540, Reconstruct Loss = 0.0011, Cls Loss = 0.0677, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0776, Reg Loss = 72.1655, Reconstruct Loss = 0.0010, Cls Loss = 0.0694, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0765, Reg Loss = 72.0886, Reconstruct Loss = 0.0009, Cls Loss = 0.0684, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0761, Reg Loss = 71.9208, Reconstruct Loss = 0.0011, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0761, Reg Loss = 71.7438, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 78.36it/s]


Epoch [127/200], Validation Loss: 0.3904, Validation Accuracy: 91.29%



Iteration 0: Loss = 0.1424, Reg Loss = 72.8109, Reconstruct Loss = 0.0000, Cls Loss = 0.1351, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0730, Reg Loss = 71.4779, Reconstruct Loss = 0.0016, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0826, Reg Loss = 70.9200, Reconstruct Loss = 0.0021, Cls Loss = 0.0734, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0769, Reg Loss = 70.7013, Reconstruct Loss = 0.0016, Cls Loss = 0.0682, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0764, Reg Loss = 70.5812, Reconstruct Loss = 0.0015, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0773, Reg Loss = 70.6588, Reconstruct Loss = 0.0013, Cls Loss = 0.0690, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0764, Reg Loss = 70.9368, Reconstruct Loss = 0.0014, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0763, Reg Loss = 70.9922, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.68it/s]


Epoch [128/200], Validation Loss: 0.4035, Validation Accuracy: 91.26%



Iteration 0: Loss = 0.0493, Reg Loss = 69.0119, Reconstruct Loss = 0.0000, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0879, Reg Loss = 73.0897, Reconstruct Loss = 0.0000, Cls Loss = 0.0806, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0904, Reg Loss = 73.3292, Reconstruct Loss = 0.0007, Cls Loss = 0.0823, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0850, Reg Loss = 73.3401, Reconstruct Loss = 0.0005, Cls Loss = 0.0772, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0818, Reg Loss = 73.3454, Reconstruct Loss = 0.0009, Cls Loss = 0.0736, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0802, Reg Loss = 73.2177, Reconstruct Loss = 0.0009, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0806, Reg Loss = 73.2056, Reconstruct Loss = 0.0012, Cls Loss = 0.0721, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0818, Reg Loss = 72.9448, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 80.71it/s]


Epoch [129/200], Validation Loss: 0.3905, Validation Accuracy: 91.42%



Iteration 0: Loss = 0.1026, Reg Loss = 72.1563, Reconstruct Loss = 0.0000, Cls Loss = 0.0954, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0718, Reg Loss = 70.0753, Reconstruct Loss = 0.0016, Cls Loss = 0.0632, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0742, Reg Loss = 69.8345, Reconstruct Loss = 0.0014, Cls Loss = 0.0659, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0720, Reg Loss = 69.9637, Reconstruct Loss = 0.0012, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0725, Reg Loss = 70.5951, Reconstruct Loss = 0.0009, Cls Loss = 0.0645, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0755, Reg Loss = 70.9565, Reconstruct Loss = 0.0009, Cls Loss = 0.0675, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0773, Reg Loss = 71.3066, Reconstruct Loss = 0.0011, Cls Loss = 0.0691, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0765, Reg Loss = 71.2258, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.43it/s]


Epoch [130/200], Validation Loss: 0.3813, Validation Accuracy: 91.53%



Checkpoint saved at epoch 129 with accuracy: 91.53%
Iteration 0: Loss = 0.0655, Reg Loss = 70.7170, Reconstruct Loss = 0.0000, Cls Loss = 0.0585, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0716, Reg Loss = 72.0529, Reconstruct Loss = 0.0012, Cls Loss = 0.0632, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0741, Reg Loss = 71.8721, Reconstruct Loss = 0.0010, Cls Loss = 0.0659, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0755, Reg Loss = 71.4828, Reconstruct Loss = 0.0012, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0754, Reg Loss = 71.0366, Reconstruct Loss = 0.0012, Cls Loss = 0.0671, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0743, Reg Loss = 70.7609, Reconstruct Loss = 0.0011, Cls Loss = 0.0661, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0740, Reg Loss = 70.8625, Reconstruct Loss = 0.0012, Cls Loss = 0.0657, Learning rate = 1.0000e-03
Iteration 700: L

100%|██████████| 157/157 [00:02<00:00, 76.84it/s]


Epoch [131/200], Validation Loss: 0.3982, Validation Accuracy: 91.46%



Iteration 0: Loss = 0.1165, Reg Loss = 71.6473, Reconstruct Loss = 0.0000, Cls Loss = 0.1093, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0786, Reg Loss = 69.3839, Reconstruct Loss = 0.0007, Cls Loss = 0.0709, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0800, Reg Loss = 69.5729, Reconstruct Loss = 0.0005, Cls Loss = 0.0725, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0799, Reg Loss = 70.4159, Reconstruct Loss = 0.0008, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0768, Reg Loss = 70.5546, Reconstruct Loss = 0.0010, Cls Loss = 0.0687, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0787, Reg Loss = 70.7180, Reconstruct Loss = 0.0013, Cls Loss = 0.0704, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0799, Reg Loss = 70.4863, Reconstruct Loss = 0.0012, Cls Loss = 0.0716, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0805, Reg Loss = 70.3688, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 73.93it/s]


Epoch [132/200], Validation Loss: 0.4015, Validation Accuracy: 90.98%



Iteration 0: Loss = 0.1054, Reg Loss = 67.2334, Reconstruct Loss = 0.0000, Cls Loss = 0.0986, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0808, Reg Loss = 71.1079, Reconstruct Loss = 0.0017, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0738, Reg Loss = 70.0205, Reconstruct Loss = 0.0011, Cls Loss = 0.0657, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0754, Reg Loss = 70.0714, Reconstruct Loss = 0.0012, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0766, Reg Loss = 70.1015, Reconstruct Loss = 0.0010, Cls Loss = 0.0686, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0750, Reg Loss = 70.2948, Reconstruct Loss = 0.0012, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0762, Reg Loss = 70.2622, Reconstruct Loss = 0.0011, Cls Loss = 0.0680, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0767, Reg Loss = 70.2801, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.63it/s]


Epoch [133/200], Validation Loss: 0.3915, Validation Accuracy: 91.37%



Iteration 0: Loss = 0.0212, Reg Loss = 65.7450, Reconstruct Loss = 0.0000, Cls Loss = 0.0146, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0809, Reg Loss = 70.2412, Reconstruct Loss = 0.0011, Cls Loss = 0.0728, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0830, Reg Loss = 69.3380, Reconstruct Loss = 0.0009, Cls Loss = 0.0752, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0826, Reg Loss = 68.8349, Reconstruct Loss = 0.0010, Cls Loss = 0.0747, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0801, Reg Loss = 68.9401, Reconstruct Loss = 0.0009, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0770, Reg Loss = 69.3425, Reconstruct Loss = 0.0011, Cls Loss = 0.0690, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0780, Reg Loss = 69.7944, Reconstruct Loss = 0.0012, Cls Loss = 0.0698, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0776, Reg Loss = 69.9543, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 80.70it/s]


Epoch [134/200], Validation Loss: 0.3950, Validation Accuracy: 91.43%



Iteration 0: Loss = 0.0404, Reg Loss = 73.6168, Reconstruct Loss = 0.0000, Cls Loss = 0.0330, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0776, Reg Loss = 70.0523, Reconstruct Loss = 0.0011, Cls Loss = 0.0695, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0784, Reg Loss = 70.1543, Reconstruct Loss = 0.0009, Cls Loss = 0.0704, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0766, Reg Loss = 70.3606, Reconstruct Loss = 0.0011, Cls Loss = 0.0684, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0761, Reg Loss = 70.4880, Reconstruct Loss = 0.0012, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0756, Reg Loss = 70.6769, Reconstruct Loss = 0.0013, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0743, Reg Loss = 70.6601, Reconstruct Loss = 0.0012, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0747, Reg Loss = 70.6919, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 78.56it/s]


Epoch [135/200], Validation Loss: 0.3994, Validation Accuracy: 91.07%



Iteration 0: Loss = 0.0897, Reg Loss = 76.0393, Reconstruct Loss = 0.0000, Cls Loss = 0.0821, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0793, Reg Loss = 71.8938, Reconstruct Loss = 0.0014, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0817, Reg Loss = 71.2300, Reconstruct Loss = 0.0012, Cls Loss = 0.0734, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0775, Reg Loss = 71.3448, Reconstruct Loss = 0.0011, Cls Loss = 0.0692, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0768, Reg Loss = 71.4459, Reconstruct Loss = 0.0009, Cls Loss = 0.0688, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0769, Reg Loss = 71.1582, Reconstruct Loss = 0.0010, Cls Loss = 0.0688, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0776, Reg Loss = 71.4102, Reconstruct Loss = 0.0013, Cls Loss = 0.0691, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0765, Reg Loss = 71.5039, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.45it/s]


Epoch [136/200], Validation Loss: 0.3947, Validation Accuracy: 91.36%



Iteration 0: Loss = 0.0357, Reg Loss = 76.9978, Reconstruct Loss = 0.0000, Cls Loss = 0.0280, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0790, Reg Loss = 72.5201, Reconstruct Loss = 0.0014, Cls Loss = 0.0704, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0766, Reg Loss = 71.7724, Reconstruct Loss = 0.0014, Cls Loss = 0.0680, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0735, Reg Loss = 71.3957, Reconstruct Loss = 0.0010, Cls Loss = 0.0654, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0742, Reg Loss = 71.1189, Reconstruct Loss = 0.0008, Cls Loss = 0.0662, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0738, Reg Loss = 70.9249, Reconstruct Loss = 0.0009, Cls Loss = 0.0658, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0732, Reg Loss = 70.7589, Reconstruct Loss = 0.0011, Cls Loss = 0.0650, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0731, Reg Loss = 70.6255, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.06it/s]


Epoch [137/200], Validation Loss: 0.4052, Validation Accuracy: 91.24%



Iteration 0: Loss = 0.1078, Reg Loss = 74.9254, Reconstruct Loss = 0.0000, Cls Loss = 0.1003, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0734, Reg Loss = 71.3871, Reconstruct Loss = 0.0016, Cls Loss = 0.0647, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0730, Reg Loss = 70.7272, Reconstruct Loss = 0.0013, Cls Loss = 0.0646, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0746, Reg Loss = 70.5590, Reconstruct Loss = 0.0016, Cls Loss = 0.0659, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0760, Reg Loss = 70.3151, Reconstruct Loss = 0.0013, Cls Loss = 0.0676, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0766, Reg Loss = 70.2117, Reconstruct Loss = 0.0012, Cls Loss = 0.0683, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0768, Reg Loss = 70.2129, Reconstruct Loss = 0.0011, Cls Loss = 0.0687, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0758, Reg Loss = 70.1027, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.74it/s]


Epoch [138/200], Validation Loss: 0.3869, Validation Accuracy: 91.22%



Iteration 0: Loss = 0.1600, Reg Loss = 65.9718, Reconstruct Loss = 0.0000, Cls Loss = 0.1534, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0831, Reg Loss = 70.9035, Reconstruct Loss = 0.0022, Cls Loss = 0.0738, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0776, Reg Loss = 70.8463, Reconstruct Loss = 0.0011, Cls Loss = 0.0694, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0772, Reg Loss = 70.9226, Reconstruct Loss = 0.0011, Cls Loss = 0.0690, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0778, Reg Loss = 70.8974, Reconstruct Loss = 0.0008, Cls Loss = 0.0699, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0780, Reg Loss = 71.0493, Reconstruct Loss = 0.0010, Cls Loss = 0.0699, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0757, Reg Loss = 71.3253, Reconstruct Loss = 0.0009, Cls Loss = 0.0677, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0765, Reg Loss = 71.3567, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.47it/s]


Epoch [139/200], Validation Loss: 0.4182, Validation Accuracy: 90.98%



Iteration 0: Loss = 0.0191, Reg Loss = 72.9366, Reconstruct Loss = 0.0000, Cls Loss = 0.0118, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0767, Reg Loss = 70.8277, Reconstruct Loss = 0.0017, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0776, Reg Loss = 70.2152, Reconstruct Loss = 0.0012, Cls Loss = 0.0694, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0747, Reg Loss = 70.4535, Reconstruct Loss = 0.0012, Cls Loss = 0.0664, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0722, Reg Loss = 70.6008, Reconstruct Loss = 0.0010, Cls Loss = 0.0641, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0731, Reg Loss = 70.7723, Reconstruct Loss = 0.0011, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0727, Reg Loss = 70.9387, Reconstruct Loss = 0.0011, Cls Loss = 0.0645, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0744, Reg Loss = 71.1010, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.99it/s]


Epoch [140/200], Validation Loss: 0.4161, Validation Accuracy: 91.17%



Iteration 0: Loss = 0.1051, Reg Loss = 70.6346, Reconstruct Loss = 0.0000, Cls Loss = 0.0981, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0732, Reg Loss = 73.1666, Reconstruct Loss = 0.0007, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0728, Reg Loss = 73.3086, Reconstruct Loss = 0.0005, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0739, Reg Loss = 73.1033, Reconstruct Loss = 0.0009, Cls Loss = 0.0657, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0745, Reg Loss = 72.4876, Reconstruct Loss = 0.0012, Cls Loss = 0.0661, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0752, Reg Loss = 72.2425, Reconstruct Loss = 0.0011, Cls Loss = 0.0669, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0750, Reg Loss = 72.3180, Reconstruct Loss = 0.0009, Cls Loss = 0.0668, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0742, Reg Loss = 72.3267, Reconstruct Loss =

100%|██████████| 157/157 [00:03<00:00, 47.95it/s]


Epoch [141/200], Validation Loss: 0.3869, Validation Accuracy: 91.42%



Iteration 0: Loss = 0.0660, Reg Loss = 72.3103, Reconstruct Loss = 0.0000, Cls Loss = 0.0588, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0777, Reg Loss = 71.5167, Reconstruct Loss = 0.0009, Cls Loss = 0.0696, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0764, Reg Loss = 71.3263, Reconstruct Loss = 0.0010, Cls Loss = 0.0683, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0771, Reg Loss = 71.5096, Reconstruct Loss = 0.0013, Cls Loss = 0.0687, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0768, Reg Loss = 71.4141, Reconstruct Loss = 0.0012, Cls Loss = 0.0684, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0769, Reg Loss = 71.3288, Reconstruct Loss = 0.0011, Cls Loss = 0.0687, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0751, Reg Loss = 71.4305, Reconstruct Loss = 0.0010, Cls Loss = 0.0669, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0742, Reg Loss = 71.3864, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 78.23it/s]


Epoch [142/200], Validation Loss: 0.4071, Validation Accuracy: 91.07%



Iteration 0: Loss = 0.0193, Reg Loss = 71.5728, Reconstruct Loss = 0.0000, Cls Loss = 0.0122, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0787, Reg Loss = 71.5136, Reconstruct Loss = 0.0007, Cls Loss = 0.0708, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0756, Reg Loss = 71.4951, Reconstruct Loss = 0.0012, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0748, Reg Loss = 71.4402, Reconstruct Loss = 0.0012, Cls Loss = 0.0664, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0758, Reg Loss = 71.4292, Reconstruct Loss = 0.0014, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0758, Reg Loss = 71.4464, Reconstruct Loss = 0.0013, Cls Loss = 0.0674, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0742, Reg Loss = 71.3641, Reconstruct Loss = 0.0011, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0730, Reg Loss = 71.4745, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 64.19it/s]


Epoch [143/200], Validation Loss: 0.4219, Validation Accuracy: 91.26%



Iteration 0: Loss = 0.0407, Reg Loss = 66.6486, Reconstruct Loss = 0.0000, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0790, Reg Loss = 70.1817, Reconstruct Loss = 0.0017, Cls Loss = 0.0703, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0760, Reg Loss = 70.1469, Reconstruct Loss = 0.0018, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0775, Reg Loss = 70.1935, Reconstruct Loss = 0.0012, Cls Loss = 0.0693, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0782, Reg Loss = 70.5576, Reconstruct Loss = 0.0012, Cls Loss = 0.0699, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0785, Reg Loss = 70.6651, Reconstruct Loss = 0.0010, Cls Loss = 0.0704, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0765, Reg Loss = 70.5307, Reconstruct Loss = 0.0010, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0754, Reg Loss = 70.4874, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.29it/s]


Epoch [144/200], Validation Loss: 0.4067, Validation Accuracy: 91.41%



Iteration 0: Loss = 0.0129, Reg Loss = 66.5154, Reconstruct Loss = 0.0000, Cls Loss = 0.0062, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0741, Reg Loss = 70.9480, Reconstruct Loss = 0.0015, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0751, Reg Loss = 70.8845, Reconstruct Loss = 0.0016, Cls Loss = 0.0664, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0766, Reg Loss = 70.6552, Reconstruct Loss = 0.0016, Cls Loss = 0.0680, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0754, Reg Loss = 70.7144, Reconstruct Loss = 0.0013, Cls Loss = 0.0671, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0747, Reg Loss = 70.7214, Reconstruct Loss = 0.0012, Cls Loss = 0.0664, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0737, Reg Loss = 70.6662, Reconstruct Loss = 0.0010, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0748, Reg Loss = 70.8790, Reconstruct Loss =

100%|██████████| 157/157 [00:03<00:00, 45.61it/s]


Epoch [145/200], Validation Loss: 0.4132, Validation Accuracy: 91.06%



Iteration 0: Loss = 0.0217, Reg Loss = 74.5151, Reconstruct Loss = 0.0000, Cls Loss = 0.0142, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0616, Reg Loss = 70.2811, Reconstruct Loss = 0.0007, Cls Loss = 0.0539, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0645, Reg Loss = 70.3899, Reconstruct Loss = 0.0005, Cls Loss = 0.0569, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0671, Reg Loss = 70.7098, Reconstruct Loss = 0.0008, Cls Loss = 0.0592, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0677, Reg Loss = 70.8980, Reconstruct Loss = 0.0008, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0703, Reg Loss = 70.6247, Reconstruct Loss = 0.0008, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0718, Reg Loss = 70.4387, Reconstruct Loss = 0.0008, Cls Loss = 0.0640, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0728, Reg Loss = 70.4943, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 70.44it/s]


Epoch [146/200], Validation Loss: 0.3777, Validation Accuracy: 91.52%



Iteration 0: Loss = 0.0413, Reg Loss = 68.8749, Reconstruct Loss = 0.0000, Cls Loss = 0.0344, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0752, Reg Loss = 71.1962, Reconstruct Loss = 0.0014, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0757, Reg Loss = 71.2125, Reconstruct Loss = 0.0009, Cls Loss = 0.0677, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0720, Reg Loss = 70.8246, Reconstruct Loss = 0.0016, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0715, Reg Loss = 70.5710, Reconstruct Loss = 0.0012, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0723, Reg Loss = 70.7703, Reconstruct Loss = 0.0015, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0716, Reg Loss = 70.7371, Reconstruct Loss = 0.0016, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0720, Reg Loss = 70.6677, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.02it/s]


Epoch [147/200], Validation Loss: 0.4040, Validation Accuracy: 91.45%



Iteration 0: Loss = 0.0659, Reg Loss = 68.6984, Reconstruct Loss = 0.0000, Cls Loss = 0.0590, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0626, Reg Loss = 70.4773, Reconstruct Loss = 0.0003, Cls Loss = 0.0552, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0718, Reg Loss = 70.0904, Reconstruct Loss = 0.0005, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0726, Reg Loss = 70.3783, Reconstruct Loss = 0.0006, Cls Loss = 0.0650, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0728, Reg Loss = 70.4899, Reconstruct Loss = 0.0006, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0730, Reg Loss = 70.6761, Reconstruct Loss = 0.0010, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0736, Reg Loss = 70.6686, Reconstruct Loss = 0.0009, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0754, Reg Loss = 70.3275, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.88it/s]


Epoch [148/200], Validation Loss: 0.3980, Validation Accuracy: 91.16%



Iteration 0: Loss = 0.0498, Reg Loss = 67.8064, Reconstruct Loss = 0.0000, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0700, Reg Loss = 69.6319, Reconstruct Loss = 0.0024, Cls Loss = 0.0606, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0671, Reg Loss = 69.8828, Reconstruct Loss = 0.0016, Cls Loss = 0.0586, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0726, Reg Loss = 69.3529, Reconstruct Loss = 0.0012, Cls Loss = 0.0645, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0732, Reg Loss = 69.4624, Reconstruct Loss = 0.0010, Cls Loss = 0.0653, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0714, Reg Loss = 69.8814, Reconstruct Loss = 0.0011, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0705, Reg Loss = 70.0092, Reconstruct Loss = 0.0011, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0699, Reg Loss = 70.4619, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 69.90it/s]


Epoch [149/200], Validation Loss: 0.4151, Validation Accuracy: 91.40%



Iteration 0: Loss = 0.0621, Reg Loss = 71.3390, Reconstruct Loss = 0.0000, Cls Loss = 0.0549, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0891, Reg Loss = 70.0939, Reconstruct Loss = 0.0013, Cls Loss = 0.0808, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0796, Reg Loss = 70.2488, Reconstruct Loss = 0.0010, Cls Loss = 0.0716, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0788, Reg Loss = 70.5102, Reconstruct Loss = 0.0010, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0779, Reg Loss = 70.6150, Reconstruct Loss = 0.0008, Cls Loss = 0.0700, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0759, Reg Loss = 70.6432, Reconstruct Loss = 0.0010, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0769, Reg Loss = 70.5332, Reconstruct Loss = 0.0009, Cls Loss = 0.0690, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0754, Reg Loss = 70.2418, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 78.77it/s]


Epoch [150/200], Validation Loss: 0.3942, Validation Accuracy: 91.44%



Iteration 0: Loss = 0.0193, Reg Loss = 68.1610, Reconstruct Loss = 0.0000, Cls Loss = 0.0125, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0716, Reg Loss = 70.2458, Reconstruct Loss = 0.0007, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0704, Reg Loss = 70.0857, Reconstruct Loss = 0.0010, Cls Loss = 0.0624, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0699, Reg Loss = 69.5483, Reconstruct Loss = 0.0008, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0700, Reg Loss = 69.4458, Reconstruct Loss = 0.0008, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0697, Reg Loss = 69.4717, Reconstruct Loss = 0.0012, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0693, Reg Loss = 69.5043, Reconstruct Loss = 0.0012, Cls Loss = 0.0612, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0686, Reg Loss = 69.5178, Reconstruct Loss =

100%|██████████| 157/157 [00:03<00:00, 47.66it/s]


Epoch [151/200], Validation Loss: 0.4059, Validation Accuracy: 91.31%



Iteration 0: Loss = 0.0817, Reg Loss = 71.2075, Reconstruct Loss = 0.0000, Cls Loss = 0.0746, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0763, Reg Loss = 71.1791, Reconstruct Loss = 0.0014, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0715, Reg Loss = 71.1859, Reconstruct Loss = 0.0015, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0709, Reg Loss = 71.0117, Reconstruct Loss = 0.0016, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0695, Reg Loss = 70.9598, Reconstruct Loss = 0.0015, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0695, Reg Loss = 70.4927, Reconstruct Loss = 0.0013, Cls Loss = 0.0611, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0728, Reg Loss = 70.2507, Reconstruct Loss = 0.0012, Cls Loss = 0.0645, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0727, Reg Loss = 70.2468, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.14it/s]


Epoch [152/200], Validation Loss: 0.3967, Validation Accuracy: 91.29%



Iteration 0: Loss = 0.0276, Reg Loss = 70.6777, Reconstruct Loss = 0.0000, Cls Loss = 0.0205, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0763, Reg Loss = 71.2534, Reconstruct Loss = 0.0014, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0745, Reg Loss = 70.9330, Reconstruct Loss = 0.0011, Cls Loss = 0.0664, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0731, Reg Loss = 71.2198, Reconstruct Loss = 0.0011, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0744, Reg Loss = 71.2955, Reconstruct Loss = 0.0011, Cls Loss = 0.0662, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0765, Reg Loss = 70.8083, Reconstruct Loss = 0.0011, Cls Loss = 0.0683, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0759, Reg Loss = 70.6088, Reconstruct Loss = 0.0010, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0740, Reg Loss = 70.5992, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.49it/s]


Epoch [153/200], Validation Loss: 0.4001, Validation Accuracy: 91.35%



Iteration 0: Loss = 0.0379, Reg Loss = 67.1376, Reconstruct Loss = 0.0000, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0703, Reg Loss = 70.7303, Reconstruct Loss = 0.0018, Cls Loss = 0.0615, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0697, Reg Loss = 70.1246, Reconstruct Loss = 0.0014, Cls Loss = 0.0612, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0731, Reg Loss = 70.7140, Reconstruct Loss = 0.0014, Cls Loss = 0.0646, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0713, Reg Loss = 71.0801, Reconstruct Loss = 0.0014, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0720, Reg Loss = 71.4980, Reconstruct Loss = 0.0012, Cls Loss = 0.0636, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0714, Reg Loss = 71.4292, Reconstruct Loss = 0.0011, Cls Loss = 0.0632, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0713, Reg Loss = 71.3377, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.75it/s]


Epoch [154/200], Validation Loss: 0.3945, Validation Accuracy: 91.19%



Iteration 0: Loss = 0.1101, Reg Loss = 73.8714, Reconstruct Loss = 0.0000, Cls Loss = 0.1028, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0738, Reg Loss = 71.4786, Reconstruct Loss = 0.0010, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0767, Reg Loss = 71.1609, Reconstruct Loss = 0.0010, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0737, Reg Loss = 70.9856, Reconstruct Loss = 0.0011, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0758, Reg Loss = 70.8217, Reconstruct Loss = 0.0012, Cls Loss = 0.0676, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0749, Reg Loss = 70.9435, Reconstruct Loss = 0.0012, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0747, Reg Loss = 70.9220, Reconstruct Loss = 0.0013, Cls Loss = 0.0663, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0736, Reg Loss = 70.6512, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 74.93it/s]


Epoch [155/200], Validation Loss: 0.4405, Validation Accuracy: 91.08%



Iteration 0: Loss = 0.1420, Reg Loss = 62.4374, Reconstruct Loss = 0.0000, Cls Loss = 0.1357, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0718, Reg Loss = 70.2054, Reconstruct Loss = 0.0016, Cls Loss = 0.0631, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0712, Reg Loss = 70.1218, Reconstruct Loss = 0.0016, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0690, Reg Loss = 69.8282, Reconstruct Loss = 0.0014, Cls Loss = 0.0606, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0693, Reg Loss = 69.9431, Reconstruct Loss = 0.0013, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0697, Reg Loss = 69.9570, Reconstruct Loss = 0.0011, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0710, Reg Loss = 69.8840, Reconstruct Loss = 0.0012, Cls Loss = 0.0628, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0707, Reg Loss = 70.0452, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 59.89it/s]


Epoch [156/200], Validation Loss: 0.4112, Validation Accuracy: 91.15%



Iteration 0: Loss = 0.0257, Reg Loss = 67.0291, Reconstruct Loss = 0.0000, Cls Loss = 0.0190, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0781, Reg Loss = 70.8808, Reconstruct Loss = 0.0010, Cls Loss = 0.0700, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0790, Reg Loss = 70.1134, Reconstruct Loss = 0.0008, Cls Loss = 0.0712, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0777, Reg Loss = 69.4404, Reconstruct Loss = 0.0010, Cls Loss = 0.0698, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0755, Reg Loss = 69.2281, Reconstruct Loss = 0.0007, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0746, Reg Loss = 69.5211, Reconstruct Loss = 0.0007, Cls Loss = 0.0670, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0729, Reg Loss = 69.7666, Reconstruct Loss = 0.0009, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0724, Reg Loss = 69.9624, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.97it/s]


Epoch [157/200], Validation Loss: 0.3924, Validation Accuracy: 91.10%



Iteration 0: Loss = 0.1191, Reg Loss = 68.5223, Reconstruct Loss = 0.0000, Cls Loss = 0.1122, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0656, Reg Loss = 70.4445, Reconstruct Loss = 0.0010, Cls Loss = 0.0575, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0732, Reg Loss = 69.7839, Reconstruct Loss = 0.0010, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0748, Reg Loss = 69.7573, Reconstruct Loss = 0.0012, Cls Loss = 0.0666, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0745, Reg Loss = 70.0150, Reconstruct Loss = 0.0012, Cls Loss = 0.0663, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0761, Reg Loss = 69.7651, Reconstruct Loss = 0.0012, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0758, Reg Loss = 69.5803, Reconstruct Loss = 0.0012, Cls Loss = 0.0677, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0755, Reg Loss = 69.4551, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 82.30it/s]


Epoch [158/200], Validation Loss: 0.4253, Validation Accuracy: 91.00%



Iteration 0: Loss = 0.0096, Reg Loss = 70.4000, Reconstruct Loss = 0.0000, Cls Loss = 0.0026, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0782, Reg Loss = 69.5592, Reconstruct Loss = 0.0014, Cls Loss = 0.0699, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0699, Reg Loss = 69.9497, Reconstruct Loss = 0.0016, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0743, Reg Loss = 69.6823, Reconstruct Loss = 0.0013, Cls Loss = 0.0661, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0711, Reg Loss = 69.6955, Reconstruct Loss = 0.0010, Cls Loss = 0.0631, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0719, Reg Loss = 69.7917, Reconstruct Loss = 0.0008, Cls Loss = 0.0641, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0733, Reg Loss = 69.9582, Reconstruct Loss = 0.0008, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0725, Reg Loss = 69.9395, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.94it/s]


Epoch [159/200], Validation Loss: 0.3937, Validation Accuracy: 91.44%



Iteration 0: Loss = 0.0374, Reg Loss = 74.0611, Reconstruct Loss = 0.0000, Cls Loss = 0.0300, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0644, Reg Loss = 71.4032, Reconstruct Loss = 0.0003, Cls Loss = 0.0570, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0701, Reg Loss = 70.9283, Reconstruct Loss = 0.0010, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0691, Reg Loss = 70.3482, Reconstruct Loss = 0.0011, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0672, Reg Loss = 70.4900, Reconstruct Loss = 0.0012, Cls Loss = 0.0589, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0693, Reg Loss = 70.6111, Reconstruct Loss = 0.0012, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0705, Reg Loss = 70.2944, Reconstruct Loss = 0.0011, Cls Loss = 0.0624, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0699, Reg Loss = 70.0991, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.43it/s]


Epoch [160/200], Validation Loss: 0.3825, Validation Accuracy: 91.52%



Iteration 0: Loss = 0.0221, Reg Loss = 68.9218, Reconstruct Loss = 0.0000, Cls Loss = 0.0152, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0710, Reg Loss = 69.9968, Reconstruct Loss = 0.0014, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0688, Reg Loss = 69.3758, Reconstruct Loss = 0.0010, Cls Loss = 0.0608, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0729, Reg Loss = 68.8992, Reconstruct Loss = 0.0012, Cls Loss = 0.0648, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0708, Reg Loss = 68.9442, Reconstruct Loss = 0.0010, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0717, Reg Loss = 69.0728, Reconstruct Loss = 0.0009, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0722, Reg Loss = 69.0853, Reconstruct Loss = 0.0010, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0711, Reg Loss = 69.2951, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.79it/s]


Epoch [161/200], Validation Loss: 0.4012, Validation Accuracy: 91.31%



Iteration 0: Loss = 0.0604, Reg Loss = 73.0496, Reconstruct Loss = 0.0000, Cls Loss = 0.0531, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0744, Reg Loss = 69.0517, Reconstruct Loss = 0.0010, Cls Loss = 0.0665, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0692, Reg Loss = 69.5783, Reconstruct Loss = 0.0012, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0703, Reg Loss = 69.2875, Reconstruct Loss = 0.0014, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0708, Reg Loss = 69.2384, Reconstruct Loss = 0.0013, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0705, Reg Loss = 69.0559, Reconstruct Loss = 0.0012, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0701, Reg Loss = 69.1886, Reconstruct Loss = 0.0013, Cls Loss = 0.0619, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0704, Reg Loss = 69.1625, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 70.73it/s]


Epoch [162/200], Validation Loss: 0.4234, Validation Accuracy: 90.78%



Iteration 0: Loss = 0.1780, Reg Loss = 71.5960, Reconstruct Loss = 0.0000, Cls Loss = 0.1708, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0706, Reg Loss = 70.7162, Reconstruct Loss = 0.0007, Cls Loss = 0.0628, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0751, Reg Loss = 70.2612, Reconstruct Loss = 0.0008, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0722, Reg Loss = 70.0967, Reconstruct Loss = 0.0009, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0720, Reg Loss = 69.8745, Reconstruct Loss = 0.0008, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0713, Reg Loss = 69.7795, Reconstruct Loss = 0.0009, Cls Loss = 0.0635, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0726, Reg Loss = 69.6271, Reconstruct Loss = 0.0008, Cls Loss = 0.0648, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0728, Reg Loss = 69.2774, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.27it/s]


Epoch [163/200], Validation Loss: 0.4238, Validation Accuracy: 91.11%



Iteration 0: Loss = 0.0180, Reg Loss = 65.4066, Reconstruct Loss = 0.0000, Cls Loss = 0.0115, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0678, Reg Loss = 66.9422, Reconstruct Loss = 0.0006, Cls Loss = 0.0605, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0694, Reg Loss = 66.4906, Reconstruct Loss = 0.0010, Cls Loss = 0.0618, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0718, Reg Loss = 67.3597, Reconstruct Loss = 0.0008, Cls Loss = 0.0643, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0698, Reg Loss = 67.9404, Reconstruct Loss = 0.0009, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0693, Reg Loss = 68.2195, Reconstruct Loss = 0.0009, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0717, Reg Loss = 68.4165, Reconstruct Loss = 0.0009, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0716, Reg Loss = 68.4859, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 73.70it/s]


Epoch [164/200], Validation Loss: 0.4050, Validation Accuracy: 91.30%



Iteration 0: Loss = 0.0460, Reg Loss = 64.6197, Reconstruct Loss = 0.0000, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0658, Reg Loss = 69.6155, Reconstruct Loss = 0.0004, Cls Loss = 0.0585, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0706, Reg Loss = 69.4840, Reconstruct Loss = 0.0007, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0697, Reg Loss = 69.5247, Reconstruct Loss = 0.0004, Cls Loss = 0.0623, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0727, Reg Loss = 69.6416, Reconstruct Loss = 0.0008, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0733, Reg Loss = 69.3793, Reconstruct Loss = 0.0006, Cls Loss = 0.0657, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0728, Reg Loss = 69.4141, Reconstruct Loss = 0.0007, Cls Loss = 0.0652, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0718, Reg Loss = 69.5048, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.18it/s]


Epoch [165/200], Validation Loss: 0.4007, Validation Accuracy: 91.02%



Iteration 0: Loss = 0.0394, Reg Loss = 67.9561, Reconstruct Loss = 0.0000, Cls Loss = 0.0326, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0803, Reg Loss = 71.3713, Reconstruct Loss = 0.0016, Cls Loss = 0.0716, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0724, Reg Loss = 71.0912, Reconstruct Loss = 0.0016, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0731, Reg Loss = 70.9977, Reconstruct Loss = 0.0014, Cls Loss = 0.0646, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0696, Reg Loss = 71.0670, Reconstruct Loss = 0.0014, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0688, Reg Loss = 71.0315, Reconstruct Loss = 0.0014, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0701, Reg Loss = 70.9750, Reconstruct Loss = 0.0013, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0699, Reg Loss = 70.8509, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 70.55it/s]


Epoch [166/200], Validation Loss: 0.3991, Validation Accuracy: 91.37%



Iteration 0: Loss = 0.1776, Reg Loss = 70.2394, Reconstruct Loss = 0.0000, Cls Loss = 0.1706, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0740, Reg Loss = 69.8212, Reconstruct Loss = 0.0023, Cls Loss = 0.0646, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0731, Reg Loss = 70.6532, Reconstruct Loss = 0.0018, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0727, Reg Loss = 70.6148, Reconstruct Loss = 0.0019, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0710, Reg Loss = 70.5229, Reconstruct Loss = 0.0017, Cls Loss = 0.0623, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0701, Reg Loss = 70.2951, Reconstruct Loss = 0.0016, Cls Loss = 0.0615, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0712, Reg Loss = 70.0463, Reconstruct Loss = 0.0016, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0714, Reg Loss = 69.8201, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.47it/s]


Epoch [167/200], Validation Loss: 0.4069, Validation Accuracy: 91.48%



Iteration 0: Loss = 0.0331, Reg Loss = 66.1433, Reconstruct Loss = 0.0000, Cls Loss = 0.0265, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0666, Reg Loss = 70.5963, Reconstruct Loss = 0.0016, Cls Loss = 0.0579, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0628, Reg Loss = 69.9050, Reconstruct Loss = 0.0013, Cls Loss = 0.0546, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0662, Reg Loss = 69.2474, Reconstruct Loss = 0.0010, Cls Loss = 0.0583, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0671, Reg Loss = 68.9333, Reconstruct Loss = 0.0010, Cls Loss = 0.0592, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0699, Reg Loss = 68.9475, Reconstruct Loss = 0.0011, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0704, Reg Loss = 69.0292, Reconstruct Loss = 0.0012, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0717, Reg Loss = 69.1807, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 73.86it/s]


Epoch [168/200], Validation Loss: 0.3944, Validation Accuracy: 91.30%



Iteration 0: Loss = 0.1575, Reg Loss = 67.0741, Reconstruct Loss = 0.0000, Cls Loss = 0.1508, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0708, Reg Loss = 68.7355, Reconstruct Loss = 0.0010, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0665, Reg Loss = 69.7295, Reconstruct Loss = 0.0013, Cls Loss = 0.0582, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0699, Reg Loss = 70.3251, Reconstruct Loss = 0.0014, Cls Loss = 0.0615, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0692, Reg Loss = 70.6274, Reconstruct Loss = 0.0013, Cls Loss = 0.0608, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0702, Reg Loss = 70.6762, Reconstruct Loss = 0.0012, Cls Loss = 0.0619, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0713, Reg Loss = 70.6503, Reconstruct Loss = 0.0012, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0710, Reg Loss = 70.7213, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.39it/s]


Epoch [169/200], Validation Loss: 0.4070, Validation Accuracy: 91.39%



Iteration 0: Loss = 0.0234, Reg Loss = 68.7465, Reconstruct Loss = 0.0000, Cls Loss = 0.0165, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0753, Reg Loss = 69.9484, Reconstruct Loss = 0.0012, Cls Loss = 0.0671, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0708, Reg Loss = 70.0048, Reconstruct Loss = 0.0015, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0668, Reg Loss = 70.2859, Reconstruct Loss = 0.0011, Cls Loss = 0.0586, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0696, Reg Loss = 70.4960, Reconstruct Loss = 0.0010, Cls Loss = 0.0615, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0698, Reg Loss = 70.1600, Reconstruct Loss = 0.0011, Cls Loss = 0.0617, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0693, Reg Loss = 70.0466, Reconstruct Loss = 0.0011, Cls Loss = 0.0611, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0697, Reg Loss = 69.8044, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.63it/s]


Epoch [170/200], Validation Loss: 0.4369, Validation Accuracy: 90.71%



Iteration 0: Loss = 0.0458, Reg Loss = 65.7238, Reconstruct Loss = 0.0000, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0679, Reg Loss = 68.5889, Reconstruct Loss = 0.0009, Cls Loss = 0.0601, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0694, Reg Loss = 69.1899, Reconstruct Loss = 0.0008, Cls Loss = 0.0617, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0712, Reg Loss = 68.9635, Reconstruct Loss = 0.0006, Cls Loss = 0.0636, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0716, Reg Loss = 68.7113, Reconstruct Loss = 0.0006, Cls Loss = 0.0641, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0709, Reg Loss = 69.1346, Reconstruct Loss = 0.0006, Cls Loss = 0.0634, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0709, Reg Loss = 69.1518, Reconstruct Loss = 0.0009, Cls Loss = 0.0631, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0708, Reg Loss = 69.1455, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 78.33it/s]


Epoch [171/200], Validation Loss: 0.4021, Validation Accuracy: 90.91%



Iteration 0: Loss = 0.0547, Reg Loss = 66.8586, Reconstruct Loss = 0.0000, Cls Loss = 0.0480, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0740, Reg Loss = 69.2578, Reconstruct Loss = 0.0000, Cls Loss = 0.0670, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0701, Reg Loss = 69.6097, Reconstruct Loss = 0.0006, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0686, Reg Loss = 69.3040, Reconstruct Loss = 0.0006, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0682, Reg Loss = 69.2865, Reconstruct Loss = 0.0008, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0675, Reg Loss = 69.5503, Reconstruct Loss = 0.0007, Cls Loss = 0.0599, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0671, Reg Loss = 69.9232, Reconstruct Loss = 0.0007, Cls Loss = 0.0594, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0673, Reg Loss = 70.2154, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.63it/s]


Epoch [172/200], Validation Loss: 0.3864, Validation Accuracy: 91.41%



Iteration 0: Loss = 0.1169, Reg Loss = 69.8864, Reconstruct Loss = 0.0000, Cls Loss = 0.1099, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0669, Reg Loss = 70.2694, Reconstruct Loss = 0.0009, Cls Loss = 0.0590, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0693, Reg Loss = 69.9957, Reconstruct Loss = 0.0006, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0703, Reg Loss = 70.1625, Reconstruct Loss = 0.0007, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0707, Reg Loss = 69.9296, Reconstruct Loss = 0.0008, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0697, Reg Loss = 69.8290, Reconstruct Loss = 0.0010, Cls Loss = 0.0617, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0707, Reg Loss = 69.9123, Reconstruct Loss = 0.0011, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0712, Reg Loss = 69.6139, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.51it/s]


Epoch [173/200], Validation Loss: 0.3979, Validation Accuracy: 91.38%



Iteration 0: Loss = 0.0786, Reg Loss = 66.3308, Reconstruct Loss = 0.0000, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0710, Reg Loss = 67.4903, Reconstruct Loss = 0.0013, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0684, Reg Loss = 67.4180, Reconstruct Loss = 0.0010, Cls Loss = 0.0606, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0666, Reg Loss = 67.6871, Reconstruct Loss = 0.0008, Cls Loss = 0.0591, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0697, Reg Loss = 68.3349, Reconstruct Loss = 0.0007, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0711, Reg Loss = 68.8907, Reconstruct Loss = 0.0010, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0699, Reg Loss = 69.4414, Reconstruct Loss = 0.0008, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0713, Reg Loss = 69.3423, Reconstruct Loss =

100%|██████████| 157/157 [00:03<00:00, 47.61it/s]


Epoch [174/200], Validation Loss: 0.4097, Validation Accuracy: 91.22%



Iteration 0: Loss = 0.0193, Reg Loss = 69.3957, Reconstruct Loss = 0.0000, Cls Loss = 0.0124, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0688, Reg Loss = 69.4248, Reconstruct Loss = 0.0006, Cls Loss = 0.0612, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0667, Reg Loss = 68.9087, Reconstruct Loss = 0.0005, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0682, Reg Loss = 69.4107, Reconstruct Loss = 0.0004, Cls Loss = 0.0608, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0698, Reg Loss = 69.3293, Reconstruct Loss = 0.0004, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0702, Reg Loss = 69.2386, Reconstruct Loss = 0.0004, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0680, Reg Loss = 69.1403, Reconstruct Loss = 0.0004, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0685, Reg Loss = 68.9504, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.72it/s]


Epoch [175/200], Validation Loss: 0.4188, Validation Accuracy: 91.10%



Iteration 0: Loss = 0.1009, Reg Loss = 65.2450, Reconstruct Loss = 0.0000, Cls Loss = 0.0944, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0616, Reg Loss = 68.3406, Reconstruct Loss = 0.0010, Cls Loss = 0.0538, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0632, Reg Loss = 67.9696, Reconstruct Loss = 0.0010, Cls Loss = 0.0555, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0650, Reg Loss = 68.0664, Reconstruct Loss = 0.0009, Cls Loss = 0.0573, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0647, Reg Loss = 68.2489, Reconstruct Loss = 0.0008, Cls Loss = 0.0570, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0674, Reg Loss = 68.3292, Reconstruct Loss = 0.0009, Cls Loss = 0.0596, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0681, Reg Loss = 68.5195, Reconstruct Loss = 0.0010, Cls Loss = 0.0603, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0681, Reg Loss = 68.6206, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.37it/s]


Epoch [176/200], Validation Loss: 0.4094, Validation Accuracy: 91.09%



Iteration 0: Loss = 0.1186, Reg Loss = 66.0294, Reconstruct Loss = 0.0000, Cls Loss = 0.1120, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0641, Reg Loss = 69.4106, Reconstruct Loss = 0.0009, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0624, Reg Loss = 69.1745, Reconstruct Loss = 0.0005, Cls Loss = 0.0550, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0667, Reg Loss = 69.4509, Reconstruct Loss = 0.0005, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0695, Reg Loss = 69.4791, Reconstruct Loss = 0.0006, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0706, Reg Loss = 69.2186, Reconstruct Loss = 0.0006, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0693, Reg Loss = 69.3120, Reconstruct Loss = 0.0007, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0688, Reg Loss = 69.4227, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 70.63it/s]


Epoch [177/200], Validation Loss: 0.3873, Validation Accuracy: 91.27%



Iteration 0: Loss = 0.0922, Reg Loss = 66.6228, Reconstruct Loss = 0.0000, Cls Loss = 0.0856, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0589, Reg Loss = 71.2767, Reconstruct Loss = 0.0000, Cls Loss = 0.0518, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0679, Reg Loss = 72.0699, Reconstruct Loss = 0.0005, Cls Loss = 0.0602, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0654, Reg Loss = 72.2861, Reconstruct Loss = 0.0011, Cls Loss = 0.0571, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0672, Reg Loss = 72.2403, Reconstruct Loss = 0.0012, Cls Loss = 0.0588, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0673, Reg Loss = 71.9638, Reconstruct Loss = 0.0011, Cls Loss = 0.0590, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0681, Reg Loss = 71.4450, Reconstruct Loss = 0.0011, Cls Loss = 0.0599, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0685, Reg Loss = 70.8856, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.40it/s]


Epoch [178/200], Validation Loss: 0.4075, Validation Accuracy: 91.22%



Iteration 0: Loss = 0.0180, Reg Loss = 73.7372, Reconstruct Loss = 0.0000, Cls Loss = 0.0106, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0627, Reg Loss = 70.6886, Reconstruct Loss = 0.0011, Cls Loss = 0.0545, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0681, Reg Loss = 69.6023, Reconstruct Loss = 0.0013, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0693, Reg Loss = 69.1522, Reconstruct Loss = 0.0011, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0688, Reg Loss = 68.9154, Reconstruct Loss = 0.0012, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0705, Reg Loss = 68.8953, Reconstruct Loss = 0.0012, Cls Loss = 0.0624, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0701, Reg Loss = 68.7501, Reconstruct Loss = 0.0011, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0694, Reg Loss = 68.6707, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.08it/s]


Epoch [179/200], Validation Loss: 0.4048, Validation Accuracy: 91.37%



Iteration 0: Loss = 0.0449, Reg Loss = 72.3539, Reconstruct Loss = 0.0000, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0778, Reg Loss = 68.0240, Reconstruct Loss = 0.0006, Cls Loss = 0.0704, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0703, Reg Loss = 67.8322, Reconstruct Loss = 0.0006, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0694, Reg Loss = 67.7988, Reconstruct Loss = 0.0006, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0695, Reg Loss = 67.9605, Reconstruct Loss = 0.0008, Cls Loss = 0.0619, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0703, Reg Loss = 67.9826, Reconstruct Loss = 0.0008, Cls Loss = 0.0627, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0704, Reg Loss = 68.0299, Reconstruct Loss = 0.0008, Cls Loss = 0.0628, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0705, Reg Loss = 67.9291, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.71it/s]


Epoch [180/200], Validation Loss: 0.4157, Validation Accuracy: 91.28%



Iteration 0: Loss = 0.0241, Reg Loss = 64.0244, Reconstruct Loss = 0.0000, Cls Loss = 0.0177, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0935, Reg Loss = 67.0828, Reconstruct Loss = 0.0018, Cls Loss = 0.0850, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0783, Reg Loss = 67.8738, Reconstruct Loss = 0.0014, Cls Loss = 0.0701, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0780, Reg Loss = 68.5960, Reconstruct Loss = 0.0010, Cls Loss = 0.0702, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0750, Reg Loss = 69.1415, Reconstruct Loss = 0.0008, Cls Loss = 0.0672, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0747, Reg Loss = 69.4178, Reconstruct Loss = 0.0008, Cls Loss = 0.0670, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0729, Reg Loss = 69.5586, Reconstruct Loss = 0.0009, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0723, Reg Loss = 69.4067, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 77.68it/s]


Epoch [181/200], Validation Loss: 0.4176, Validation Accuracy: 91.00%



Iteration 0: Loss = 0.0156, Reg Loss = 74.5611, Reconstruct Loss = 0.0000, Cls Loss = 0.0081, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0681, Reg Loss = 70.0098, Reconstruct Loss = 0.0010, Cls Loss = 0.0601, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0672, Reg Loss = 69.7619, Reconstruct Loss = 0.0010, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0662, Reg Loss = 69.6698, Reconstruct Loss = 0.0008, Cls Loss = 0.0584, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0663, Reg Loss = 69.9691, Reconstruct Loss = 0.0009, Cls Loss = 0.0585, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0678, Reg Loss = 69.9387, Reconstruct Loss = 0.0011, Cls Loss = 0.0596, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0672, Reg Loss = 69.9713, Reconstruct Loss = 0.0012, Cls Loss = 0.0590, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0688, Reg Loss = 70.0322, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 71.94it/s]


Epoch [182/200], Validation Loss: 0.4259, Validation Accuracy: 91.25%



Iteration 0: Loss = 0.0557, Reg Loss = 69.3245, Reconstruct Loss = 0.0000, Cls Loss = 0.0488, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0734, Reg Loss = 69.1645, Reconstruct Loss = 0.0018, Cls Loss = 0.0647, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0724, Reg Loss = 68.3160, Reconstruct Loss = 0.0020, Cls Loss = 0.0636, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0706, Reg Loss = 68.5653, Reconstruct Loss = 0.0018, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0703, Reg Loss = 68.5034, Reconstruct Loss = 0.0016, Cls Loss = 0.0619, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0695, Reg Loss = 68.3835, Reconstruct Loss = 0.0015, Cls Loss = 0.0612, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0701, Reg Loss = 68.3757, Reconstruct Loss = 0.0014, Cls Loss = 0.0618, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0703, Reg Loss = 68.3058, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 75.30it/s]


Epoch [183/200], Validation Loss: 0.3860, Validation Accuracy: 91.30%



Iteration 0: Loss = 0.0943, Reg Loss = 69.4931, Reconstruct Loss = 0.0000, Cls Loss = 0.0873, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0651, Reg Loss = 70.0171, Reconstruct Loss = 0.0009, Cls Loss = 0.0572, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0637, Reg Loss = 69.5336, Reconstruct Loss = 0.0011, Cls Loss = 0.0556, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0666, Reg Loss = 68.7984, Reconstruct Loss = 0.0009, Cls Loss = 0.0588, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0671, Reg Loss = 68.1290, Reconstruct Loss = 0.0008, Cls Loss = 0.0595, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0662, Reg Loss = 68.1210, Reconstruct Loss = 0.0010, Cls Loss = 0.0584, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0649, Reg Loss = 68.3523, Reconstruct Loss = 0.0009, Cls Loss = 0.0571, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0667, Reg Loss = 68.3368, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 72.79it/s]


Epoch [184/200], Validation Loss: 0.3929, Validation Accuracy: 91.20%



Iteration 0: Loss = 0.0556, Reg Loss = 64.3411, Reconstruct Loss = 0.0000, Cls Loss = 0.0492, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0679, Reg Loss = 66.8596, Reconstruct Loss = 0.0006, Cls Loss = 0.0606, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0713, Reg Loss = 67.2104, Reconstruct Loss = 0.0012, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0690, Reg Loss = 67.3876, Reconstruct Loss = 0.0011, Cls Loss = 0.0611, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0689, Reg Loss = 67.2524, Reconstruct Loss = 0.0011, Cls Loss = 0.0611, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0690, Reg Loss = 67.3848, Reconstruct Loss = 0.0010, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0693, Reg Loss = 67.3207, Reconstruct Loss = 0.0012, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0685, Reg Loss = 67.4136, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 67.08it/s]


Epoch [185/200], Validation Loss: 0.4092, Validation Accuracy: 91.64%



Checkpoint saved at epoch 184 with accuracy: 91.64%
Iteration 0: Loss = 0.0498, Reg Loss = 70.1398, Reconstruct Loss = 0.0000, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0699, Reg Loss = 67.7186, Reconstruct Loss = 0.0015, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0710, Reg Loss = 67.6998, Reconstruct Loss = 0.0014, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0723, Reg Loss = 67.8092, Reconstruct Loss = 0.0013, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0731, Reg Loss = 68.2243, Reconstruct Loss = 0.0012, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0717, Reg Loss = 68.5444, Reconstruct Loss = 0.0011, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0704, Reg Loss = 69.0336, Reconstruct Loss = 0.0013, Cls Loss = 0.0623, Learning rate = 1.0000e-03
Iteration 700: L

100%|██████████| 157/157 [00:02<00:00, 69.40it/s]


Epoch [186/200], Validation Loss: 0.4266, Validation Accuracy: 91.07%



Iteration 0: Loss = 0.0130, Reg Loss = 65.0128, Reconstruct Loss = 0.0000, Cls Loss = 0.0065, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0696, Reg Loss = 68.4194, Reconstruct Loss = 0.0003, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0645, Reg Loss = 68.9149, Reconstruct Loss = 0.0006, Cls Loss = 0.0571, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0625, Reg Loss = 68.2668, Reconstruct Loss = 0.0013, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0656, Reg Loss = 68.3109, Reconstruct Loss = 0.0013, Cls Loss = 0.0574, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0653, Reg Loss = 68.4614, Reconstruct Loss = 0.0016, Cls Loss = 0.0569, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0664, Reg Loss = 68.6477, Reconstruct Loss = 0.0015, Cls Loss = 0.0581, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0681, Reg Loss = 68.4518, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 76.20it/s]


Epoch [187/200], Validation Loss: 0.3994, Validation Accuracy: 91.22%



Iteration 0: Loss = 0.0284, Reg Loss = 68.3428, Reconstruct Loss = 0.0000, Cls Loss = 0.0216, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0668, Reg Loss = 66.6326, Reconstruct Loss = 0.0009, Cls Loss = 0.0592, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0646, Reg Loss = 67.3063, Reconstruct Loss = 0.0009, Cls Loss = 0.0570, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0642, Reg Loss = 68.0146, Reconstruct Loss = 0.0009, Cls Loss = 0.0565, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.0678, Reg Loss = 68.3337, Reconstruct Loss = 0.0010, Cls Loss = 0.0600, Learning rate = 1.0000e-03
Iteration 500: Loss = 0.0662, Reg Loss = 68.4094, Reconstruct Loss = 0.0010, Cls Loss = 0.0584, Learning rate = 1.0000e-03
Iteration 600: Loss = 0.0666, Reg Loss = 68.5857, Reconstruct Loss = 0.0009, Cls Loss = 0.0588, Learning rate = 1.0000e-03
Iteration 700: Loss = 0.0678, Reg Loss = 68.5577, Reconstruct Loss =

100%|██████████| 157/157 [00:02<00:00, 78.19it/s]


Epoch [188/200], Validation Loss: 0.3923, Validation Accuracy: 91.38%



Iteration 0: Loss = 0.0257, Reg Loss = 62.2351, Reconstruct Loss = 0.0000, Cls Loss = 0.0194, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0656, Reg Loss = 66.6533, Reconstruct Loss = 0.0006, Cls Loss = 0.0584, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0598, Reg Loss = 66.8730, Reconstruct Loss = 0.0006, Cls Loss = 0.0525, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0575, Reg Loss = 66.9506, Reconstruct Loss = 0.0006, Cls Loss = 0.0502, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0565, Reg Loss = 66.9733, Reconstruct Loss = 0.0007, Cls Loss = 0.0491, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0556, Reg Loss = 66.9438, Reconstruct Loss = 0.0007, Cls Loss = 0.0482, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0554, Reg Loss = 66.8495, Reconstruct Loss = 0.0007, Cls Loss = 0.0481, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0543, Reg Loss = 66.8945, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 79.64it/s]


Epoch [189/200], Validation Loss: 0.4278, Validation Accuracy: 91.14%



Iteration 0: Loss = 0.0655, Reg Loss = 70.4183, Reconstruct Loss = 0.0000, Cls Loss = 0.0585, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0569, Reg Loss = 66.2436, Reconstruct Loss = 0.0006, Cls Loss = 0.0496, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0542, Reg Loss = 66.4773, Reconstruct Loss = 0.0010, Cls Loss = 0.0466, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0546, Reg Loss = 66.2970, Reconstruct Loss = 0.0007, Cls Loss = 0.0473, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0531, Reg Loss = 66.3515, Reconstruct Loss = 0.0006, Cls Loss = 0.0459, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0541, Reg Loss = 66.3563, Reconstruct Loss = 0.0007, Cls Loss = 0.0467, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0533, Reg Loss = 66.3310, Reconstruct Loss = 0.0009, Cls Loss = 0.0458, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0529, Reg Loss = 66.3286, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.25it/s]


Epoch [190/200], Validation Loss: 0.4249, Validation Accuracy: 91.29%



Iteration 0: Loss = 0.0329, Reg Loss = 69.3556, Reconstruct Loss = 0.0000, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0505, Reg Loss = 65.9413, Reconstruct Loss = 0.0000, Cls Loss = 0.0439, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0524, Reg Loss = 66.1436, Reconstruct Loss = 0.0005, Cls Loss = 0.0452, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0509, Reg Loss = 66.2210, Reconstruct Loss = 0.0004, Cls Loss = 0.0439, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0522, Reg Loss = 66.0984, Reconstruct Loss = 0.0003, Cls Loss = 0.0453, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0527, Reg Loss = 66.1083, Reconstruct Loss = 0.0006, Cls Loss = 0.0455, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0521, Reg Loss = 66.1655, Reconstruct Loss = 0.0008, Cls Loss = 0.0446, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0511, Reg Loss = 66.0916, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 82.04it/s]


Epoch [191/200], Validation Loss: 0.4062, Validation Accuracy: 91.46%



Iteration 0: Loss = 0.0220, Reg Loss = 67.7852, Reconstruct Loss = 0.0000, Cls Loss = 0.0152, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0459, Reg Loss = 66.0598, Reconstruct Loss = 0.0012, Cls Loss = 0.0381, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0479, Reg Loss = 66.2279, Reconstruct Loss = 0.0015, Cls Loss = 0.0397, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0502, Reg Loss = 66.0905, Reconstruct Loss = 0.0019, Cls Loss = 0.0417, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0493, Reg Loss = 65.8689, Reconstruct Loss = 0.0015, Cls Loss = 0.0412, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0486, Reg Loss = 65.7873, Reconstruct Loss = 0.0015, Cls Loss = 0.0405, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0499, Reg Loss = 65.8128, Reconstruct Loss = 0.0014, Cls Loss = 0.0419, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0496, Reg Loss = 65.7604, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.02it/s]


Epoch [192/200], Validation Loss: 0.4006, Validation Accuracy: 91.49%



Iteration 0: Loss = 0.0279, Reg Loss = 67.2029, Reconstruct Loss = 0.0000, Cls Loss = 0.0212, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0437, Reg Loss = 65.8256, Reconstruct Loss = 0.0017, Cls Loss = 0.0354, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0456, Reg Loss = 65.5504, Reconstruct Loss = 0.0010, Cls Loss = 0.0381, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0473, Reg Loss = 65.4924, Reconstruct Loss = 0.0011, Cls Loss = 0.0396, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0479, Reg Loss = 65.4872, Reconstruct Loss = 0.0010, Cls Loss = 0.0403, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0479, Reg Loss = 65.4528, Reconstruct Loss = 0.0009, Cls Loss = 0.0405, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0484, Reg Loss = 65.4151, Reconstruct Loss = 0.0009, Cls Loss = 0.0410, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0481, Reg Loss = 65.3943, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.94it/s]


Epoch [193/200], Validation Loss: 0.4096, Validation Accuracy: 91.38%



Iteration 0: Loss = 0.0292, Reg Loss = 65.1828, Reconstruct Loss = 0.0000, Cls Loss = 0.0226, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0484, Reg Loss = 64.7018, Reconstruct Loss = 0.0017, Cls Loss = 0.0402, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0496, Reg Loss = 64.7925, Reconstruct Loss = 0.0014, Cls Loss = 0.0417, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0507, Reg Loss = 64.7906, Reconstruct Loss = 0.0011, Cls Loss = 0.0431, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0497, Reg Loss = 64.7227, Reconstruct Loss = 0.0011, Cls Loss = 0.0422, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0505, Reg Loss = 64.6714, Reconstruct Loss = 0.0009, Cls Loss = 0.0431, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0502, Reg Loss = 64.6733, Reconstruct Loss = 0.0009, Cls Loss = 0.0429, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0491, Reg Loss = 64.6905, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.86it/s]


Epoch [194/200], Validation Loss: 0.4113, Validation Accuracy: 91.59%



Iteration 0: Loss = 0.1038, Reg Loss = 61.4409, Reconstruct Loss = 0.0000, Cls Loss = 0.0977, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0475, Reg Loss = 64.6811, Reconstruct Loss = 0.0006, Cls Loss = 0.0405, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0497, Reg Loss = 64.7345, Reconstruct Loss = 0.0006, Cls Loss = 0.0427, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0490, Reg Loss = 64.6208, Reconstruct Loss = 0.0009, Cls Loss = 0.0417, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0481, Reg Loss = 64.5111, Reconstruct Loss = 0.0007, Cls Loss = 0.0409, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0471, Reg Loss = 64.5795, Reconstruct Loss = 0.0008, Cls Loss = 0.0399, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0467, Reg Loss = 64.6292, Reconstruct Loss = 0.0009, Cls Loss = 0.0393, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0462, Reg Loss = 64.6728, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 80.85it/s]


Epoch [195/200], Validation Loss: 0.4349, Validation Accuracy: 91.21%



Iteration 0: Loss = 0.0376, Reg Loss = 66.8832, Reconstruct Loss = 0.0000, Cls Loss = 0.0309, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0446, Reg Loss = 65.2722, Reconstruct Loss = 0.0011, Cls Loss = 0.0369, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0495, Reg Loss = 65.2691, Reconstruct Loss = 0.0011, Cls Loss = 0.0418, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0494, Reg Loss = 65.2784, Reconstruct Loss = 0.0012, Cls Loss = 0.0418, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0489, Reg Loss = 65.1727, Reconstruct Loss = 0.0010, Cls Loss = 0.0414, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0482, Reg Loss = 65.1162, Reconstruct Loss = 0.0010, Cls Loss = 0.0407, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0480, Reg Loss = 65.0727, Reconstruct Loss = 0.0011, Cls Loss = 0.0404, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0478, Reg Loss = 65.0803, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.11it/s]


Epoch [196/200], Validation Loss: 0.4187, Validation Accuracy: 91.48%



Iteration 0: Loss = 0.0111, Reg Loss = 68.6446, Reconstruct Loss = 0.0000, Cls Loss = 0.0043, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0504, Reg Loss = 64.9601, Reconstruct Loss = 0.0005, Cls Loss = 0.0434, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0456, Reg Loss = 64.7110, Reconstruct Loss = 0.0010, Cls Loss = 0.0381, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0481, Reg Loss = 64.8235, Reconstruct Loss = 0.0012, Cls Loss = 0.0404, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0474, Reg Loss = 64.7992, Reconstruct Loss = 0.0013, Cls Loss = 0.0396, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0472, Reg Loss = 64.8119, Reconstruct Loss = 0.0013, Cls Loss = 0.0394, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0475, Reg Loss = 64.7857, Reconstruct Loss = 0.0015, Cls Loss = 0.0396, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0464, Reg Loss = 64.7549, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.23it/s]


Epoch [197/200], Validation Loss: 0.4416, Validation Accuracy: 91.13%



Iteration 0: Loss = 0.0455, Reg Loss = 67.8116, Reconstruct Loss = 0.0000, Cls Loss = 0.0387, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0465, Reg Loss = 64.6478, Reconstruct Loss = 0.0003, Cls Loss = 0.0398, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0461, Reg Loss = 64.6157, Reconstruct Loss = 0.0007, Cls Loss = 0.0389, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0453, Reg Loss = 64.5377, Reconstruct Loss = 0.0006, Cls Loss = 0.0382, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0451, Reg Loss = 64.6460, Reconstruct Loss = 0.0007, Cls Loss = 0.0379, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0454, Reg Loss = 64.6169, Reconstruct Loss = 0.0006, Cls Loss = 0.0384, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0466, Reg Loss = 64.5871, Reconstruct Loss = 0.0006, Cls Loss = 0.0395, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0467, Reg Loss = 64.5139, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.40it/s]


Epoch [198/200], Validation Loss: 0.4298, Validation Accuracy: 91.42%



Iteration 0: Loss = 0.0093, Reg Loss = 64.5686, Reconstruct Loss = 0.0000, Cls Loss = 0.0029, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0436, Reg Loss = 63.6004, Reconstruct Loss = 0.0011, Cls Loss = 0.0361, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0477, Reg Loss = 63.5171, Reconstruct Loss = 0.0011, Cls Loss = 0.0402, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0475, Reg Loss = 63.5515, Reconstruct Loss = 0.0010, Cls Loss = 0.0402, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0487, Reg Loss = 63.4633, Reconstruct Loss = 0.0008, Cls Loss = 0.0416, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0472, Reg Loss = 63.5365, Reconstruct Loss = 0.0008, Cls Loss = 0.0401, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0479, Reg Loss = 63.5229, Reconstruct Loss = 0.0008, Cls Loss = 0.0408, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0482, Reg Loss = 63.4382, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 81.15it/s]


Epoch [199/200], Validation Loss: 0.4333, Validation Accuracy: 91.33%



Iteration 0: Loss = 0.0339, Reg Loss = 63.8177, Reconstruct Loss = 0.0000, Cls Loss = 0.0275, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0496, Reg Loss = 62.8479, Reconstruct Loss = 0.0008, Cls Loss = 0.0425, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0455, Reg Loss = 62.9127, Reconstruct Loss = 0.0004, Cls Loss = 0.0388, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0446, Reg Loss = 63.1042, Reconstruct Loss = 0.0008, Cls Loss = 0.0375, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0442, Reg Loss = 63.1665, Reconstruct Loss = 0.0008, Cls Loss = 0.0371, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0451, Reg Loss = 63.0823, Reconstruct Loss = 0.0008, Cls Loss = 0.0380, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0454, Reg Loss = 63.0768, Reconstruct Loss = 0.0009, Cls Loss = 0.0383, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0458, Reg Loss = 63.0179, Reconstruct Loss =

100%|██████████| 157/157 [00:01<00:00, 82.31it/s]

Epoch [200/200], Validation Loss: 0.4156, Validation Accuracy: 91.31%








In [36]:
# Post-training check: evaluate the first element stored under key '32' of
# dim_dict on the validation loader, using a fresh cross-entropy criterion.
# NOTE(review): presumably this is the hidden-dim-32 network holding whatever
# weights the last training/validation step left in it — confirm against
# init_model_dict.
# NOTE(review): the printed label says "Initial Permutated model" although this
# cell sits after the training log, so the figure is not a pre-training one.
val_loss, acc = validate_single(dim_dict['32'][0], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:02<00:00, 71.43it/s]

Initial Permutated model Validation Loss: 0.3979, Validation Accuracy: 91.37%





In [37]:
# Post-training check: evaluate the first element stored under key '64' of
# dim_dict on the validation loader, using a fresh cross-entropy criterion.
# NOTE(review): presumably this is the hidden-dim-64 network holding whatever
# weights the last training/validation step left in it — confirm against
# init_model_dict.
# NOTE(review): the printed label says "Initial Permutated model" although this
# cell sits after the training log, so the figure is not a pre-training one.
val_loss, acc = validate_single(dim_dict['64'][0], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:02<00:00, 76.15it/s]

Initial Permutated model Validation Loss: 0.4096, Validation Accuracy: 91.66%





In [38]:
# Reference point: evaluate the entry stored under key '64' of gt_model_dict
# with the same loader and criterion type as the dim_dict checks.
# init_model_dict's docstring describes gt_model_dict as holding the ground
# truth models for the starting dimension, so this is presumably the
# pretrained baseline to compare the dim_dict results against — TODO confirm.
val_loss, acc = validate_single(gt_model_dict['64'], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:02<00:00, 77.81it/s]

Initial Permutated model Validation Loss: 0.2825, Validation Accuracy: 92.60%





In [39]:
# Close the active Weights & Biases run started by initialize_wandb; the run
# history and run summary tables are printed as this cell's output.
wandb.finish()

0,1
Cls Loss,▆▆▇▆▆▆█▆▇▆▆▆▅▆▆▆▆▆▅▆▅▅▆▆▆▅▅▅▅▅▅▅▅▁▅▅▄▁▃▃
Learning rate,███████████████████████████████████▁▁▁▁▁
Loss,▄▃▄▃▃▃▃▃▃▁▃▃▃▃█▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▃▃▃▂▂▁▄
Reconstruct Loss,▆▅▄▄▄▄▁▆▆▅▁▄▄▄▅▄▅▅▄▁▄▁▃▅▃▄▄▄▁█▁▄▁▂▁▁▃▃▄▄
Reg Loss,▆▇█▆▆▆▅▅▅▅▄▅▅▇▆▅▅▄▃▄▅▅▄▄▄▄▃▃▄▄▄▅▃▃▇▂▂▂▂▁
Validation Accuracy,▂▄▁▂█▂▆▂▇▅▇▆▆▅▅▇█▆▄▃▄█▇▄▆▄▄▃██▇▄▆▃▆▄▅▄█▆
Validation Loss,▁▄▄▁▃▃▁▁▃▃▁▂▃▂▂▄▃▃▃▄▅▂▅▄▃▃▃▄▃▄▆▂▄▆▅▄▇▆█▅

0,1
Cls Loss,0.0386
Learning rate,0.0001
Loss,0.04582
Reconstruct Loss,0.00091
Reg Loss,63.01793
Validation Accuracy,0.9131
Validation Loss,0.41564


### 7 Testing loop

In [None]:
# Location of the best hypernetwork checkpoint inside the experiment's save
# directory. Built with os.path.join (as the other paths derived from
# args.training.save_model_path in this notebook are) instead of concatenating
# a hard-coded '/' separator.
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [44]:
# Echo the resolved checkpoint path so it can be checked by eye before loading.
saved_hypernet_path

'toy/experiments/resnet20_cifar10_32-64-4layer-200e-noisecoord-resmlpv2_smooth_5_256_16_smooth_in_code/cifar10_nerf_best.pth'

In [41]:
# Build a separate hypernetwork instance for evaluation from args and
# number_param; it is freshly constructed here and receives the saved weights
# in the next cell.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [42]:
# Deserialize the checkpoint with its tensors mapped to the CPU, then copy the
# entry stored under 'model_state_dict' into hyper_model_test. The value
# returned by load_state_dict is the cell's displayed output and reports
# whether all keys matched.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# were produced by this project.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [43]:
# Evaluate hypernetwork-generated weights for every hidden width from 16 to 64
# (inclusive). For each width: build the target network, let
# sample_merge_model fill it from hyper_model_test (called with K=100),
# validate it, save its state_dict, and append the metrics to a results file.

# The results file does not depend on the width, so resolve its path once.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(16, 65):
    # Create a model for this given dimension
    model = create_model(args.model.type,
                         hidden_dim=hidden_dim,
                         path=args.model.pretrained_path,
                         smooth=args.model.smooth).to(device)

    # If EMA is specified, apply it for the duration of sampling + validation.
    # NOTE(review): `ema` is defined earlier in the notebook; if it tracks the
    # training hypernetwork rather than hyper_model_test, applying it here does
    # not affect the weights being sampled — confirm.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always restore the original weights, even if sampling/validation
        # raises or the cell is interrupted, so the kernel is not left with
        # EMA-swapped weights for the next run.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Append the results; the file is created if it doesn't exist. Because the
    # mode is 'a', re-running this cell adds another set of lines instead of
    # overwriting the previous ones.
    with open(filepath, "a") as results_file:
        results_file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")


Replace the last 2 block of layer3 with new block with hidden dim 16
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 73.88it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 16, Validation Loss: 0.5833, Validation Accuracy: 86.04%


Replace the last 2 block of layer3 with new block with hidden dim 17
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.75it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 17, Validation Loss: 0.4120, Validation Accuracy: 87.45%


Replace the last 2 block of layer3 with new block with hidden dim 18
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.46it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 18, Validation Loss: 0.5548, Validation Accuracy: 86.53%


Replace the last 2 block of layer3 with new block with hidden dim 19
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.83it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 19, Validation Loss: 0.5272, Validation Accuracy: 87.68%


Replace the last 2 block of layer3 with new block with hidden dim 20
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.86it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 20, Validation Loss: 0.4715, Validation Accuracy: 88.73%


Replace the last 2 block of layer3 with new block with hidden dim 21
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.40it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 21, Validation Loss: 0.4964, Validation Accuracy: 89.06%


Replace the last 2 block of layer3 with new block with hidden dim 22
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.59it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 22, Validation Loss: 0.5281, Validation Accuracy: 88.59%


Replace the last 2 block of layer3 with new block with hidden dim 23
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.89it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 23, Validation Loss: 0.5110, Validation Accuracy: 89.60%


Replace the last 2 block of layer3 with new block with hidden dim 24
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.60it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 24, Validation Loss: 0.4651, Validation Accuracy: 90.54%


Replace the last 2 block of layer3 with new block with hidden dim 25
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.64it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 25, Validation Loss: 0.5091, Validation Accuracy: 89.84%


Replace the last 2 block of layer3 with new block with hidden dim 26
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.15it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 26, Validation Loss: 0.5510, Validation Accuracy: 88.93%


Replace the last 2 block of layer3 with new block with hidden dim 27
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.78it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 27, Validation Loss: 0.5131, Validation Accuracy: 89.54%


Replace the last 2 block of layer3 with new block with hidden dim 28
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.45it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 28, Validation Loss: 0.5564, Validation Accuracy: 89.60%


Replace the last 2 block of layer3 with new block with hidden dim 29
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.85it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 29, Validation Loss: 0.6313, Validation Accuracy: 88.32%


Replace the last 2 block of layer3 with new block with hidden dim 30
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.84it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 30, Validation Loss: 0.4967, Validation Accuracy: 90.45%


Replace the last 2 block of layer3 with new block with hidden dim 31
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.54it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 31, Validation Loss: 0.5196, Validation Accuracy: 89.41%


Replace the last 2 block of layer3 with new block with hidden dim 32
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.81it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 32, Validation Loss: 0.5462, Validation Accuracy: 89.45%


Replace the last 2 block of layer3 with new block with hidden dim 33
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.59it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 33, Validation Loss: 0.4485, Validation Accuracy: 90.41%


Replace the last 2 block of layer3 with new block with hidden dim 34
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.69it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 34, Validation Loss: 0.5846, Validation Accuracy: 88.17%


Replace the last 2 block of layer3 with new block with hidden dim 35
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.24it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 35, Validation Loss: 0.4792, Validation Accuracy: 90.59%


Replace the last 2 block of layer3 with new block with hidden dim 36
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.89it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 36, Validation Loss: 0.4853, Validation Accuracy: 90.52%


Replace the last 2 block of layer3 with new block with hidden dim 37
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.24it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 37, Validation Loss: 0.5153, Validation Accuracy: 89.24%


Replace the last 2 block of layer3 with new block with hidden dim 38
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.54it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 38, Validation Loss: 0.4686, Validation Accuracy: 90.72%


Replace the last 2 block of layer3 with new block with hidden dim 39
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.93it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 39, Validation Loss: 0.4530, Validation Accuracy: 91.07%


Replace the last 2 block of layer3 with new block with hidden dim 40
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.58it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 40, Validation Loss: 0.5330, Validation Accuracy: 89.59%


Replace the last 2 block of layer3 with new block with hidden dim 41
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.09it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 41, Validation Loss: 0.4433, Validation Accuracy: 90.97%


Replace the last 2 block of layer3 with new block with hidden dim 42
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.06it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 42, Validation Loss: 0.4653, Validation Accuracy: 90.69%


Replace the last 2 block of layer3 with new block with hidden dim 43
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 71.60it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 43, Validation Loss: 0.5036, Validation Accuracy: 89.73%


Replace the last 2 block of layer3 with new block with hidden dim 44
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 73.50it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 44, Validation Loss: 0.5226, Validation Accuracy: 90.12%


Replace the last 2 block of layer3 with new block with hidden dim 45
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 78.13it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 45, Validation Loss: 0.4604, Validation Accuracy: 90.66%


Replace the last 2 block of layer3 with new block with hidden dim 46
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 78.21it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 46, Validation Loss: 0.4825, Validation Accuracy: 90.43%


Replace the last 2 block of layer3 with new block with hidden dim 47
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 70.04it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 47, Validation Loss: 0.4856, Validation Accuracy: 90.30%


Replace the last 2 block of layer3 with new block with hidden dim 48
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 72.03it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 48, Validation Loss: 0.4545, Validation Accuracy: 90.61%


Replace the last 2 block of layer3 with new block with hidden dim 49
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.18it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 49, Validation Loss: 0.4398, Validation Accuracy: 90.98%


Replace the last 2 block of layer3 with new block with hidden dim 50
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.97it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 50, Validation Loss: 0.4495, Validation Accuracy: 91.16%


Replace the last 2 block of layer3 with new block with hidden dim 51
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.87it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 51, Validation Loss: 0.4770, Validation Accuracy: 90.78%


Replace the last 2 block of layer3 with new block with hidden dim 52
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 64.61it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 52, Validation Loss: 0.5386, Validation Accuracy: 89.86%


Replace the last 2 block of layer3 with new block with hidden dim 53
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 64.96it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 53, Validation Loss: 0.4338, Validation Accuracy: 91.07%


Replace the last 2 block of layer3 with new block with hidden dim 54
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.14it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 54, Validation Loss: 0.4758, Validation Accuracy: 90.49%


Replace the last 2 block of layer3 with new block with hidden dim 55
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.49it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 55, Validation Loss: 0.5492, Validation Accuracy: 89.85%


Replace the last 2 block of layer3 with new block with hidden dim 56
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.04it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 56, Validation Loss: 0.5069, Validation Accuracy: 90.31%


Replace the last 2 block of layer3 with new block with hidden dim 57
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.70it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 57, Validation Loss: 0.4919, Validation Accuracy: 90.29%


Replace the last 2 block of layer3 with new block with hidden dim 58
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.09it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 58, Validation Loss: 0.4860, Validation Accuracy: 90.50%


Replace the last 2 block of layer3 with new block with hidden dim 59
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.80it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 59, Validation Loss: 0.4830, Validation Accuracy: 90.38%


Replace the last 2 block of layer3 with new block with hidden dim 60
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 69.14it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 60, Validation Loss: 0.4793, Validation Accuracy: 90.40%


Replace the last 2 block of layer3 with new block with hidden dim 61
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.81it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 61, Validation Loss: 0.4551, Validation Accuracy: 90.76%


Replace the last 2 block of layer3 with new block with hidden dim 62
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.33it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 62, Validation Loss: 0.4838, Validation Accuracy: 90.39%


Replace the last 2 block of layer3 with new block with hidden dim 63
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 68.45it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 63, Validation Loss: 0.4391, Validation Accuracy: 91.03%


Replace the last 2 block of layer3 with new block with hidden dim 64
Loading pretrained weights for resnet20
Applying EMA


100%|██████████| 157/157 [00:02<00:00, 67.47it/s]


Test using model {'type': 'ResNet20', 'pretrained_path': 'toy/checkpoint/cifar10_ResNet20_smooth_Ours.pth', 'smooth': False}: hidden_dim 64, Validation Loss: 0.4512, Validation Accuracy: 90.84%


