## Import

In [1]:
import os
import random

In [2]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [3]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [4]:
def find_max_dim(model_cls):
    """Return the largest dimension found among the model's learnable parameters.

    The result is the maximum of:
      * the number of entries in ``model_cls.learnable_parameter``,
      * the first two axis sizes of every rank-4 tensor (e.g. conv kernels),
      * the length of every rank-1 tensor (e.g. biases).

    Tensors of any other rank do not contribute.
    """
    params = model_cls.learnable_parameter

    # The parameter count is the baseline candidate
    candidates = [len(params)]

    for tensor in params.values():
        rank = len(tensor.shape)
        if rank == 4:
            # Rank-4 weights: only the two leading axes are considered
            candidates.extend(tensor.shape[:2])
        elif rank == 1:
            # Rank-1 parameters: their single axis
            candidates.append(tensor.shape[0])

    return max(candidates)

### Initialize wandb

In [5]:
def initialize_wandb(config, project="dense-inr-trial", group="cifar10"):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    The run is named ``<YYYYmmddHHMMSS>-<config.experiment.name>`` using the
    local time at the moment of the call.

    Args:
        config: Configuration for the run. Must expose ``experiment.name`` and
            be convertible with ``dict(config)``; that dict is passed to wandb
            as the run config.
        project (str): wandb project to log to.
        group (str): wandb group the run is filed under.
    """
    # Imported after the docstring so the string above is the function's real docstring
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project=project, name=run_name, config=dict(config), group=group)

### Init model dictionary

In [6]:
def init_model_dict(args, device):
    """
    Build one target model per entry of ``args.dimensions.range`` and, for the
    starting dimension, an additional ground-truth model.

    Args:
        args: Configuration object; ``args.model.*`` supplies the model
            hyper-parameters and ``args.dimensions.range`` / ``args.dimensions.start``
            the hidden dimensions to build.
        device: Device every created model is moved to.

    Returns:
        dim_dict: Maps ``str(dim)`` to the tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``;
            the trailing ``None`` is the key-mask slot.
        gt_model_dict: Maps ``str(dim)`` to the ground-truth model (created with
            ``smooth=True`` and switched to eval mode) for the starting dimension only.
    """
    def _build(hidden_dim, **extra):
        # Single place that forwards the shared model settings to create_model
        cfg = args.model
        return create_model(cfg.type,
                            layers=cfg.layers,
                            growth=cfg.growth,
                            compression=cfg.compression,
                            bottleneck=cfg.bottleneck,
                            drop_rate=cfg.drop_rate,
                            hidden_dim=hidden_dim,
                            path=cfg.pretrained_path,
                            **extra).to(device)

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        key = f"{dim}"

        # Target model for this width plus its sampled coordinate description
        candidate = _build(dim)
        coords_tensor, keys_list, indices_list, size_list = sample_coordinates(candidate)
        dim_dict[key] = (candidate, coords_tensor, keys_list, indices_list, size_list, None)

        # Blank lines to separate the per-dimension console output
        print('\n')

        # Only the starting dimension (that of the pretrained model) gets a ground-truth model
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = _build(dim, smooth=True)
        reference.eval()
        gt_model_dict[key] = reference

    return dim_dict, gt_model_dict

### Training function

In [7]:
# Function to train the model for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """Train the hypernetwork ``model`` for one pass over ``train_loader``.

    For every batch a hidden dimension is drawn at random from
    ``args.dimensions.range``, the matching entry of ``dim_dict`` is looked up,
    ``sample_weights`` produces weights for that target network, and the target
    network classifies the batch. The optimised loss is a weighted sum of the
    classification loss, an L2-norm penalty on the generated weights and, when
    ``gt_model_dict`` holds a model for the chosen dimension, a reconstruction
    loss against that model's weights.

    Args:
        model: The hypernetwork being trained.
        train_loader: Iterable yielding ``(x, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Classification loss applied to ``(predict, target)``.
        dim_dict: Maps ``str(hidden_dim)`` to
            ``(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``str(hidden_dim)`` to a ground-truth model.
        epoch_idx: Epoch index, used to compute the wandb step.
        ema: Optional EMA helper; ``ema.update()`` is called after each optimizer step.
        args: Configuration object. Although it defaults to ``None`` it is
            dereferenced unconditionally, so it must be provided.
        device: Device the batch is moved to and that is passed to ``sample_weights``.

    Returns:
        Tuple ``(losses.avg, dim_dict, gt_model_dict, train_acc)``: the running
        average of the total loss, the two dictionaries as received, and the
        accuracy over all batches of the epoch.
    """
    # Set the model to training mode
    model.train()
    # NOTE(review): total_loss is accumulated below but never read or returned
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Per-batch predictions and labels, concatenated at the end for the epoch accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the target model, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add zero-centred uniform noise, scaled by coordinate_noise, to the coordinates if enabled
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass
        predict = model_cls(x)
        
        # Record the predicted class of each sample for the training accuracy

        pred = torch.argmax(predict, dim=-1)

        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target) 
        # Compute regularization loss: sum of the L2 norms of the generated weights
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Ground-truth weights restricted to the selected keys, in the ground-truth model's own order
            # NOTE(review): assumes this order matches reconstructed_weights — confirm against sample_weights
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # No ground truth for this dimension: contribute a constant zero
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear any gradients left on the target network's parameters
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # First backward pass on the total loss; the graph is retained for the second pass
        loss.backward(retain_graph=True)
        # Second backward pass: feed the gradients now stored on the target network's selected
        # parameters into reconstructed_weights
        # NOTE(review): presumably this is what carries the classification gradient into the
        # hypernetwork — confirm against sample_weights
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])
        
        # Clip the hypernetwork gradients element-wise if specified
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)
            
        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the running averages and learning rate to wandb every log_interval batches;
        # the step is the global batch counter across epochs
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")
    
    # Accuracy over every batch of the epoch (sklearn expects y_true first, then y_pred)
    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # Logged without an explicit step, unlike the per-batch losses above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Return the average training loss, the two dictionaries as received, and the training accuracy
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select the device (GPU if available, otherwise CPU)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### 1 Parsing arguments for input

In [9]:
CONFIG_PATH = 'neumeta/config/densenet_inr_train/dense_18th_experiment.yaml'
RATIO = '1.0'
CHECKPOINT_PATH = 'toy/experiments_densenet/dense_18th_experiment/cifar10_nerf_best.pth'

In [10]:
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [11]:
args = parse_args(argv_train)  # Parse arguments
print_omegaconf(args)  # Print arguments

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                        dense_18th_experiment                                         |
|        experiment.num_epochs         |                                                 100                                                  |
|       experiment.log_interval        |                                                  50                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [12]:
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [13]:
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 128 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [14]:
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [15]:
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [16]:
# List every learnable parameter of the target model together with its shape
for k, tensor in model.learnable_parameter.items():
    print(k, tensor.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [17]:
# Print the maximum dimension of the model
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [18]:
# Validate the model for the starting dimension (its pretrained form)
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:03<00:00, 22.25it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.92%





In [19]:
checkpoint = model.learnable_parameter
number_param = len(checkpoint)

In [20]:
# Print the keys of the parameters and the number of parameters
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [21]:
# Get the hypermodel
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [22]:
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=128, bias=True)
      (residual_blocks): ModuleList(
        (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
          (3): Parameter containing: [torch.float32 of size  (cuda:0)]
          (4): Parameter containing: [torch.float32 of size  (cuda:0)]
          (5): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=128, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize an EMA that tracks a smoothed copy of the hypernetwork weights

In [23]:
# Initialize the EMA
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [24]:
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [25]:
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x000002BF99402250>


### 6 Training loop

#### 6.1 Initialize training parameters

In [26]:
# Initialize the starting epoch and best accuracy
start_epoch = 0
best_acc = 0.0

#### 6.2 Directory to save the model

In [27]:
# Create the directory to save the model
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [28]:
args.resume_from

'toy/experiments_densenet/dense_18th_experiment/cifar10_nerf_best.pth'

In [29]:
# NOTE(review): this overrides the --resume_from path passed to parse_args above,
# so the `if args.resume_from:` cell below is skipped and start_epoch stays 0.
args.resume_from = False

In [30]:
if args.resume_from:
    # load_checkpoint receives the hypernetwork, optimizer and EMA objects
    # (presumably to restore their state) and returns the bookkeeping values read below
    print(f"Resuming from checkpoint: {args.resume_from}")
    checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
    start_epoch = checkpoint_info['epoch']
    best_acc = checkpoint_info['best_acc']
    print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
    # Note: If there are more elements to retrieve, do so here.

#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [31]:
# Initialize model dictionary
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth




Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exp

In [32]:
# Ground-truth model of the starting dimension; init_model_dict keys it by the stringified dimension
gt_model_dict[f"{args.dimensions.start}"]

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [33]:
# Validate the ground-truth model of the starting dimension (its pretrained form).
# The key is derived from the config instead of being hard-coded, so this cell keeps
# working when args.dimensions.start changes.
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:03<00:00, 23.42it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.92%





In [34]:
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [35]:
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [36]:
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [37]:
args.experiment.num_epochs

100

In [38]:
# Iterate over the epochs (starts at start_epoch so a resumed run continues where it left off)
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch; each batch uses a randomly chosen hidden dimension
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion, 
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema, 
                                                                     args=args, device=device)
    # Step the scheduler once per epoch
    scheduler.step()

    # Print the training loss, training accuracy and learning rate (epoch shown 1-based)
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # Evaluate every eval_interval epochs
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Apply EMA if it is specified
        if ema:
            # NOTE(review): presumably swaps the EMA-averaged hypernetwork weights in for evaluation — confirm against neumeta.utils.EMA
            ema.apply()
        
        # Build the target model from hypernetwork-generated weights and rebind `model` to it,
        # so validation measures how well the hypernetwork generates weights
        model = sample_merge_model(hyper_model, model, args) 
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # If EMA is specified, undo the apply() above
        if ema:
            # NOTE(review): presumably puts the hypernetwork's current training weights back — confirm against neumeta.utils.EMA
            ema.restore()

        # Log the validation loss and accuracy to wandb
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            # Note: this message shows the zero-based epoch index, while the progress lines above show epoch+1
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.2434, Reg Loss = 1.4436, Reconstruct Loss = 0.0000, Cls Loss = 0.2432, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.2243, Reg Loss = 4.3301, Reconstruct Loss = 0.0023, Cls Loss = 0.2215, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.2218, Reg Loss = 5.0227, Reconstruct Loss = 0.0025, Cls Loss = 0.2188, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.2198, Reg Loss = 4.9593, Reconstruct Loss = 0.0027, Cls Loss = 0.2166, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.2161, Reg Loss = 4.8525, Reconstruct Loss = 0.0031, Cls Loss = 0.2125, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.2124, Reg Loss = 4.8966, Reconstruct Loss = 0.0030, Cls Loss = 0.2089, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.2088, Reg Loss = 5.2356, Reconstruct Loss = 0.0025, Cls Loss = 0.2058, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.2071, Reg Loss = 5.3455, Reconstruct Loss = 0.0022, Cls Loss = 0.2043, Learning rate = 1.0000e-03
Epoch [1/100], Training Los

100%|██████████| 79/79 [00:04<00:00, 17.23it/s]


Epoch [1/100], Validation Loss: 1.1307, Validation Accuracy: 67.07%



Checkpoint saved at epoch 0 with accuracy: 67.07%
Iteration 0: Loss = 0.2107, Reg Loss = 5.0841, Reconstruct Loss = 0.0000, Cls Loss = 0.2102, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1891, Reg Loss = 5.3436, Reconstruct Loss = 0.0010, Cls Loss = 0.1876, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1923, Reg Loss = 5.0270, Reconstruct Loss = 0.0009, Cls Loss = 0.1909, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1948, Reg Loss = 4.9524, Reconstruct Loss = 0.0011, Cls Loss = 0.1932, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1916, Reg Loss = 4.7313, Reconstruct Loss = 0.0008, Cls Loss = 0.1904, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1891, Reg Loss = 4.7654, Reconstruct Loss = 0.0008, Cls Loss = 0.1879, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1873, Reg Loss = 4.7395, Reconstruct Loss = 0.0012, Cls Loss = 0.1856, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1853

100%|██████████| 79/79 [00:04<00:00, 16.86it/s]


Epoch [2/100], Validation Loss: 1.0993, Validation Accuracy: 68.46%



Checkpoint saved at epoch 1 with accuracy: 68.46%
Iteration 0: Loss = 0.1751, Reg Loss = 4.9179, Reconstruct Loss = 0.0000, Cls Loss = 0.1746, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1819, Reg Loss = 4.3184, Reconstruct Loss = 0.0028, Cls Loss = 0.1787, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1776, Reg Loss = 4.3926, Reconstruct Loss = 0.0024, Cls Loss = 0.1748, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1768, Reg Loss = 4.4825, Reconstruct Loss = 0.0020, Cls Loss = 0.1744, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1759, Reg Loss = 4.4019, Reconstruct Loss = 0.0020, Cls Loss = 0.1735, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1758, Reg Loss = 4.3720, Reconstruct Loss = 0.0019, Cls Loss = 0.1735, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1748, Reg Loss = 4.3535, Reconstruct Loss = 0.0019, Cls Loss = 0.1725, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1756

100%|██████████| 79/79 [00:03<00:00, 22.83it/s]


Epoch [3/100], Validation Loss: 1.0814, Validation Accuracy: 71.35%



Checkpoint saved at epoch 2 with accuracy: 71.35%
Iteration 0: Loss = 0.1779, Reg Loss = 3.4752, Reconstruct Loss = 0.0000, Cls Loss = 0.1776, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1788, Reg Loss = 3.7496, Reconstruct Loss = 0.0024, Cls Loss = 0.1761, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1788, Reg Loss = 3.8778, Reconstruct Loss = 0.0016, Cls Loss = 0.1768, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1753, Reg Loss = 3.6922, Reconstruct Loss = 0.0017, Cls Loss = 0.1732, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1732, Reg Loss = 3.5559, Reconstruct Loss = 0.0016, Cls Loss = 0.1712, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1704, Reg Loss = 3.4715, Reconstruct Loss = 0.0015, Cls Loss = 0.1685, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1697, Reg Loss = 3.4102, Reconstruct Loss = 0.0016, Cls Loss = 0.1678, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1692

100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [4/100], Validation Loss: 1.0700, Validation Accuracy: 69.23%



Iteration 0: Loss = 0.1364, Reg Loss = 3.7858, Reconstruct Loss = 0.0000, Cls Loss = 0.1360, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1677, Reg Loss = 3.4222, Reconstruct Loss = 0.0016, Cls Loss = 0.1658, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1677, Reg Loss = 3.4529, Reconstruct Loss = 0.0012, Cls Loss = 0.1662, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1681, Reg Loss = 3.4211, Reconstruct Loss = 0.0011, Cls Loss = 0.1666, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1689, Reg Loss = 3.3912, Reconstruct Loss = 0.0017, Cls Loss = 0.1669, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1698, Reg Loss = 3.3700, Reconstruct Loss = 0.0018, Cls Loss = 0.1677, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1694, Reg Loss = 3.3539, Reconstruct Loss = 0.0016, Cls Loss = 0.1675, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1691, Reg Loss = 3.3349, Reconstruct Loss = 0.0016, Cl

100%|██████████| 79/79 [00:04<00:00, 16.63it/s]


Epoch [5/100], Validation Loss: 1.0666, Validation Accuracy: 72.27%



Checkpoint saved at epoch 4 with accuracy: 72.27%
Iteration 0: Loss = 0.1663, Reg Loss = 3.2077, Reconstruct Loss = 0.0000, Cls Loss = 0.1660, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1571, Reg Loss = 3.1929, Reconstruct Loss = 0.0000, Cls Loss = 0.1568, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1627, Reg Loss = 3.1437, Reconstruct Loss = 0.0003, Cls Loss = 0.1621, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1628, Reg Loss = 3.3885, Reconstruct Loss = 0.0010, Cls Loss = 0.1615, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1636, Reg Loss = 3.7304, Reconstruct Loss = 0.0014, Cls Loss = 0.1618, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1641, Reg Loss = 3.8132, Reconstruct Loss = 0.0014, Cls Loss = 0.1623, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1655, Reg Loss = 3.8041, Reconstruct Loss = 0.0012, Cls Loss = 0.1640, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1652

100%|██████████| 79/79 [00:04<00:00, 17.12it/s]


Epoch [6/100], Validation Loss: 1.0727, Validation Accuracy: 69.75%



Iteration 0: Loss = 0.1821, Reg Loss = 3.8458, Reconstruct Loss = 0.0000, Cls Loss = 0.1817, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1583, Reg Loss = 3.8509, Reconstruct Loss = 0.0018, Cls Loss = 0.1561, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1589, Reg Loss = 3.8236, Reconstruct Loss = 0.0013, Cls Loss = 0.1573, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1602, Reg Loss = 3.8276, Reconstruct Loss = 0.0015, Cls Loss = 0.1583, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1621, Reg Loss = 3.8410, Reconstruct Loss = 0.0013, Cls Loss = 0.1604, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1616, Reg Loss = 3.7892, Reconstruct Loss = 0.0012, Cls Loss = 0.1601, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1631, Reg Loss = 3.8095, Reconstruct Loss = 0.0012, Cls Loss = 0.1615, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1628, Reg Loss = 3.8176, Reconstruct Loss = 0.0012, Cl

100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Epoch [7/100], Validation Loss: 1.2174, Validation Accuracy: 67.77%



Iteration 0: Loss = 0.1489, Reg Loss = 4.0681, Reconstruct Loss = 0.0000, Cls Loss = 0.1485, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1632, Reg Loss = 3.6801, Reconstruct Loss = 0.0011, Cls Loss = 0.1617, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1604, Reg Loss = 3.6899, Reconstruct Loss = 0.0011, Cls Loss = 0.1590, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1610, Reg Loss = 3.8486, Reconstruct Loss = 0.0012, Cls Loss = 0.1594, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1613, Reg Loss = 3.9481, Reconstruct Loss = 0.0010, Cls Loss = 0.1599, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1606, Reg Loss = 3.9971, Reconstruct Loss = 0.0012, Cls Loss = 0.1590, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1607, Reg Loss = 4.0260, Reconstruct Loss = 0.0012, Cls Loss = 0.1591, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1616, Reg Loss = 4.1518, Reconstruct Loss = 0.0013, Cl

100%|██████████| 79/79 [00:04<00:00, 17.13it/s]


Epoch [8/100], Validation Loss: 1.1544, Validation Accuracy: 67.14%



Iteration 0: Loss = 0.2170, Reg Loss = 3.7789, Reconstruct Loss = 0.0000, Cls Loss = 0.2166, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1521, Reg Loss = 3.9982, Reconstruct Loss = 0.0015, Cls Loss = 0.1502, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1545, Reg Loss = 3.9832, Reconstruct Loss = 0.0013, Cls Loss = 0.1528, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1602, Reg Loss = 4.1695, Reconstruct Loss = 0.0014, Cls Loss = 0.1583, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1621, Reg Loss = 4.3676, Reconstruct Loss = 0.0013, Cls Loss = 0.1604, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1622, Reg Loss = 4.4156, Reconstruct Loss = 0.0011, Cls Loss = 0.1607, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1609, Reg Loss = 4.4388, Reconstruct Loss = 0.0010, Cls Loss = 0.1594, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1611, Reg Loss = 4.4428, Reconstruct Loss = 0.0012, Cl

100%|██████████| 79/79 [00:04<00:00, 17.28it/s]


Epoch [9/100], Validation Loss: 1.1241, Validation Accuracy: 68.16%



Iteration 0: Loss = 0.1220, Reg Loss = 4.3230, Reconstruct Loss = 0.0000, Cls Loss = 0.1216, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1691, Reg Loss = 4.0462, Reconstruct Loss = 0.0005, Cls Loss = 0.1682, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1624, Reg Loss = 4.0935, Reconstruct Loss = 0.0006, Cls Loss = 0.1614, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1628, Reg Loss = 4.2445, Reconstruct Loss = 0.0007, Cls Loss = 0.1616, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1616, Reg Loss = 4.2067, Reconstruct Loss = 0.0007, Cls Loss = 0.1605, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1614, Reg Loss = 4.1497, Reconstruct Loss = 0.0007, Cls Loss = 0.1603, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1595, Reg Loss = 4.1547, Reconstruct Loss = 0.0006, Cls Loss = 0.1585, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1599, Reg Loss = 4.1427, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:04<00:00, 17.18it/s]


Epoch [10/100], Validation Loss: 1.1454, Validation Accuracy: 68.07%



Iteration 0: Loss = 0.1548, Reg Loss = 4.0042, Reconstruct Loss = 0.0000, Cls Loss = 0.1544, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1522, Reg Loss = 4.0006, Reconstruct Loss = 0.0016, Cls Loss = 0.1501, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1540, Reg Loss = 4.0560, Reconstruct Loss = 0.0013, Cls Loss = 0.1523, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1572, Reg Loss = 4.0970, Reconstruct Loss = 0.0010, Cls Loss = 0.1558, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1581, Reg Loss = 4.2303, Reconstruct Loss = 0.0010, Cls Loss = 0.1566, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1587, Reg Loss = 4.2019, Reconstruct Loss = 0.0012, Cls Loss = 0.1571, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1573, Reg Loss = 4.1406, Reconstruct Loss = 0.0011, Cls Loss = 0.1557, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1576, Reg Loss = 4.0909, Reconstruct Loss = 0.0011, C

100%|██████████| 79/79 [00:04<00:00, 17.31it/s]


Epoch [11/100], Validation Loss: 1.1558, Validation Accuracy: 65.76%



Iteration 0: Loss = 0.1433, Reg Loss = 3.8004, Reconstruct Loss = 0.0000, Cls Loss = 0.1429, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1547, Reg Loss = 4.6440, Reconstruct Loss = 0.0025, Cls Loss = 0.1518, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1586, Reg Loss = 4.4777, Reconstruct Loss = 0.0017, Cls Loss = 0.1564, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1574, Reg Loss = 4.3389, Reconstruct Loss = 0.0014, Cls Loss = 0.1556, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1569, Reg Loss = 4.2178, Reconstruct Loss = 0.0011, Cls Loss = 0.1553, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1557, Reg Loss = 4.1648, Reconstruct Loss = 0.0011, Cls Loss = 0.1542, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1555, Reg Loss = 4.1663, Reconstruct Loss = 0.0009, Cls Loss = 0.1541, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1550, Reg Loss = 4.1501, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 17.14it/s]


Epoch [12/100], Validation Loss: 1.1139, Validation Accuracy: 66.51%



Iteration 0: Loss = 0.1718, Reg Loss = 3.7819, Reconstruct Loss = 0.0000, Cls Loss = 0.1714, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1542, Reg Loss = 4.1096, Reconstruct Loss = 0.0006, Cls Loss = 0.1532, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1547, Reg Loss = 4.0207, Reconstruct Loss = 0.0003, Cls Loss = 0.1540, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1537, Reg Loss = 4.0676, Reconstruct Loss = 0.0004, Cls Loss = 0.1529, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1536, Reg Loss = 4.0301, Reconstruct Loss = 0.0005, Cls Loss = 0.1527, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1520, Reg Loss = 4.0865, Reconstruct Loss = 0.0005, Cls Loss = 0.1510, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1521, Reg Loss = 4.0780, Reconstruct Loss = 0.0006, Cls Loss = 0.1511, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1519, Reg Loss = 4.0952, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 17.51it/s]


Epoch [13/100], Validation Loss: 1.4124, Validation Accuracy: 60.72%



Iteration 0: Loss = 0.1565, Reg Loss = 3.7909, Reconstruct Loss = 0.0000, Cls Loss = 0.1561, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1376, Reg Loss = 4.1167, Reconstruct Loss = 0.0005, Cls Loss = 0.1368, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1387, Reg Loss = 4.2726, Reconstruct Loss = 0.0008, Cls Loss = 0.1375, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1342, Reg Loss = 4.4173, Reconstruct Loss = 0.0014, Cls Loss = 0.1323, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1329, Reg Loss = 4.4959, Reconstruct Loss = 0.0013, Cls Loss = 0.1311, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1313, Reg Loss = 4.6128, Reconstruct Loss = 0.0011, Cls Loss = 0.1297, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1302, Reg Loss = 4.6726, Reconstruct Loss = 0.0011, Cls Loss = 0.1286, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1297, Reg Loss = 4.7197, Reconstruct Loss = 0.0010, C

100%|██████████| 79/79 [00:04<00:00, 17.04it/s]


Epoch [14/100], Validation Loss: 1.7315, Validation Accuracy: 58.09%



Iteration 0: Loss = 0.1116, Reg Loss = 4.9233, Reconstruct Loss = 0.0000, Cls Loss = 0.1111, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1183, Reg Loss = 4.8141, Reconstruct Loss = 0.0005, Cls Loss = 0.1173, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1159, Reg Loss = 4.7862, Reconstruct Loss = 0.0008, Cls Loss = 0.1145, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1167, Reg Loss = 4.8151, Reconstruct Loss = 0.0012, Cls Loss = 0.1150, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1171, Reg Loss = 5.1225, Reconstruct Loss = 0.0011, Cls Loss = 0.1155, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1169, Reg Loss = 5.2091, Reconstruct Loss = 0.0012, Cls Loss = 0.1151, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1156, Reg Loss = 5.2349, Reconstruct Loss = 0.0013, Cls Loss = 0.1138, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1148, Reg Loss = 5.2176, Reconstruct Loss = 0.0014, C

100%|██████████| 79/79 [00:04<00:00, 16.97it/s]


Epoch [15/100], Validation Loss: 6.1008, Validation Accuracy: 46.68%



Iteration 0: Loss = 0.1289, Reg Loss = 4.3444, Reconstruct Loss = 0.0000, Cls Loss = 0.1285, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1085, Reg Loss = 4.9693, Reconstruct Loss = 0.0010, Cls Loss = 0.1070, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1078, Reg Loss = 5.0069, Reconstruct Loss = 0.0011, Cls Loss = 0.1063, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1092, Reg Loss = 4.9971, Reconstruct Loss = 0.0008, Cls Loss = 0.1079, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1105, Reg Loss = 5.0112, Reconstruct Loss = 0.0007, Cls Loss = 0.1093, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1099, Reg Loss = 5.0742, Reconstruct Loss = 0.0010, Cls Loss = 0.1084, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1094, Reg Loss = 5.0913, Reconstruct Loss = 0.0009, Cls Loss = 0.1079, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1082, Reg Loss = 5.1124, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 17.53it/s]


Epoch [16/100], Validation Loss: 4.7811, Validation Accuracy: 48.61%



Iteration 0: Loss = 0.0647, Reg Loss = 5.0089, Reconstruct Loss = 0.0000, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1063, Reg Loss = 4.9357, Reconstruct Loss = 0.0018, Cls Loss = 0.1040, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1077, Reg Loss = 5.0680, Reconstruct Loss = 0.0014, Cls Loss = 0.1058, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1063, Reg Loss = 5.1756, Reconstruct Loss = 0.0011, Cls Loss = 0.1047, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1065, Reg Loss = 5.2817, Reconstruct Loss = 0.0011, Cls Loss = 0.1049, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1059, Reg Loss = 5.3301, Reconstruct Loss = 0.0012, Cls Loss = 0.1042, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1058, Reg Loss = 5.3442, Reconstruct Loss = 0.0010, Cls Loss = 0.1042, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1046, Reg Loss = 5.3800, Reconstruct Loss = 0.0012, C

100%|██████████| 79/79 [00:04<00:00, 16.86it/s]


Epoch [17/100], Validation Loss: 3.7702, Validation Accuracy: 52.92%



Iteration 0: Loss = 0.1342, Reg Loss = 5.2232, Reconstruct Loss = 0.0000, Cls Loss = 0.1336, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1015, Reg Loss = 5.5016, Reconstruct Loss = 0.0000, Cls Loss = 0.1010, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1012, Reg Loss = 5.6022, Reconstruct Loss = 0.0007, Cls Loss = 0.1000, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0978, Reg Loss = 5.6686, Reconstruct Loss = 0.0009, Cls Loss = 0.0964, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0958, Reg Loss = 5.5880, Reconstruct Loss = 0.0008, Cls Loss = 0.0945, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0969, Reg Loss = 5.5338, Reconstruct Loss = 0.0009, Cls Loss = 0.0954, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0974, Reg Loss = 5.5127, Reconstruct Loss = 0.0010, Cls Loss = 0.0959, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0984, Reg Loss = 5.5197, Reconstruct Loss = 0.0010, C

100%|██████████| 79/79 [00:04<00:00, 17.61it/s]


Epoch [18/100], Validation Loss: 4.0186, Validation Accuracy: 55.43%



Iteration 0: Loss = 0.1760, Reg Loss = 6.8379, Reconstruct Loss = 0.0245, Cls Loss = 0.1508, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0968, Reg Loss = 5.8364, Reconstruct Loss = 0.0013, Cls Loss = 0.0948, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0970, Reg Loss = 5.8056, Reconstruct Loss = 0.0016, Cls Loss = 0.0948, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0964, Reg Loss = 5.6872, Reconstruct Loss = 0.0014, Cls Loss = 0.0945, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0945, Reg Loss = 5.6254, Reconstruct Loss = 0.0013, Cls Loss = 0.0926, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0944, Reg Loss = 5.5746, Reconstruct Loss = 0.0012, Cls Loss = 0.0927, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0944, Reg Loss = 5.5546, Reconstruct Loss = 0.0014, Cls Loss = 0.0924, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0932, Reg Loss = 5.5366, Reconstruct Loss = 0.0014, C

100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Epoch [19/100], Validation Loss: 6.7221, Validation Accuracy: 49.16%



Iteration 0: Loss = 0.0869, Reg Loss = 5.9081, Reconstruct Loss = 0.0000, Cls Loss = 0.0863, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0966, Reg Loss = 5.5826, Reconstruct Loss = 0.0005, Cls Loss = 0.0956, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0918, Reg Loss = 5.5648, Reconstruct Loss = 0.0002, Cls Loss = 0.0910, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0879, Reg Loss = 5.4844, Reconstruct Loss = 0.0006, Cls Loss = 0.0867, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0890, Reg Loss = 5.4894, Reconstruct Loss = 0.0008, Cls Loss = 0.0876, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0898, Reg Loss = 5.4314, Reconstruct Loss = 0.0010, Cls Loss = 0.0883, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0899, Reg Loss = 5.3909, Reconstruct Loss = 0.0010, Cls Loss = 0.0884, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0897, Reg Loss = 5.4123, Reconstruct Loss = 0.0011, C

100%|██████████| 79/79 [00:04<00:00, 16.96it/s]


Epoch [20/100], Validation Loss: 5.9186, Validation Accuracy: 48.30%



Iteration 0: Loss = 0.0875, Reg Loss = 5.2351, Reconstruct Loss = 0.0000, Cls Loss = 0.0869, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0940, Reg Loss = 5.4820, Reconstruct Loss = 0.0010, Cls Loss = 0.0924, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0872, Reg Loss = 5.4378, Reconstruct Loss = 0.0005, Cls Loss = 0.0862, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0869, Reg Loss = 5.4351, Reconstruct Loss = 0.0005, Cls Loss = 0.0858, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0888, Reg Loss = 5.4157, Reconstruct Loss = 0.0006, Cls Loss = 0.0877, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0883, Reg Loss = 5.4386, Reconstruct Loss = 0.0009, Cls Loss = 0.0869, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0873, Reg Loss = 5.4410, Reconstruct Loss = 0.0007, Cls Loss = 0.0860, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0876, Reg Loss = 5.4941, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Epoch [21/100], Validation Loss: 4.7201, Validation Accuracy: 48.71%



Iteration 0: Loss = 0.1258, Reg Loss = 5.5222, Reconstruct Loss = 0.0000, Cls Loss = 0.1253, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0873, Reg Loss = 5.7413, Reconstruct Loss = 0.0013, Cls Loss = 0.0854, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0859, Reg Loss = 5.6977, Reconstruct Loss = 0.0007, Cls Loss = 0.0846, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0870, Reg Loss = 5.7169, Reconstruct Loss = 0.0004, Cls Loss = 0.0860, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0872, Reg Loss = 5.6487, Reconstruct Loss = 0.0007, Cls Loss = 0.0860, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0851, Reg Loss = 5.6062, Reconstruct Loss = 0.0006, Cls Loss = 0.0840, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0864, Reg Loss = 5.6106, Reconstruct Loss = 0.0007, Cls Loss = 0.0851, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0851, Reg Loss = 5.6037, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 16.94it/s]


Epoch [22/100], Validation Loss: 3.9877, Validation Accuracy: 49.61%



Iteration 0: Loss = 0.0733, Reg Loss = 5.0799, Reconstruct Loss = 0.0000, Cls Loss = 0.0728, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0902, Reg Loss = 5.7759, Reconstruct Loss = 0.0033, Cls Loss = 0.0863, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0894, Reg Loss = 5.7935, Reconstruct Loss = 0.0022, Cls Loss = 0.0866, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0870, Reg Loss = 5.8474, Reconstruct Loss = 0.0017, Cls Loss = 0.0847, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0865, Reg Loss = 5.8056, Reconstruct Loss = 0.0013, Cls Loss = 0.0847, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0862, Reg Loss = 5.8426, Reconstruct Loss = 0.0014, Cls Loss = 0.0843, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0863, Reg Loss = 5.8508, Reconstruct Loss = 0.0013, Cls Loss = 0.0844, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0857, Reg Loss = 5.8655, Reconstruct Loss = 0.0012, C

100%|██████████| 79/79 [00:04<00:00, 16.77it/s]


Epoch [23/100], Validation Loss: 5.4364, Validation Accuracy: 45.66%



Iteration 0: Loss = 0.0626, Reg Loss = 5.7801, Reconstruct Loss = 0.0000, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0843, Reg Loss = 6.1439, Reconstruct Loss = 0.0010, Cls Loss = 0.0827, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0842, Reg Loss = 6.0899, Reconstruct Loss = 0.0008, Cls Loss = 0.0827, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0834, Reg Loss = 6.0398, Reconstruct Loss = 0.0010, Cls Loss = 0.0818, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0838, Reg Loss = 5.9787, Reconstruct Loss = 0.0008, Cls Loss = 0.0825, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0827, Reg Loss = 5.9745, Reconstruct Loss = 0.0007, Cls Loss = 0.0814, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0825, Reg Loss = 5.9779, Reconstruct Loss = 0.0007, Cls Loss = 0.0811, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0815, Reg Loss = 5.9538, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 17.08it/s]


Epoch [24/100], Validation Loss: 6.7341, Validation Accuracy: 41.72%



Iteration 0: Loss = 0.0996, Reg Loss = 5.2718, Reconstruct Loss = 0.0000, Cls Loss = 0.0991, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0776, Reg Loss = 5.9255, Reconstruct Loss = 0.0006, Cls Loss = 0.0763, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0759, Reg Loss = 5.9549, Reconstruct Loss = 0.0007, Cls Loss = 0.0746, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0752, Reg Loss = 5.9786, Reconstruct Loss = 0.0009, Cls Loss = 0.0737, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0757, Reg Loss = 6.1050, Reconstruct Loss = 0.0010, Cls Loss = 0.0741, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0766, Reg Loss = 6.1076, Reconstruct Loss = 0.0011, Cls Loss = 0.0749, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0757, Reg Loss = 6.0973, Reconstruct Loss = 0.0010, Cls Loss = 0.0741, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0753, Reg Loss = 6.1260, Reconstruct Loss = 0.0011, C

100%|██████████| 79/79 [00:04<00:00, 17.10it/s]


Epoch [25/100], Validation Loss: 5.3589, Validation Accuracy: 44.99%



Iteration 0: Loss = 0.1049, Reg Loss = 5.9282, Reconstruct Loss = 0.0000, Cls Loss = 0.1043, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0721, Reg Loss = 6.3043, Reconstruct Loss = 0.0012, Cls Loss = 0.0703, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0695, Reg Loss = 6.1912, Reconstruct Loss = 0.0010, Cls Loss = 0.0679, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0690, Reg Loss = 6.1228, Reconstruct Loss = 0.0008, Cls Loss = 0.0675, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0698, Reg Loss = 6.0847, Reconstruct Loss = 0.0007, Cls Loss = 0.0684, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0691, Reg Loss = 6.0717, Reconstruct Loss = 0.0007, Cls Loss = 0.0678, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0698, Reg Loss = 6.0831, Reconstruct Loss = 0.0007, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0698, Reg Loss = 6.0994, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 16.89it/s]


Epoch [26/100], Validation Loss: 5.6698, Validation Accuracy: 43.67%



Iteration 0: Loss = 0.0679, Reg Loss = 6.3262, Reconstruct Loss = 0.0000, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0719, Reg Loss = 6.6700, Reconstruct Loss = 0.0018, Cls Loss = 0.0695, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0707, Reg Loss = 6.5290, Reconstruct Loss = 0.0012, Cls Loss = 0.0689, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0691, Reg Loss = 6.4389, Reconstruct Loss = 0.0008, Cls Loss = 0.0676, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0682, Reg Loss = 6.5326, Reconstruct Loss = 0.0008, Cls Loss = 0.0668, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0670, Reg Loss = 6.5510, Reconstruct Loss = 0.0007, Cls Loss = 0.0657, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0669, Reg Loss = 6.5929, Reconstruct Loss = 0.0007, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0673, Reg Loss = 6.6277, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 17.03it/s]


Epoch [27/100], Validation Loss: 12.0171, Validation Accuracy: 25.81%



Iteration 0: Loss = 0.0524, Reg Loss = 6.2594, Reconstruct Loss = 0.0000, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0633, Reg Loss = 6.5478, Reconstruct Loss = 0.0008, Cls Loss = 0.0618, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0640, Reg Loss = 6.4630, Reconstruct Loss = 0.0004, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0651, Reg Loss = 6.4429, Reconstruct Loss = 0.0004, Cls Loss = 0.0640, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0669, Reg Loss = 6.3849, Reconstruct Loss = 0.0003, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0666, Reg Loss = 6.3833, Reconstruct Loss = 0.0004, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0661, Reg Loss = 6.4390, Reconstruct Loss = 0.0006, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0664, Reg Loss = 6.4744, Reconstruct Loss = 0.0008, 

100%|██████████| 79/79 [00:03<00:00, 22.71it/s]


Epoch [28/100], Validation Loss: 10.9300, Validation Accuracy: 31.17%



Iteration 0: Loss = 0.0586, Reg Loss = 6.6640, Reconstruct Loss = 0.0000, Cls Loss = 0.0580, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0604, Reg Loss = 6.4132, Reconstruct Loss = 0.0011, Cls Loss = 0.0587, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0621, Reg Loss = 6.4581, Reconstruct Loss = 0.0008, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0617, Reg Loss = 6.4757, Reconstruct Loss = 0.0007, Cls Loss = 0.0603, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0602, Reg Loss = 6.5130, Reconstruct Loss = 0.0007, Cls Loss = 0.0589, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0609, Reg Loss = 6.5277, Reconstruct Loss = 0.0008, Cls Loss = 0.0595, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0613, Reg Loss = 6.5834, Reconstruct Loss = 0.0007, Cls Loss = 0.0599, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0618, Reg Loss = 6.6110, Reconstruct Loss = 0.0008, 

100%|██████████| 79/79 [00:03<00:00, 22.68it/s]


Epoch [29/100], Validation Loss: 6.3746, Validation Accuracy: 43.31%



Iteration 0: Loss = 0.0379, Reg Loss = 7.1478, Reconstruct Loss = 0.0000, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0639, Reg Loss = 6.9894, Reconstruct Loss = 0.0008, Cls Loss = 0.0624, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0636, Reg Loss = 6.8222, Reconstruct Loss = 0.0007, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0612, Reg Loss = 6.7276, Reconstruct Loss = 0.0005, Cls Loss = 0.0601, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0619, Reg Loss = 6.6842, Reconstruct Loss = 0.0004, Cls Loss = 0.0608, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0626, Reg Loss = 6.7186, Reconstruct Loss = 0.0006, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0620, Reg Loss = 6.7366, Reconstruct Loss = 0.0006, Cls Loss = 0.0608, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0611, Reg Loss = 6.7168, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.60it/s]


Epoch [30/100], Validation Loss: 7.8825, Validation Accuracy: 36.54%



Iteration 0: Loss = 0.0388, Reg Loss = 6.7893, Reconstruct Loss = 0.0000, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0554, Reg Loss = 6.7745, Reconstruct Loss = 0.0003, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0591, Reg Loss = 6.8636, Reconstruct Loss = 0.0006, Cls Loss = 0.0579, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0586, Reg Loss = 6.7965, Reconstruct Loss = 0.0005, Cls Loss = 0.0574, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0590, Reg Loss = 6.8014, Reconstruct Loss = 0.0010, Cls Loss = 0.0573, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0594, Reg Loss = 6.8092, Reconstruct Loss = 0.0011, Cls Loss = 0.0575, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0585, Reg Loss = 6.7750, Reconstruct Loss = 0.0009, Cls Loss = 0.0569, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0580, Reg Loss = 6.7453, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 22.70it/s]


Epoch [31/100], Validation Loss: 4.9713, Validation Accuracy: 46.31%



Iteration 0: Loss = 0.0457, Reg Loss = 6.4226, Reconstruct Loss = 0.0000, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0527, Reg Loss = 6.5205, Reconstruct Loss = 0.0012, Cls Loss = 0.0509, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0547, Reg Loss = 6.6202, Reconstruct Loss = 0.0011, Cls Loss = 0.0530, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0548, Reg Loss = 6.5767, Reconstruct Loss = 0.0007, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0545, Reg Loss = 6.5727, Reconstruct Loss = 0.0008, Cls Loss = 0.0530, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0561, Reg Loss = 6.5980, Reconstruct Loss = 0.0009, Cls Loss = 0.0546, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0559, Reg Loss = 6.6309, Reconstruct Loss = 0.0007, Cls Loss = 0.0545, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0552, Reg Loss = 6.6217, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Epoch [32/100], Validation Loss: 4.8388, Validation Accuracy: 50.36%



Iteration 0: Loss = 0.0821, Reg Loss = 6.8816, Reconstruct Loss = 0.0000, Cls Loss = 0.0814, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0578, Reg Loss = 6.6703, Reconstruct Loss = 0.0003, Cls Loss = 0.0568, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0562, Reg Loss = 6.6064, Reconstruct Loss = 0.0007, Cls Loss = 0.0549, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0553, Reg Loss = 6.6444, Reconstruct Loss = 0.0011, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0559, Reg Loss = 6.6560, Reconstruct Loss = 0.0009, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0558, Reg Loss = 6.7339, Reconstruct Loss = 0.0008, Cls Loss = 0.0543, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0555, Reg Loss = 6.7345, Reconstruct Loss = 0.0008, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0559, Reg Loss = 6.7438, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 16.64it/s]


Epoch [33/100], Validation Loss: 8.1328, Validation Accuracy: 32.79%



Iteration 0: Loss = 0.0870, Reg Loss = 6.9842, Reconstruct Loss = 0.0000, Cls Loss = 0.0863, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0558, Reg Loss = 6.9596, Reconstruct Loss = 0.0007, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0564, Reg Loss = 6.7901, Reconstruct Loss = 0.0007, Cls Loss = 0.0550, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0545, Reg Loss = 6.7556, Reconstruct Loss = 0.0007, Cls Loss = 0.0532, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0552, Reg Loss = 6.7428, Reconstruct Loss = 0.0006, Cls Loss = 0.0539, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0545, Reg Loss = 6.7234, Reconstruct Loss = 0.0005, Cls Loss = 0.0534, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0553, Reg Loss = 6.6716, Reconstruct Loss = 0.0004, Cls Loss = 0.0542, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0547, Reg Loss = 6.7299, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.14it/s]


Epoch [34/100], Validation Loss: 4.4993, Validation Accuracy: 49.38%



Iteration 0: Loss = 0.0698, Reg Loss = 6.9567, Reconstruct Loss = 0.0000, Cls Loss = 0.0691, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0535, Reg Loss = 6.9119, Reconstruct Loss = 0.0008, Cls Loss = 0.0520, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0521, Reg Loss = 6.9272, Reconstruct Loss = 0.0006, Cls Loss = 0.0508, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0524, Reg Loss = 6.8895, Reconstruct Loss = 0.0008, Cls Loss = 0.0510, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0521, Reg Loss = 6.8735, Reconstruct Loss = 0.0008, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0529, Reg Loss = 6.7847, Reconstruct Loss = 0.0007, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0537, Reg Loss = 6.7952, Reconstruct Loss = 0.0007, Cls Loss = 0.0524, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0541, Reg Loss = 6.8247, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 17.06it/s]


Epoch [35/100], Validation Loss: 5.7200, Validation Accuracy: 48.48%



Iteration 0: Loss = 0.0449, Reg Loss = 7.1311, Reconstruct Loss = 0.0000, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0529, Reg Loss = 6.9143, Reconstruct Loss = 0.0005, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0511, Reg Loss = 6.8911, Reconstruct Loss = 0.0005, Cls Loss = 0.0499, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0516, Reg Loss = 6.9861, Reconstruct Loss = 0.0008, Cls Loss = 0.0501, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0512, Reg Loss = 6.9627, Reconstruct Loss = 0.0006, Cls Loss = 0.0499, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0512, Reg Loss = 6.9317, Reconstruct Loss = 0.0006, Cls Loss = 0.0499, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0515, Reg Loss = 6.9200, Reconstruct Loss = 0.0006, Cls Loss = 0.0501, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0513, Reg Loss = 6.9168, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 17.62it/s]


Epoch [36/100], Validation Loss: 5.4083, Validation Accuracy: 48.71%



Iteration 0: Loss = 0.0768, Reg Loss = 6.8937, Reconstruct Loss = 0.0000, Cls Loss = 0.0761, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0518, Reg Loss = 7.0444, Reconstruct Loss = 0.0009, Cls Loss = 0.0502, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0493, Reg Loss = 6.9950, Reconstruct Loss = 0.0009, Cls Loss = 0.0477, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0505, Reg Loss = 7.0500, Reconstruct Loss = 0.0011, Cls Loss = 0.0487, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0508, Reg Loss = 6.9573, Reconstruct Loss = 0.0010, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0499, Reg Loss = 6.9616, Reconstruct Loss = 0.0009, Cls Loss = 0.0484, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0513, Reg Loss = 6.9889, Reconstruct Loss = 0.0009, Cls Loss = 0.0497, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0515, Reg Loss = 6.9789, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 16.60it/s]


Epoch [37/100], Validation Loss: 5.3205, Validation Accuracy: 47.58%



Iteration 0: Loss = 0.0434, Reg Loss = 6.9331, Reconstruct Loss = 0.0000, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0532, Reg Loss = 7.0193, Reconstruct Loss = 0.0010, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0510, Reg Loss = 7.0319, Reconstruct Loss = 0.0005, Cls Loss = 0.0498, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0516, Reg Loss = 7.0254, Reconstruct Loss = 0.0012, Cls Loss = 0.0497, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0510, Reg Loss = 6.9648, Reconstruct Loss = 0.0012, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0520, Reg Loss = 6.9059, Reconstruct Loss = 0.0011, Cls Loss = 0.0503, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0520, Reg Loss = 6.8596, Reconstruct Loss = 0.0009, Cls Loss = 0.0504, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0507, Reg Loss = 6.8263, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:04<00:00, 17.18it/s]


Epoch [38/100], Validation Loss: 4.4425, Validation Accuracy: 50.45%



Iteration 0: Loss = 0.0545, Reg Loss = 6.9768, Reconstruct Loss = 0.0000, Cls Loss = 0.0538, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0500, Reg Loss = 6.9523, Reconstruct Loss = 0.0007, Cls Loss = 0.0486, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0499, Reg Loss = 6.7888, Reconstruct Loss = 0.0009, Cls Loss = 0.0483, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0524, Reg Loss = 6.7634, Reconstruct Loss = 0.0010, Cls Loss = 0.0507, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0510, Reg Loss = 6.6976, Reconstruct Loss = 0.0007, Cls Loss = 0.0496, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0504, Reg Loss = 6.6790, Reconstruct Loss = 0.0006, Cls Loss = 0.0492, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0503, Reg Loss = 6.7072, Reconstruct Loss = 0.0007, Cls Loss = 0.0489, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0494, Reg Loss = 6.6942, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 17.68it/s]


Epoch [39/100], Validation Loss: 4.7491, Validation Accuracy: 51.51%



Iteration 0: Loss = 0.0339, Reg Loss = 6.6630, Reconstruct Loss = 0.0000, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0529, Reg Loss = 6.7802, Reconstruct Loss = 0.0003, Cls Loss = 0.0519, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0536, Reg Loss = 6.9490, Reconstruct Loss = 0.0003, Cls Loss = 0.0525, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0535, Reg Loss = 7.0101, Reconstruct Loss = 0.0007, Cls Loss = 0.0521, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0513, Reg Loss = 6.9581, Reconstruct Loss = 0.0005, Cls Loss = 0.0500, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0506, Reg Loss = 6.9228, Reconstruct Loss = 0.0004, Cls Loss = 0.0495, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0510, Reg Loss = 6.9496, Reconstruct Loss = 0.0005, Cls Loss = 0.0498, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0510, Reg Loss = 6.9769, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.64it/s]


Epoch [40/100], Validation Loss: 4.3720, Validation Accuracy: 52.55%



Iteration 0: Loss = 0.0588, Reg Loss = 6.8600, Reconstruct Loss = 0.0000, Cls Loss = 0.0581, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0528, Reg Loss = 7.0657, Reconstruct Loss = 0.0000, Cls Loss = 0.0521, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0523, Reg Loss = 7.1025, Reconstruct Loss = 0.0010, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0503, Reg Loss = 7.0865, Reconstruct Loss = 0.0009, Cls Loss = 0.0487, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0501, Reg Loss = 7.1581, Reconstruct Loss = 0.0009, Cls Loss = 0.0485, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0500, Reg Loss = 7.1570, Reconstruct Loss = 0.0008, Cls Loss = 0.0485, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0494, Reg Loss = 7.2058, Reconstruct Loss = 0.0009, Cls Loss = 0.0478, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0488, Reg Loss = 7.2745, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:04<00:00, 16.91it/s]


Epoch [41/100], Validation Loss: 8.8003, Validation Accuracy: 27.07%



Iteration 0: Loss = 0.0404, Reg Loss = 7.5968, Reconstruct Loss = 0.0000, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0460, Reg Loss = 7.6642, Reconstruct Loss = 0.0000, Cls Loss = 0.0452, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0485, Reg Loss = 7.5805, Reconstruct Loss = 0.0004, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0491, Reg Loss = 7.5945, Reconstruct Loss = 0.0006, Cls Loss = 0.0478, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0496, Reg Loss = 7.5646, Reconstruct Loss = 0.0006, Cls Loss = 0.0483, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0484, Reg Loss = 7.5381, Reconstruct Loss = 0.0008, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0488, Reg Loss = 7.5172, Reconstruct Loss = 0.0007, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0484, Reg Loss = 7.4959, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 17.11it/s]


Epoch [42/100], Validation Loss: 4.2825, Validation Accuracy: 50.31%



Iteration 0: Loss = 0.0537, Reg Loss = 6.9341, Reconstruct Loss = 0.0000, Cls Loss = 0.0530, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0511, Reg Loss = 7.7240, Reconstruct Loss = 0.0013, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0473, Reg Loss = 7.6458, Reconstruct Loss = 0.0010, Cls Loss = 0.0455, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0474, Reg Loss = 7.6001, Reconstruct Loss = 0.0012, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0479, Reg Loss = 7.5973, Reconstruct Loss = 0.0011, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0473, Reg Loss = 7.6225, Reconstruct Loss = 0.0010, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0475, Reg Loss = 7.6220, Reconstruct Loss = 0.0009, Cls Loss = 0.0458, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0476, Reg Loss = 7.5988, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:04<00:00, 17.01it/s]


Epoch [43/100], Validation Loss: 8.1827, Validation Accuracy: 33.90%



Iteration 0: Loss = 0.0979, Reg Loss = 7.1516, Reconstruct Loss = 0.0000, Cls Loss = 0.0972, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0510, Reg Loss = 7.4469, Reconstruct Loss = 0.0009, Cls Loss = 0.0493, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0483, Reg Loss = 7.5388, Reconstruct Loss = 0.0008, Cls Loss = 0.0467, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0458, Reg Loss = 7.5324, Reconstruct Loss = 0.0006, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0446, Reg Loss = 7.5978, Reconstruct Loss = 0.0005, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 7.6073, Reconstruct Loss = 0.0005, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0442, Reg Loss = 7.6022, Reconstruct Loss = 0.0006, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0443, Reg Loss = 7.6192, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 16.52it/s]


Epoch [44/100], Validation Loss: 7.4427, Validation Accuracy: 39.00%



Iteration 0: Loss = 0.0850, Reg Loss = 7.7977, Reconstruct Loss = 0.0000, Cls Loss = 0.0842, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0480, Reg Loss = 7.4093, Reconstruct Loss = 0.0012, Cls Loss = 0.0461, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0473, Reg Loss = 7.7878, Reconstruct Loss = 0.0013, Cls Loss = 0.0453, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0482, Reg Loss = 7.7778, Reconstruct Loss = 0.0010, Cls Loss = 0.0464, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0482, Reg Loss = 7.7251, Reconstruct Loss = 0.0011, Cls Loss = 0.0464, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0472, Reg Loss = 7.7597, Reconstruct Loss = 0.0009, Cls Loss = 0.0455, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0470, Reg Loss = 7.7247, Reconstruct Loss = 0.0008, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0464, Reg Loss = 7.6941, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:04<00:00, 17.06it/s]


Epoch [45/100], Validation Loss: 6.6243, Validation Accuracy: 39.35%



Iteration 0: Loss = 0.0832, Reg Loss = 7.5417, Reconstruct Loss = 0.0000, Cls Loss = 0.0824, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0443, Reg Loss = 7.6435, Reconstruct Loss = 0.0022, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0443, Reg Loss = 7.5770, Reconstruct Loss = 0.0018, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0428, Reg Loss = 7.5631, Reconstruct Loss = 0.0012, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0432, Reg Loss = 7.5762, Reconstruct Loss = 0.0011, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0439, Reg Loss = 7.6002, Reconstruct Loss = 0.0012, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0444, Reg Loss = 7.6050, Reconstruct Loss = 0.0010, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0444, Reg Loss = 7.5879, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:04<00:00, 17.07it/s]


Epoch [46/100], Validation Loss: 5.6026, Validation Accuracy: 45.91%



Iteration 0: Loss = 0.0483, Reg Loss = 7.2428, Reconstruct Loss = 0.0000, Cls Loss = 0.0476, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0476, Reg Loss = 7.4884, Reconstruct Loss = 0.0012, Cls Loss = 0.0457, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0454, Reg Loss = 7.4986, Reconstruct Loss = 0.0015, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0444, Reg Loss = 7.4976, Reconstruct Loss = 0.0015, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0458, Reg Loss = 7.4867, Reconstruct Loss = 0.0013, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0453, Reg Loss = 7.4903, Reconstruct Loss = 0.0013, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0449, Reg Loss = 7.4802, Reconstruct Loss = 0.0012, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0448, Reg Loss = 7.4781, Reconstruct Loss = 0.0012, C

100%|██████████| 79/79 [00:04<00:00, 17.25it/s]


Epoch [47/100], Validation Loss: 4.2002, Validation Accuracy: 54.84%



Iteration 0: Loss = 0.0349, Reg Loss = 7.2957, Reconstruct Loss = 0.0000, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0438, Reg Loss = 7.4887, Reconstruct Loss = 0.0009, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0433, Reg Loss = 7.4727, Reconstruct Loss = 0.0006, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0426, Reg Loss = 7.4944, Reconstruct Loss = 0.0005, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0421, Reg Loss = 7.4748, Reconstruct Loss = 0.0004, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0428, Reg Loss = 7.4730, Reconstruct Loss = 0.0004, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0429, Reg Loss = 7.5151, Reconstruct Loss = 0.0005, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0433, Reg Loss = 7.4938, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 16.93it/s]


Epoch [48/100], Validation Loss: 3.9025, Validation Accuracy: 55.62%



Iteration 0: Loss = 0.0347, Reg Loss = 7.6235, Reconstruct Loss = 0.0000, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0465, Reg Loss = 7.8132, Reconstruct Loss = 0.0013, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0443, Reg Loss = 7.6910, Reconstruct Loss = 0.0009, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0439, Reg Loss = 7.7060, Reconstruct Loss = 0.0008, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0443, Reg Loss = 7.6737, Reconstruct Loss = 0.0010, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0442, Reg Loss = 7.6009, Reconstruct Loss = 0.0009, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0443, Reg Loss = 7.5787, Reconstruct Loss = 0.0008, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0438, Reg Loss = 7.5631, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.67it/s]


Epoch [49/100], Validation Loss: 3.9097, Validation Accuracy: 53.03%



Iteration 0: Loss = 0.0409, Reg Loss = 7.6020, Reconstruct Loss = 0.0000, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0463, Reg Loss = 7.5722, Reconstruct Loss = 0.0009, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0458, Reg Loss = 7.5181, Reconstruct Loss = 0.0006, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0446, Reg Loss = 7.4967, Reconstruct Loss = 0.0006, Cls Loss = 0.0432, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0449, Reg Loss = 7.5006, Reconstruct Loss = 0.0007, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0449, Reg Loss = 7.4340, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0447, Reg Loss = 7.4311, Reconstruct Loss = 0.0006, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0444, Reg Loss = 7.4331, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.31it/s]


Epoch [50/100], Validation Loss: 3.7370, Validation Accuracy: 55.83%



Iteration 0: Loss = 0.1087, Reg Loss = 7.3290, Reconstruct Loss = 0.0000, Cls Loss = 0.1080, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0448, Reg Loss = 7.6749, Reconstruct Loss = 0.0006, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0433, Reg Loss = 7.7169, Reconstruct Loss = 0.0003, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0431, Reg Loss = 7.7408, Reconstruct Loss = 0.0004, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0433, Reg Loss = 7.7400, Reconstruct Loss = 0.0005, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0433, Reg Loss = 7.6801, Reconstruct Loss = 0.0005, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0429, Reg Loss = 7.6900, Reconstruct Loss = 0.0004, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0427, Reg Loss = 7.6907, Reconstruct Loss = 0.0004, C

100%|██████████| 79/79 [00:03<00:00, 22.80it/s]


Epoch [51/100], Validation Loss: 4.0461, Validation Accuracy: 54.66%



Iteration 0: Loss = 0.0476, Reg Loss = 8.1632, Reconstruct Loss = 0.0000, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0416, Reg Loss = 7.8092, Reconstruct Loss = 0.0000, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0419, Reg Loss = 7.8754, Reconstruct Loss = 0.0005, Cls Loss = 0.0406, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0402, Reg Loss = 7.8200, Reconstruct Loss = 0.0007, Cls Loss = 0.0386, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0408, Reg Loss = 7.7718, Reconstruct Loss = 0.0006, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0417, Reg Loss = 7.6992, Reconstruct Loss = 0.0005, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0428, Reg Loss = 7.6791, Reconstruct Loss = 0.0006, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0423, Reg Loss = 7.7174, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.34it/s]


Epoch [52/100], Validation Loss: 5.9838, Validation Accuracy: 44.54%



Iteration 0: Loss = 0.0578, Reg Loss = 7.8111, Reconstruct Loss = 0.0000, Cls Loss = 0.0570, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0449, Reg Loss = 7.9147, Reconstruct Loss = 0.0017, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0427, Reg Loss = 7.7237, Reconstruct Loss = 0.0015, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0418, Reg Loss = 7.6897, Reconstruct Loss = 0.0012, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0418, Reg Loss = 7.5969, Reconstruct Loss = 0.0010, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0419, Reg Loss = 7.5703, Reconstruct Loss = 0.0009, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0412, Reg Loss = 7.5484, Reconstruct Loss = 0.0008, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0405, Reg Loss = 7.5114, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Epoch [53/100], Validation Loss: 4.0742, Validation Accuracy: 52.82%



Iteration 0: Loss = 0.0240, Reg Loss = 7.3913, Reconstruct Loss = 0.0000, Cls Loss = 0.0233, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0406, Reg Loss = 7.5497, Reconstruct Loss = 0.0010, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0422, Reg Loss = 7.5997, Reconstruct Loss = 0.0010, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0436, Reg Loss = 7.4901, Reconstruct Loss = 0.0011, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0436, Reg Loss = 7.3908, Reconstruct Loss = 0.0009, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0427, Reg Loss = 7.3886, Reconstruct Loss = 0.0008, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0418, Reg Loss = 7.4439, Reconstruct Loss = 0.0006, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0422, Reg Loss = 7.4687, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.57it/s]


Epoch [54/100], Validation Loss: 2.6048, Validation Accuracy: 64.61%



Iteration 0: Loss = 0.0249, Reg Loss = 7.0239, Reconstruct Loss = 0.0000, Cls Loss = 0.0242, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0416, Reg Loss = 7.3091, Reconstruct Loss = 0.0003, Cls Loss = 0.0406, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0410, Reg Loss = 7.1957, Reconstruct Loss = 0.0005, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0433, Reg Loss = 7.2975, Reconstruct Loss = 0.0004, Cls Loss = 0.0421, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0435, Reg Loss = 7.3458, Reconstruct Loss = 0.0005, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0442, Reg Loss = 7.3226, Reconstruct Loss = 0.0005, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0437, Reg Loss = 7.3523, Reconstruct Loss = 0.0004, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0434, Reg Loss = 7.3667, Reconstruct Loss = 0.0004, C

100%|██████████| 79/79 [00:03<00:00, 22.89it/s]


Epoch [55/100], Validation Loss: 2.4965, Validation Accuracy: 64.02%



Iteration 0: Loss = 0.0160, Reg Loss = 7.3915, Reconstruct Loss = 0.0000, Cls Loss = 0.0152, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0373, Reg Loss = 7.5787, Reconstruct Loss = 0.0008, Cls Loss = 0.0358, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0371, Reg Loss = 7.5745, Reconstruct Loss = 0.0009, Cls Loss = 0.0354, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0374, Reg Loss = 7.5977, Reconstruct Loss = 0.0006, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0389, Reg Loss = 7.6178, Reconstruct Loss = 0.0007, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0395, Reg Loss = 7.5742, Reconstruct Loss = 0.0006, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0401, Reg Loss = 7.6140, Reconstruct Loss = 0.0006, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0404, Reg Loss = 7.6596, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.58it/s]


Epoch [56/100], Validation Loss: 3.1450, Validation Accuracy: 61.02%



Iteration 0: Loss = 0.0336, Reg Loss = 7.4018, Reconstruct Loss = 0.0000, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0388, Reg Loss = 7.6513, Reconstruct Loss = 0.0003, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0412, Reg Loss = 7.8050, Reconstruct Loss = 0.0003, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0419, Reg Loss = 7.9154, Reconstruct Loss = 0.0004, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0418, Reg Loss = 7.9834, Reconstruct Loss = 0.0004, Cls Loss = 0.0406, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0419, Reg Loss = 7.9639, Reconstruct Loss = 0.0005, Cls Loss = 0.0406, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0426, Reg Loss = 7.9900, Reconstruct Loss = 0.0007, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0430, Reg Loss = 8.0147, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:03<00:00, 22.58it/s]


Epoch [57/100], Validation Loss: 3.0517, Validation Accuracy: 60.96%



Iteration 0: Loss = 0.0587, Reg Loss = 8.2152, Reconstruct Loss = 0.0000, Cls Loss = 0.0579, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0396, Reg Loss = 7.9257, Reconstruct Loss = 0.0004, Cls Loss = 0.0384, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0406, Reg Loss = 8.0729, Reconstruct Loss = 0.0007, Cls Loss = 0.0391, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0413, Reg Loss = 8.0156, Reconstruct Loss = 0.0006, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0409, Reg Loss = 8.0421, Reconstruct Loss = 0.0004, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0411, Reg Loss = 8.0027, Reconstruct Loss = 0.0003, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0412, Reg Loss = 8.0099, Reconstruct Loss = 0.0004, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0416, Reg Loss = 7.9695, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.50it/s]


Epoch [58/100], Validation Loss: 2.3897, Validation Accuracy: 66.32%



Iteration 0: Loss = 0.0726, Reg Loss = 7.9112, Reconstruct Loss = 0.0000, Cls Loss = 0.0719, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0390, Reg Loss = 7.9914, Reconstruct Loss = 0.0005, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0385, Reg Loss = 7.8975, Reconstruct Loss = 0.0006, Cls Loss = 0.0372, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0393, Reg Loss = 7.9198, Reconstruct Loss = 0.0006, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0402, Reg Loss = 7.9785, Reconstruct Loss = 0.0006, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0401, Reg Loss = 8.0671, Reconstruct Loss = 0.0007, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0401, Reg Loss = 8.0946, Reconstruct Loss = 0.0007, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0401, Reg Loss = 8.0552, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.60it/s]


Epoch [59/100], Validation Loss: 3.7681, Validation Accuracy: 56.25%



Iteration 0: Loss = 0.0776, Reg Loss = 7.7466, Reconstruct Loss = 0.0000, Cls Loss = 0.0768, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0408, Reg Loss = 7.8770, Reconstruct Loss = 0.0005, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0410, Reg Loss = 7.9653, Reconstruct Loss = 0.0007, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0413, Reg Loss = 8.0926, Reconstruct Loss = 0.0007, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0418, Reg Loss = 8.1157, Reconstruct Loss = 0.0008, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0423, Reg Loss = 8.1161, Reconstruct Loss = 0.0008, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0419, Reg Loss = 8.1520, Reconstruct Loss = 0.0008, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0423, Reg Loss = 8.1588, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 22.40it/s]


Epoch [60/100], Validation Loss: 4.3455, Validation Accuracy: 51.10%



Iteration 0: Loss = 0.0307, Reg Loss = 7.9779, Reconstruct Loss = 0.0000, Cls Loss = 0.0299, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0389, Reg Loss = 8.1700, Reconstruct Loss = 0.0013, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0403, Reg Loss = 8.2504, Reconstruct Loss = 0.0013, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0398, Reg Loss = 8.0797, Reconstruct Loss = 0.0011, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0409, Reg Loss = 8.1097, Reconstruct Loss = 0.0011, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0403, Reg Loss = 8.1092, Reconstruct Loss = 0.0010, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0403, Reg Loss = 8.0612, Reconstruct Loss = 0.0010, Cls Loss = 0.0386, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0411, Reg Loss = 8.0472, Reconstruct Loss = 0.0009, C

100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [61/100], Validation Loss: 4.8783, Validation Accuracy: 47.78%



Iteration 0: Loss = 0.0507, Reg Loss = 7.6929, Reconstruct Loss = 0.0000, Cls Loss = 0.0499, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0398, Reg Loss = 7.9388, Reconstruct Loss = 0.0007, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0395, Reg Loss = 7.8650, Reconstruct Loss = 0.0008, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0406, Reg Loss = 7.8553, Reconstruct Loss = 0.0008, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0402, Reg Loss = 7.9292, Reconstruct Loss = 0.0008, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0401, Reg Loss = 7.9876, Reconstruct Loss = 0.0007, Cls Loss = 0.0386, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0404, Reg Loss = 8.0302, Reconstruct Loss = 0.0006, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0395, Reg Loss = 8.0436, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.48it/s]


Epoch [62/100], Validation Loss: 1.6019, Validation Accuracy: 75.85%



Checkpoint saved at epoch 61 with accuracy: 75.85%
Iteration 0: Loss = 0.0276, Reg Loss = 8.2992, Reconstruct Loss = 0.0000, Cls Loss = 0.0267, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0360, Reg Loss = 8.0889, Reconstruct Loss = 0.0010, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0391, Reg Loss = 8.0706, Reconstruct Loss = 0.0010, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0409, Reg Loss = 8.1324, Reconstruct Loss = 0.0007, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0398, Reg Loss = 8.0775, Reconstruct Loss = 0.0005, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0397, Reg Loss = 8.0902, Reconstruct Loss = 0.0005, Cls Loss = 0.0384, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0391, Reg Loss = 8.0442, Reconstruct Loss = 0.0004, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.03

100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [63/100], Validation Loss: 2.3985, Validation Accuracy: 66.44%



Iteration 0: Loss = 0.0217, Reg Loss = 7.6315, Reconstruct Loss = 0.0000, Cls Loss = 0.0209, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0402, Reg Loss = 7.8386, Reconstruct Loss = 0.0009, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0407, Reg Loss = 7.9191, Reconstruct Loss = 0.0007, Cls Loss = 0.0392, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0394, Reg Loss = 7.8910, Reconstruct Loss = 0.0007, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0390, Reg Loss = 7.9371, Reconstruct Loss = 0.0007, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0399, Reg Loss = 7.9893, Reconstruct Loss = 0.0008, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0397, Reg Loss = 8.0319, Reconstruct Loss = 0.0007, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0401, Reg Loss = 8.0850, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 22.89it/s]


Epoch [64/100], Validation Loss: 3.1550, Validation Accuracy: 62.30%



Iteration 0: Loss = 0.0396, Reg Loss = 8.0955, Reconstruct Loss = 0.0000, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0399, Reg Loss = 7.9277, Reconstruct Loss = 0.0002, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0393, Reg Loss = 8.0050, Reconstruct Loss = 0.0005, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0404, Reg Loss = 7.9770, Reconstruct Loss = 0.0005, Cls Loss = 0.0391, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0402, Reg Loss = 7.9648, Reconstruct Loss = 0.0003, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0395, Reg Loss = 7.9851, Reconstruct Loss = 0.0004, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0392, Reg Loss = 7.9828, Reconstruct Loss = 0.0005, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0392, Reg Loss = 7.9758, Reconstruct Loss = 0.0004, C

100%|██████████| 79/79 [00:04<00:00, 18.40it/s]


Epoch [65/100], Validation Loss: 3.5303, Validation Accuracy: 58.10%



Iteration 0: Loss = 0.0505, Reg Loss = 8.1770, Reconstruct Loss = 0.0000, Cls Loss = 0.0497, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0430, Reg Loss = 8.2216, Reconstruct Loss = 0.0010, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0411, Reg Loss = 8.3601, Reconstruct Loss = 0.0012, Cls Loss = 0.0391, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0399, Reg Loss = 8.3761, Reconstruct Loss = 0.0008, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0391, Reg Loss = 8.5294, Reconstruct Loss = 0.0008, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0393, Reg Loss = 8.5267, Reconstruct Loss = 0.0008, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0404, Reg Loss = 8.4897, Reconstruct Loss = 0.0007, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0411, Reg Loss = 8.4551, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:04<00:00, 16.95it/s]


Epoch [66/100], Validation Loss: 3.5151, Validation Accuracy: 58.73%



Iteration 0: Loss = 0.0291, Reg Loss = 8.1358, Reconstruct Loss = 0.0000, Cls Loss = 0.0283, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0387, Reg Loss = 8.3909, Reconstruct Loss = 0.0018, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0394, Reg Loss = 8.2783, Reconstruct Loss = 0.0009, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0377, Reg Loss = 8.1802, Reconstruct Loss = 0.0007, Cls Loss = 0.0362, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0385, Reg Loss = 8.2045, Reconstruct Loss = 0.0007, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0389, Reg Loss = 8.1796, Reconstruct Loss = 0.0005, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0398, Reg Loss = 8.2216, Reconstruct Loss = 0.0006, Cls Loss = 0.0384, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0401, Reg Loss = 8.2180, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Epoch [67/100], Validation Loss: 2.6897, Validation Accuracy: 66.45%



Iteration 0: Loss = 0.0148, Reg Loss = 8.0525, Reconstruct Loss = 0.0000, Cls Loss = 0.0140, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0369, Reg Loss = 8.3984, Reconstruct Loss = 0.0006, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0373, Reg Loss = 8.2239, Reconstruct Loss = 0.0003, Cls Loss = 0.0362, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0379, Reg Loss = 8.1752, Reconstruct Loss = 0.0003, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0377, Reg Loss = 8.1875, Reconstruct Loss = 0.0004, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0382, Reg Loss = 8.2096, Reconstruct Loss = 0.0003, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0383, Reg Loss = 8.2506, Reconstruct Loss = 0.0006, Cls Loss = 0.0369, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0383, Reg Loss = 8.2610, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 16.54it/s]


Epoch [68/100], Validation Loss: 3.5596, Validation Accuracy: 60.86%



Iteration 0: Loss = 0.0788, Reg Loss = 8.6966, Reconstruct Loss = 0.0000, Cls Loss = 0.0779, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0439, Reg Loss = 8.3891, Reconstruct Loss = 0.0003, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0391, Reg Loss = 8.5399, Reconstruct Loss = 0.0005, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0376, Reg Loss = 8.4590, Reconstruct Loss = 0.0006, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0396, Reg Loss = 8.4278, Reconstruct Loss = 0.0007, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0405, Reg Loss = 8.4301, Reconstruct Loss = 0.0007, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0404, Reg Loss = 8.4232, Reconstruct Loss = 0.0007, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0401, Reg Loss = 8.4660, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 17.12it/s]


Epoch [69/100], Validation Loss: 2.2506, Validation Accuracy: 74.89%



Iteration 0: Loss = 0.0548, Reg Loss = 8.2001, Reconstruct Loss = 0.0000, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0366, Reg Loss = 8.6015, Reconstruct Loss = 0.0002, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0358, Reg Loss = 8.5208, Reconstruct Loss = 0.0004, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0357, Reg Loss = 8.3957, Reconstruct Loss = 0.0004, Cls Loss = 0.0345, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0360, Reg Loss = 8.2772, Reconstruct Loss = 0.0006, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0366, Reg Loss = 8.1563, Reconstruct Loss = 0.0005, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0373, Reg Loss = 8.1007, Reconstruct Loss = 0.0004, Cls Loss = 0.0360, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0371, Reg Loss = 8.0888, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.08it/s]


Epoch [70/100], Validation Loss: 2.6387, Validation Accuracy: 64.64%



Iteration 0: Loss = 0.0176, Reg Loss = 7.9768, Reconstruct Loss = 0.0000, Cls Loss = 0.0168, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0414, Reg Loss = 8.0785, Reconstruct Loss = 0.0003, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0397, Reg Loss = 8.1117, Reconstruct Loss = 0.0003, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0387, Reg Loss = 8.1015, Reconstruct Loss = 0.0004, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0385, Reg Loss = 8.0038, Reconstruct Loss = 0.0004, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0377, Reg Loss = 7.9423, Reconstruct Loss = 0.0005, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0384, Reg Loss = 7.9505, Reconstruct Loss = 0.0005, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0382, Reg Loss = 7.9839, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.07it/s]


Epoch [71/100], Validation Loss: 2.4975, Validation Accuracy: 67.73%



Iteration 0: Loss = 0.0728, Reg Loss = 7.6090, Reconstruct Loss = 0.0000, Cls Loss = 0.0721, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0380, Reg Loss = 7.7443, Reconstruct Loss = 0.0002, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0384, Reg Loss = 7.9254, Reconstruct Loss = 0.0005, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0395, Reg Loss = 8.0517, Reconstruct Loss = 0.0005, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0397, Reg Loss = 8.0527, Reconstruct Loss = 0.0004, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0395, Reg Loss = 8.0700, Reconstruct Loss = 0.0005, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0395, Reg Loss = 8.0851, Reconstruct Loss = 0.0006, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0389, Reg Loss = 8.0617, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:04<00:00, 16.48it/s]


Epoch [72/100], Validation Loss: 2.3624, Validation Accuracy: 66.60%



Iteration 0: Loss = 0.0365, Reg Loss = 7.8932, Reconstruct Loss = 0.0000, Cls Loss = 0.0357, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0387, Reg Loss = 7.9881, Reconstruct Loss = 0.0003, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0396, Reg Loss = 8.1418, Reconstruct Loss = 0.0006, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0405, Reg Loss = 8.1283, Reconstruct Loss = 0.0007, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0408, Reg Loss = 8.1386, Reconstruct Loss = 0.0007, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0404, Reg Loss = 8.2047, Reconstruct Loss = 0.0006, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0400, Reg Loss = 8.1815, Reconstruct Loss = 0.0006, Cls Loss = 0.0386, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0394, Reg Loss = 8.2236, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.49it/s]


Epoch [73/100], Validation Loss: 3.0514, Validation Accuracy: 61.00%



Iteration 0: Loss = 0.0132, Reg Loss = 8.3629, Reconstruct Loss = 0.0000, Cls Loss = 0.0124, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0328, Reg Loss = 8.2468, Reconstruct Loss = 0.0005, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0343, Reg Loss = 8.0911, Reconstruct Loss = 0.0005, Cls Loss = 0.0330, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0355, Reg Loss = 8.0885, Reconstruct Loss = 0.0006, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0351, Reg Loss = 8.0800, Reconstruct Loss = 0.0006, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0361, Reg Loss = 8.1212, Reconstruct Loss = 0.0005, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0362, Reg Loss = 8.1571, Reconstruct Loss = 0.0007, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0364, Reg Loss = 8.1466, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.83it/s]


Epoch [74/100], Validation Loss: 2.6183, Validation Accuracy: 67.24%



Iteration 0: Loss = 0.0358, Reg Loss = 8.0322, Reconstruct Loss = 0.0000, Cls Loss = 0.0350, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0353, Reg Loss = 8.0215, Reconstruct Loss = 0.0006, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0364, Reg Loss = 7.9778, Reconstruct Loss = 0.0006, Cls Loss = 0.0350, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0368, Reg Loss = 8.0761, Reconstruct Loss = 0.0005, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0369, Reg Loss = 8.1211, Reconstruct Loss = 0.0006, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0366, Reg Loss = 8.1663, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0363, Reg Loss = 8.1209, Reconstruct Loss = 0.0007, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0361, Reg Loss = 8.1156, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.62it/s]


Epoch [75/100], Validation Loss: 2.5802, Validation Accuracy: 66.52%



Iteration 0: Loss = 0.0239, Reg Loss = 8.2226, Reconstruct Loss = 0.0000, Cls Loss = 0.0231, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0416, Reg Loss = 7.9522, Reconstruct Loss = 0.0006, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0390, Reg Loss = 7.8598, Reconstruct Loss = 0.0003, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0369, Reg Loss = 7.8947, Reconstruct Loss = 0.0002, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0373, Reg Loss = 7.9562, Reconstruct Loss = 0.0004, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0377, Reg Loss = 8.0167, Reconstruct Loss = 0.0005, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0377, Reg Loss = 8.0681, Reconstruct Loss = 0.0005, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0378, Reg Loss = 8.0971, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.62it/s]


Epoch [76/100], Validation Loss: 2.2934, Validation Accuracy: 69.76%



Iteration 0: Loss = 0.0329, Reg Loss = 7.8755, Reconstruct Loss = 0.0000, Cls Loss = 0.0321, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0429, Reg Loss = 8.1747, Reconstruct Loss = 0.0003, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0411, Reg Loss = 8.0565, Reconstruct Loss = 0.0002, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0395, Reg Loss = 8.0835, Reconstruct Loss = 0.0005, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0381, Reg Loss = 8.1498, Reconstruct Loss = 0.0008, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0379, Reg Loss = 8.1548, Reconstruct Loss = 0.0007, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0375, Reg Loss = 8.1867, Reconstruct Loss = 0.0008, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0378, Reg Loss = 8.2036, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [77/100], Validation Loss: 3.0853, Validation Accuracy: 64.30%



Iteration 0: Loss = 0.0219, Reg Loss = 8.4997, Reconstruct Loss = 0.0000, Cls Loss = 0.0211, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0349, Reg Loss = 8.6246, Reconstruct Loss = 0.0005, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0342, Reg Loss = 8.6427, Reconstruct Loss = 0.0004, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0343, Reg Loss = 8.5824, Reconstruct Loss = 0.0004, Cls Loss = 0.0331, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0347, Reg Loss = 8.5473, Reconstruct Loss = 0.0005, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0351, Reg Loss = 8.5732, Reconstruct Loss = 0.0004, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0360, Reg Loss = 8.5752, Reconstruct Loss = 0.0005, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0361, Reg Loss = 8.5651, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.16it/s]


Epoch [78/100], Validation Loss: 2.8981, Validation Accuracy: 60.80%



Iteration 0: Loss = 0.0214, Reg Loss = 7.8488, Reconstruct Loss = 0.0000, Cls Loss = 0.0206, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0382, Reg Loss = 8.0580, Reconstruct Loss = 0.0006, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0357, Reg Loss = 8.1253, Reconstruct Loss = 0.0003, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0381, Reg Loss = 8.1593, Reconstruct Loss = 0.0005, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0375, Reg Loss = 8.2438, Reconstruct Loss = 0.0004, Cls Loss = 0.0363, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0381, Reg Loss = 8.3101, Reconstruct Loss = 0.0005, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0383, Reg Loss = 8.3733, Reconstruct Loss = 0.0005, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0382, Reg Loss = 8.4241, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.18it/s]


Epoch [79/100], Validation Loss: 2.9903, Validation Accuracy: 64.90%



Iteration 0: Loss = 0.0425, Reg Loss = 7.8963, Reconstruct Loss = 0.0000, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0397, Reg Loss = 8.1418, Reconstruct Loss = 0.0012, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0354, Reg Loss = 8.1599, Reconstruct Loss = 0.0009, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0349, Reg Loss = 8.1799, Reconstruct Loss = 0.0007, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0348, Reg Loss = 8.1595, Reconstruct Loss = 0.0006, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0356, Reg Loss = 8.2005, Reconstruct Loss = 0.0007, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0359, Reg Loss = 8.2742, Reconstruct Loss = 0.0009, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0362, Reg Loss = 8.3111, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 20.89it/s]


Epoch [80/100], Validation Loss: 2.2571, Validation Accuracy: 67.26%



Iteration 0: Loss = 0.0264, Reg Loss = 8.1831, Reconstruct Loss = 0.0000, Cls Loss = 0.0256, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0365, Reg Loss = 8.1731, Reconstruct Loss = 0.0011, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0351, Reg Loss = 8.1336, Reconstruct Loss = 0.0009, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0363, Reg Loss = 8.0979, Reconstruct Loss = 0.0008, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0366, Reg Loss = 8.1315, Reconstruct Loss = 0.0006, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0363, Reg Loss = 8.2023, Reconstruct Loss = 0.0007, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0361, Reg Loss = 8.2154, Reconstruct Loss = 0.0007, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0358, Reg Loss = 8.2021, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 23.18it/s]


Epoch [81/100], Validation Loss: 1.8511, Validation Accuracy: 71.82%



Iteration 0: Loss = 0.0407, Reg Loss = 8.4353, Reconstruct Loss = 0.0000, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0363, Reg Loss = 8.3677, Reconstruct Loss = 0.0003, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0354, Reg Loss = 8.4658, Reconstruct Loss = 0.0003, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0352, Reg Loss = 8.3611, Reconstruct Loss = 0.0004, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0344, Reg Loss = 8.3782, Reconstruct Loss = 0.0005, Cls Loss = 0.0331, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0347, Reg Loss = 8.3759, Reconstruct Loss = 0.0006, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0353, Reg Loss = 8.3279, Reconstruct Loss = 0.0005, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0356, Reg Loss = 8.2974, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:04<00:00, 17.68it/s]


Epoch [82/100], Validation Loss: 1.8605, Validation Accuracy: 72.30%



Iteration 0: Loss = 0.0201, Reg Loss = 8.0001, Reconstruct Loss = 0.0000, Cls Loss = 0.0193, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0414, Reg Loss = 8.3678, Reconstruct Loss = 0.0013, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0383, Reg Loss = 8.3407, Reconstruct Loss = 0.0010, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0370, Reg Loss = 8.2768, Reconstruct Loss = 0.0008, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0368, Reg Loss = 8.2429, Reconstruct Loss = 0.0007, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0376, Reg Loss = 8.2483, Reconstruct Loss = 0.0009, Cls Loss = 0.0358, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0378, Reg Loss = 8.2608, Reconstruct Loss = 0.0009, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0376, Reg Loss = 8.2935, Reconstruct Loss = 0.0010, C

100%|██████████| 79/79 [00:03<00:00, 23.13it/s]


Epoch [83/100], Validation Loss: 2.7035, Validation Accuracy: 65.27%



Iteration 0: Loss = 0.0427, Reg Loss = 8.1181, Reconstruct Loss = 0.0000, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0367, Reg Loss = 8.7111, Reconstruct Loss = 0.0011, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0383, Reg Loss = 8.5370, Reconstruct Loss = 0.0008, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0374, Reg Loss = 8.5702, Reconstruct Loss = 0.0006, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0381, Reg Loss = 8.6198, Reconstruct Loss = 0.0007, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0370, Reg Loss = 8.5717, Reconstruct Loss = 0.0007, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0371, Reg Loss = 8.5567, Reconstruct Loss = 0.0006, Cls Loss = 0.0357, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0372, Reg Loss = 8.5315, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [84/100], Validation Loss: 4.4251, Validation Accuracy: 55.61%



Iteration 0: Loss = 0.0186, Reg Loss = 8.0451, Reconstruct Loss = 0.0000, Cls Loss = 0.0178, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0313, Reg Loss = 8.2209, Reconstruct Loss = 0.0003, Cls Loss = 0.0302, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0355, Reg Loss = 8.2900, Reconstruct Loss = 0.0005, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0338, Reg Loss = 8.3224, Reconstruct Loss = 0.0007, Cls Loss = 0.0323, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0348, Reg Loss = 8.2689, Reconstruct Loss = 0.0008, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0355, Reg Loss = 8.2655, Reconstruct Loss = 0.0007, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0357, Reg Loss = 8.2857, Reconstruct Loss = 0.0007, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0356, Reg Loss = 8.2923, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.85it/s]


Epoch [85/100], Validation Loss: 2.2844, Validation Accuracy: 67.15%



Iteration 0: Loss = 0.0399, Reg Loss = 8.3812, Reconstruct Loss = 0.0000, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0380, Reg Loss = 8.3991, Reconstruct Loss = 0.0005, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0343, Reg Loss = 8.6751, Reconstruct Loss = 0.0002, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0338, Reg Loss = 8.5890, Reconstruct Loss = 0.0003, Cls Loss = 0.0326, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0347, Reg Loss = 8.5498, Reconstruct Loss = 0.0003, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0359, Reg Loss = 8.4925, Reconstruct Loss = 0.0003, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0359, Reg Loss = 8.4743, Reconstruct Loss = 0.0003, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0355, Reg Loss = 8.4576, Reconstruct Loss = 0.0003, C

100%|██████████| 79/79 [00:03<00:00, 23.04it/s]


Epoch [86/100], Validation Loss: 1.8061, Validation Accuracy: 73.03%



Iteration 0: Loss = 0.0360, Reg Loss = 8.2776, Reconstruct Loss = 0.0000, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0380, Reg Loss = 8.4505, Reconstruct Loss = 0.0003, Cls Loss = 0.0369, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0370, Reg Loss = 8.5035, Reconstruct Loss = 0.0006, Cls Loss = 0.0356, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0349, Reg Loss = 8.5757, Reconstruct Loss = 0.0005, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0353, Reg Loss = 8.5325, Reconstruct Loss = 0.0005, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0365, Reg Loss = 8.5299, Reconstruct Loss = 0.0004, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0358, Reg Loss = 8.5333, Reconstruct Loss = 0.0004, Cls Loss = 0.0345, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0358, Reg Loss = 8.5350, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 21.84it/s]


Epoch [87/100], Validation Loss: 2.1944, Validation Accuracy: 71.59%



Iteration 0: Loss = 0.0711, Reg Loss = 9.3328, Reconstruct Loss = 0.0000, Cls Loss = 0.0702, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0407, Reg Loss = 8.4624, Reconstruct Loss = 0.0006, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0379, Reg Loss = 8.3653, Reconstruct Loss = 0.0008, Cls Loss = 0.0362, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0366, Reg Loss = 8.2164, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0359, Reg Loss = 8.1642, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0353, Reg Loss = 8.1934, Reconstruct Loss = 0.0007, Cls Loss = 0.0337, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0349, Reg Loss = 8.1821, Reconstruct Loss = 0.0006, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0354, Reg Loss = 8.2048, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.25it/s]


Epoch [88/100], Validation Loss: 1.1910, Validation Accuracy: 78.09%



Checkpoint saved at epoch 87 with accuracy: 78.09%
Iteration 0: Loss = 0.0272, Reg Loss = 8.2344, Reconstruct Loss = 0.0000, Cls Loss = 0.0264, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0373, Reg Loss = 8.2698, Reconstruct Loss = 0.0006, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0364, Reg Loss = 8.2301, Reconstruct Loss = 0.0006, Cls Loss = 0.0350, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0359, Reg Loss = 8.2712, Reconstruct Loss = 0.0008, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0360, Reg Loss = 8.2847, Reconstruct Loss = 0.0007, Cls Loss = 0.0344, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0348, Reg Loss = 8.3613, Reconstruct Loss = 0.0006, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0352, Reg Loss = 8.3742, Reconstruct Loss = 0.0006, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.03

100%|██████████| 79/79 [00:03<00:00, 22.39it/s]


Epoch [89/100], Validation Loss: 1.6555, Validation Accuracy: 71.43%



Iteration 0: Loss = 0.0564, Reg Loss = 8.4448, Reconstruct Loss = 0.0000, Cls Loss = 0.0555, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0336, Reg Loss = 8.4343, Reconstruct Loss = 0.0011, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0342, Reg Loss = 8.2206, Reconstruct Loss = 0.0007, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0351, Reg Loss = 8.2114, Reconstruct Loss = 0.0004, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0350, Reg Loss = 8.2404, Reconstruct Loss = 0.0004, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0348, Reg Loss = 8.2903, Reconstruct Loss = 0.0003, Cls Loss = 0.0337, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0352, Reg Loss = 8.2808, Reconstruct Loss = 0.0004, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0353, Reg Loss = 8.2539, Reconstruct Loss = 0.0004, C

100%|██████████| 79/79 [00:03<00:00, 22.42it/s]


Epoch [90/100], Validation Loss: 1.7367, Validation Accuracy: 72.15%



Iteration 0: Loss = 0.0340, Reg Loss = 8.3685, Reconstruct Loss = 0.0000, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0325, Reg Loss = 8.2805, Reconstruct Loss = 0.0007, Cls Loss = 0.0310, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0344, Reg Loss = 8.2306, Reconstruct Loss = 0.0009, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0341, Reg Loss = 8.2195, Reconstruct Loss = 0.0008, Cls Loss = 0.0325, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0339, Reg Loss = 8.2255, Reconstruct Loss = 0.0008, Cls Loss = 0.0323, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0341, Reg Loss = 8.2111, Reconstruct Loss = 0.0007, Cls Loss = 0.0325, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0339, Reg Loss = 8.2403, Reconstruct Loss = 0.0007, Cls Loss = 0.0324, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0346, Reg Loss = 8.2576, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 21.87it/s]


Epoch [91/100], Validation Loss: 1.4610, Validation Accuracy: 74.70%



Iteration 0: Loss = 0.0324, Reg Loss = 7.9086, Reconstruct Loss = 0.0000, Cls Loss = 0.0316, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0313, Reg Loss = 8.0405, Reconstruct Loss = 0.0003, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0329, Reg Loss = 8.1186, Reconstruct Loss = 0.0004, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0359, Reg Loss = 8.1469, Reconstruct Loss = 0.0004, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0359, Reg Loss = 8.1111, Reconstruct Loss = 0.0005, Cls Loss = 0.0345, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0366, Reg Loss = 8.0983, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0370, Reg Loss = 8.1145, Reconstruct Loss = 0.0006, Cls Loss = 0.0356, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0374, Reg Loss = 8.1561, Reconstruct Loss = 0.0007, C

100%|██████████| 79/79 [00:03<00:00, 22.42it/s]


Epoch [92/100], Validation Loss: 1.4407, Validation Accuracy: 71.64%



Iteration 0: Loss = 0.0237, Reg Loss = 8.0368, Reconstruct Loss = 0.0000, Cls Loss = 0.0229, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0356, Reg Loss = 8.3199, Reconstruct Loss = 0.0006, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0367, Reg Loss = 8.4846, Reconstruct Loss = 0.0005, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0354, Reg Loss = 8.4613, Reconstruct Loss = 0.0005, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0350, Reg Loss = 8.4216, Reconstruct Loss = 0.0004, Cls Loss = 0.0337, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0351, Reg Loss = 8.4232, Reconstruct Loss = 0.0005, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0353, Reg Loss = 8.3920, Reconstruct Loss = 0.0004, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0351, Reg Loss = 8.3564, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 21.32it/s]


Epoch [93/100], Validation Loss: 1.3423, Validation Accuracy: 74.57%



Iteration 0: Loss = 0.0287, Reg Loss = 8.2368, Reconstruct Loss = 0.0000, Cls Loss = 0.0278, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0353, Reg Loss = 8.5539, Reconstruct Loss = 0.0010, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0348, Reg Loss = 8.4948, Reconstruct Loss = 0.0006, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0349, Reg Loss = 8.5015, Reconstruct Loss = 0.0005, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0342, Reg Loss = 8.5685, Reconstruct Loss = 0.0006, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0345, Reg Loss = 8.5541, Reconstruct Loss = 0.0005, Cls Loss = 0.0331, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0346, Reg Loss = 8.5404, Reconstruct Loss = 0.0004, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0342, Reg Loss = 8.5186, Reconstruct Loss = 0.0004, C

100%|██████████| 79/79 [00:03<00:00, 22.92it/s]


Epoch [94/100], Validation Loss: 2.2972, Validation Accuracy: 66.22%



Iteration 0: Loss = 0.0335, Reg Loss = 8.6681, Reconstruct Loss = 0.0000, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0345, Reg Loss = 8.8179, Reconstruct Loss = 0.0008, Cls Loss = 0.0328, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0349, Reg Loss = 8.7474, Reconstruct Loss = 0.0009, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0337, Reg Loss = 8.6538, Reconstruct Loss = 0.0008, Cls Loss = 0.0320, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0343, Reg Loss = 8.6349, Reconstruct Loss = 0.0008, Cls Loss = 0.0326, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0339, Reg Loss = 8.6263, Reconstruct Loss = 0.0007, Cls Loss = 0.0323, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0348, Reg Loss = 8.6654, Reconstruct Loss = 0.0007, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0347, Reg Loss = 8.6583, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.81it/s]


Epoch [95/100], Validation Loss: 2.9926, Validation Accuracy: 62.50%



Iteration 0: Loss = 0.0275, Reg Loss = 8.1090, Reconstruct Loss = 0.0000, Cls Loss = 0.0267, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0310, Reg Loss = 8.4345, Reconstruct Loss = 0.0012, Cls Loss = 0.0289, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0318, Reg Loss = 8.4278, Reconstruct Loss = 0.0007, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0342, Reg Loss = 8.4720, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0349, Reg Loss = 8.5198, Reconstruct Loss = 0.0005, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0357, Reg Loss = 8.5223, Reconstruct Loss = 0.0005, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0353, Reg Loss = 8.5405, Reconstruct Loss = 0.0005, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0348, Reg Loss = 8.5450, Reconstruct Loss = 0.0005, C

100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [96/100], Validation Loss: 1.9618, Validation Accuracy: 69.97%



Iteration 0: Loss = 0.0284, Reg Loss = 8.6445, Reconstruct Loss = 0.0000, Cls Loss = 0.0275, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0322, Reg Loss = 8.6507, Reconstruct Loss = 0.0009, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0319, Reg Loss = 8.5722, Reconstruct Loss = 0.0006, Cls Loss = 0.0305, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0306, Reg Loss = 8.4670, Reconstruct Loss = 0.0006, Cls Loss = 0.0292, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0319, Reg Loss = 8.4218, Reconstruct Loss = 0.0006, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0324, Reg Loss = 8.4017, Reconstruct Loss = 0.0007, Cls Loss = 0.0309, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0327, Reg Loss = 8.4134, Reconstruct Loss = 0.0007, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0330, Reg Loss = 8.4343, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 23.20it/s]


Epoch [97/100], Validation Loss: 1.8179, Validation Accuracy: 72.91%



Iteration 0: Loss = 0.0270, Reg Loss = 8.3215, Reconstruct Loss = 0.0000, Cls Loss = 0.0261, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0303, Reg Loss = 8.3519, Reconstruct Loss = 0.0008, Cls Loss = 0.0287, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0301, Reg Loss = 8.4033, Reconstruct Loss = 0.0009, Cls Loss = 0.0284, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0324, Reg Loss = 8.5362, Reconstruct Loss = 0.0009, Cls Loss = 0.0307, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0327, Reg Loss = 8.6312, Reconstruct Loss = 0.0007, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0328, Reg Loss = 8.5992, Reconstruct Loss = 0.0009, Cls Loss = 0.0310, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0323, Reg Loss = 8.5677, Reconstruct Loss = 0.0008, Cls Loss = 0.0306, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0327, Reg Loss = 8.5283, Reconstruct Loss = 0.0008, C

100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [98/100], Validation Loss: 1.9687, Validation Accuracy: 69.06%



Iteration 0: Loss = 0.0208, Reg Loss = 8.2833, Reconstruct Loss = 0.0000, Cls Loss = 0.0200, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0325, Reg Loss = 8.0547, Reconstruct Loss = 0.0002, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0322, Reg Loss = 8.0238, Reconstruct Loss = 0.0002, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0337, Reg Loss = 8.0575, Reconstruct Loss = 0.0004, Cls Loss = 0.0325, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0338, Reg Loss = 8.1325, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0341, Reg Loss = 8.1749, Reconstruct Loss = 0.0006, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0343, Reg Loss = 8.1534, Reconstruct Loss = 0.0006, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0345, Reg Loss = 8.1669, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Epoch [99/100], Validation Loss: 2.1769, Validation Accuracy: 67.34%



Iteration 0: Loss = 0.0343, Reg Loss = 8.8190, Reconstruct Loss = 0.0000, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0310, Reg Loss = 8.4872, Reconstruct Loss = 0.0005, Cls Loss = 0.0296, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0314, Reg Loss = 8.2215, Reconstruct Loss = 0.0004, Cls Loss = 0.0302, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0320, Reg Loss = 8.2054, Reconstruct Loss = 0.0004, Cls Loss = 0.0307, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0319, Reg Loss = 8.2467, Reconstruct Loss = 0.0004, Cls Loss = 0.0307, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0316, Reg Loss = 8.2204, Reconstruct Loss = 0.0004, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0322, Reg Loss = 8.2044, Reconstruct Loss = 0.0003, Cls Loss = 0.0310, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0328, Reg Loss = 8.2120, Reconstruct Loss = 0.0006, C

100%|██████████| 79/79 [00:03<00:00, 22.89it/s]

Epoch [100/100], Validation Loss: 1.9824, Validation Accuracy: 70.84%








In [39]:
# Training is done: close the active wandb run (the run history/summary tables are shown as this cell's output).
wandb.finish()

0,1
Cls Loss,█▇▇▇▇▅▄▄▅▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,█▇▆▆▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁
Reconstruct Loss,▆█▅▁▂▃▃▆▄▃▃▂▂▂▂▃▃▅▂▂▂▁▃▄▄▁▂▃▂▃▁▂▂▂▂▁▂▂▂▂
Reg Loss,▁▂▃▃▄▄▄▅▄▄▅▅▆▆▆▇▇▆▇▇▇▆▇▇█▇▇▇▇███████████
Training accuracy,▁▂▂▂▂▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇████████████████████
Validation Accuracy,▇▇▇▇▆▅▄▄▅▄▄▄▁▂▃▄▂▄▄▄▄▂▃▄▅▆▆▆▆▆▆▇▆▇▆█▇▇▆▇
Validation Loss,▁▁▁▁▁▂▃▅▅▆▆▅▇▄▅▄▄█▇▇▅▄▃▅▂▃▄▃▃▂▂▃▂▄▂▂▁▁▃▂

0,1
Cls Loss,0.03144
Learning rate,0.001
Loss,0.03282
Reconstruct Loss,0.00056
Reg Loss,8.21204
Training accuracy,0.9904
Validation Accuracy,0.7084
Validation Loss,1.98235


### 7 Testing loop

In [40]:
# Location of the hypernetwork checkpoint to evaluate, inside the run's output directory.
# Built with os.path.join, like the other paths derived from save_model_path in this notebook.
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [41]:
# Display the resolved checkpoint path as a quick sanity check before loading it.
saved_hypernet_path

'toy/experiments_densenet/dense_18th_experiment/cifar10_nerf_best.pth'

In [42]:
# Build a separate hypernetwork instance for testing from the run configuration (`args`) and
# `number_param`; the saved checkpoint weights are loaded into it in the next cell.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [43]:
# Read the checkpoint file at `saved_hypernet_path`, mapping its tensors to the CPU.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
# Copy the weights stored under 'model_state_dict' into the test hypernetwork.
# Kept as the last expression so the notebook displays the key-matching report.
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [44]:
# Evaluate the loaded hypernetwork on every width in range(12, 49): for each hidden_dim,
# build the target network, fill it with weights sampled from `hyper_model_test`,
# validate it, save its state dict and append the metrics to a results text file.

# The results file does not depend on hidden_dim, so resolve its path once up front.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, apply it
    # NOTE(review): `ema` is created earlier in the notebook (outside this view). If it tracks the
    # training hypernetwork rather than `hyper_model_test`, apply()/restore() have no effect on
    # the weights sampled below -- confirm which model it is bound to.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always undo ema.apply(), even if sampling or validation raised, so the
        # kernel is not left with the EMA weights swapped in.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Write the results. 'a' is used to append the results; a new file will be created if it doesn't exist.
    # The file is reopened on every iteration so each line is on disk before the next width is evaluated.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.44it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 7.8556, Validation Accuracy: 45.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 5.3322, Validation Accuracy: 41.51%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.37it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 7.0388, Validation Accuracy: 36.91%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.84it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 1.4606, Validation Accuracy: 74.33%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 1.5181, Validation Accuracy: 75.58%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.19it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 3.0324, Validation Accuracy: 64.86%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 2.7149, Validation Accuracy: 59.28%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 3.0723, Validation Accuracy: 61.82%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.56it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 5.1727, Validation Accuracy: 47.95%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.23it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 3.8672, Validation Accuracy: 51.22%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.25it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 6.5300, Validation Accuracy: 46.18%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.61it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 3.3907, Validation Accuracy: 64.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 8.0320, Validation Accuracy: 35.43%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 4.3093, Validation Accuracy: 56.20%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.45it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 4.0563, Validation Accuracy: 55.39%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 21.99it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 5.6042, Validation Accuracy: 43.38%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 21.65it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 4.0532, Validation Accuracy: 54.22%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 17.02it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 3.2899, Validation Accuracy: 59.85%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 4.5406, Validation Accuracy: 48.21%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.37it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 2.0499, Validation Accuracy: 71.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.16it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 4.7485, Validation Accuracy: 51.27%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.60it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 3.4569, Validation Accuracy: 58.20%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.85it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 2.6106, Validation Accuracy: 61.84%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.03it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 2.9260, Validation Accuracy: 61.35%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 3.7886, Validation Accuracy: 54.10%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.65it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 4.0247, Validation Accuracy: 50.38%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 2.8213, Validation Accuracy: 66.64%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.36it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 2.8950, Validation Accuracy: 64.59%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.41it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 3.2731, Validation Accuracy: 58.06%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.03it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 4.0677, Validation Accuracy: 54.41%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 5.3012, Validation Accuracy: 43.66%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 4.9409, Validation Accuracy: 47.68%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 21.72it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 5.1130, Validation Accuracy: 44.32%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 17.57it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 3.0437, Validation Accuracy: 59.17%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 17.56it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 4.6976, Validation Accuracy: 47.66%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 17.58it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 3.0835, Validation Accuracy: 58.67%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 18.08it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 1.8540, Validation Accuracy: 66.40%


