## Import

In [1]:
import os
import random

In [2]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [3]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [4]:
def find_max_dim(model_cls):
    """Return the largest extent found among the model's learnable parameters.

    The result is the maximum of:
      * the number of learnable parameter tensors,
      * the first two dimensions of every rank-4 tensor (e.g. convolution kernels),
      * the length of every rank-1 tensor (e.g. biases).
    Tensors of any other rank do not contribute.
    """
    params = model_cls.learnable_parameter

    # The parameter count itself is the lower bound for the result
    candidates = [len(params)]

    for weight in params.values():
        shape = weight.shape
        rank = len(shape)
        if rank == 4:
            # Only the two leading dimensions of a rank-4 tensor are considered
            candidates.extend(shape[:2])
        elif rank == 1:
            candidates.append(shape[0])

    return max(candidates)

### Initialize wandb

In [5]:
def initialize_wandb(config):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    Args:
        config: Configuration for the run. Must expose ``config.experiment.name``
            and be convertible with ``dict(config)``.
    """
    # Imported after the docstring so the string above is the function's real __doc__
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project="dense-inr-trial", name=run_name, config=dict(config), group='cifar10')

### Init model dictionary

In [6]:
def init_model_dict(args, device):
    """
    Build one target model per candidate hidden dimension, plus the reference model for the starting dimension.

    Args:
        args: Configuration object; reads ``args.dimensions.range``, ``args.dimensions.start`` and the ``args.model.*`` fields.
        device: Device every created model is moved to.

    Returns:
        dim_dict: Maps ``f"{dim}"`` to ``(model, coords_tensor, keys_list, indices_list, size_list, None)``; the trailing ``None`` fills the key-mask slot.
        gt_model_dict: Maps the string form of the starting dimension to a model created with ``smooth=True`` and switched to eval mode.
    """
    def _build(hidden_dim, **extra):
        # Single place that spells out the constructor arguments shared by every model built here
        cfg = args.model
        return create_model(cfg.type,
                            layers=cfg.layers,
                            growth=cfg.growth,
                            compression=cfg.compression,
                            bottleneck=cfg.bottleneck,
                            drop_rate=cfg.drop_rate,
                            hidden_dim=hidden_dim,
                            path=cfg.pretrained_path,
                            **extra).to(device)

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        key = f"{dim}"

        # Target model for this dimension together with its sampled coordinate metadata
        candidate = _build(dim)
        coords_tensor, keys_list, indices_list, size_list = sample_coordinates(candidate)
        dim_dict[key] = (candidate, coords_tensor, keys_list, indices_list, size_list, None)

        # Blank lines to visually separate the per-model output
        print('\n')

        # Only the starting dimension (that of the pretrained model) gets a ground-truth model
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = _build(dim, smooth=True)
        reference.eval()
        gt_model_dict[key] = reference

    return dim_dict, gt_model_dict

### Training function

In [7]:
# Function to train the model for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """
    Train the hypernetwork for one pass over ``train_loader``.

    For every batch a hidden dimension is drawn at random from ``args.dimensions.range``,
    weights for the matching target network are produced with ``sample_weights``, and the
    hypernetwork is updated from a weighted sum of classification, L2-regularization and
    (when a ground-truth model exists for that dimension) reconstruction losses.

    Args:
        model: Hypernetwork being optimized.
        train_loader: Sized iterable yielding ``(x, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Classification loss, called as ``criterion(predict, target)``.
        dim_dict: Maps ``f"{hidden_dim}"`` to
            ``(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``f"{hidden_dim}"`` to a ground-truth model exposing ``learnable_parameter``.
        epoch_idx: Epoch index; only used to compute the wandb step of the per-batch logs.
        ema: Optional object whose ``update()`` is called after every optimizer step.
        args: Configuration object. Required despite the ``None`` default, since it is read unconditionally.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(average total loss, dim_dict, gt_model_dict, training accuracy)``.
    """
    # Set the model to training mode
    model.train()

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Per-batch predictions and labels, concatenated at the end for the training accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes, plus the keys that were selected
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add uniform noise in [-0.5, 0.5) * coordinate_noise to the coordinates if specified
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise

        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass through the target network
        predict = model_cls(x)

        # Keep the predicted classes and labels for the training accuracy
        pred = torch.argmax(predict, dim=-1)
        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target)
        # Compute regularization loss (sum of the L2 norms of the generated weights)
        reg_loss = sum(torch.norm(w, p=2) for w in reconstructed_weights)
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # Zero placeholder, created on the same device as the other loss terms
            reconstruct_loss = torch.tensor(0.0, device=device)
        # Compute the total loss
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss

        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Drop stale gradients on the target network so only this batch contributes
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # First pass: populate .grad on the target-network parameters (graph kept for the second pass)
        loss.backward(retain_graph=True)
        # Second pass: push the target-network gradients through the generated weights into the hypernetwork.
        # NOTE(review): assumes named_parameters() filtered by selected_keys yields the same order as
        # reconstructed_weights — confirm against sample_weights.
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])

        # Clip the gradients if specified
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)

        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the running averages and learning rate to wandb
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")

    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # Logged without an explicit step, unlike the per-batch logs above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Returns the training loss, structure of network in each dimension, and the original structure of pretrained network
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select the device (GPU if available, otherwise CPU)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### 1 Parsing arguments for input

In [9]:
# Experiment configuration file, forwarded to parse_args as --config
CONFIG_PATH = 'neumeta/config/densenet_inr_train/dense_5th_experiment.yaml'
# Kept as a string because it is forwarded as a command-line style argument (--ratio)
RATIO = '1.0'
# Hypernetwork checkpoint, forwarded to parse_args as --resume_from
CHECKPOINT_PATH = 'toy/experiments_densenet/dense_5th_experiment/cifar10_nerf_best.pth'

In [10]:
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [11]:
args = parse_args(argv_train)  # Parse arguments
print_omegaconf(args)  # Print arguments

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                         dense_5th_experiment                                         |
|        experiment.num_epochs         |                                                 100                                                  |
|       experiment.log_interval        |                                                  50                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [12]:
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [13]:
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 64 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [14]:
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [15]:
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [16]:
# List every learnable parameter together with its shape
for k, tensor in model.learnable_parameter.items():
    print(k, tensor.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [17]:
# Print the maximum dimension of the model
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [18]:
# Validate the model for the starting dimension (its pretrained form)
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 41.68it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.93%





In [19]:
checkpoint = model.learnable_parameter
number_param = len(checkpoint)

In [20]:
# Print the keys of the parameters and the number of parameters
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [21]:
# Get the hypermodel
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [22]:
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=128, bias=True)
      (residual_blocks): ModuleList(
        (0-2): 3 x Linear(in_features=128, out_features=128, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=128, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize EMA to track a smoothed copy of the hypernetwork weights

In [23]:
# Initialize the EMA
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [24]:
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [25]:
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x0000026BF81B5D10>


### 6 Training loop

#### 6.1 Initialize training parameters

In [26]:
# Initialize the starting epoch and best accuracy
start_epoch = 0
best_acc = 0.0

#### 6.2 Directory to save the model

In [27]:
# Create the directory to save the model
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [28]:
args.resume_from

'toy/experiments_densenet/dense_5th_experiment/cifar10_nerf_best.pth'

In [29]:
# Override the --resume_from value parsed from the command-line arguments: with a falsy value
# the `if args.resume_from:` cell that follows is skipped and training starts from epoch 0.
args.resume_from = False

In [30]:
# Restore the hypernetwork, optimizer and EMA from a checkpoint when a resume path is set
if args.resume_from:
    print(f"Resuming from checkpoint: {args.resume_from}")
    checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
    # Continue from the epoch and best accuracy recorded in the checkpoint
    start_epoch = checkpoint_info['epoch']
    best_acc = checkpoint_info['best_acc']
    print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
    # NOTE(review): the scheduler is not passed to load_checkpoint here — confirm whether its
    # state needs to be restored or advanced to start_epoch when resuming.

#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [31]:
# Initialize model dictionary
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exper

In [32]:
gt_model_dict['48']

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [33]:
# Validate the ground-truth model for the starting dimension (its pretrained form).
# The key is derived from the configuration instead of being hard-coded, using the same
# f"{dim}" format under which init_model_dict stores the ground-truth model.
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 44.16it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.94%





In [34]:
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [35]:
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [36]:
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [37]:
args.experiment.num_epochs

100

In [38]:
# Main loop: one hypernetwork training epoch, a scheduler step, then periodic validation and checkpointing
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch; each batch uses a randomly chosen hidden dimension
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion, 
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema, 
                                                                     args=args, device=device)
    # Step the learning-rate scheduler once per epoch
    scheduler.step()

    # Print the training loss, training accuracy and learning rate (epoch shown 1-based)
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # If it's time to evaluate the model
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Evaluate with the EMA weights when an EMA is in use
        if ema:
            ema.apply()  # Presumably swaps the EMA-averaged weights into hyper_model — confirm against neumeta.utils.EMA
        
        # Generate weights with the hypernetwork and merge them into `model`;
        # note that `model` is rebound to the returned network on every evaluation
        model = sample_merge_model(hyper_model, model, args) 
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # Undo the EMA swap so training continues from the non-averaged weights
        if ema:
            ema.restore()  # Presumably puts the raw training weights back into hyper_model — confirm

        # Log the validation loss and accuracy to wandb (no explicit step is given)
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            # `epoch` is 0-based here (both the stored value and the message), unlike the 1-based epoch printed above
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.1346, Reg Loss = 2.0720, Reconstruct Loss = 0.0000, Cls Loss = 0.1344, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.2325, Reg Loss = 4.0841, Reconstruct Loss = 0.0026, Cls Loss = 0.2295, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.2371, Reg Loss = 4.7578, Reconstruct Loss = 0.0018, Cls Loss = 0.2348, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.2313, Reg Loss = 5.0503, Reconstruct Loss = 0.0020, Cls Loss = 0.2288, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.2286, Reg Loss = 5.0325, Reconstruct Loss = 0.0024, Cls Loss = 0.2256, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.2264, Reg Loss = 4.9494, Reconstruct Loss = 0.0024, Cls Loss = 0.2235, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.2264, Reg Loss = 5.3269, Reconstruct Loss = 0.0020, Cls Loss = 0.2239, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.2230, Reg Loss = 5.3403, Reconstruct Loss = 0.0018, Cls Loss = 0.2207, Learning rate = 1.0000e-03
Iteration 400: Loss = 0.221

100%|██████████| 157/157 [00:03<00:00, 42.73it/s]


Epoch [1/100], Validation Loss: 0.9773, Validation Accuracy: 71.08%



Checkpoint saved at epoch 0 with accuracy: 71.08%
Iteration 0: Loss = 0.1123, Reg Loss = 4.5584, Reconstruct Loss = 0.0000, Cls Loss = 0.1119, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1883, Reg Loss = 3.9241, Reconstruct Loss = 0.0027, Cls Loss = 0.1852, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1951, Reg Loss = 3.9410, Reconstruct Loss = 0.0024, Cls Loss = 0.1923, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1935, Reg Loss = 4.2423, Reconstruct Loss = 0.0020, Cls Loss = 0.1911, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1945, Reg Loss = 4.6452, Reconstruct Loss = 0.0019, Cls Loss = 0.1921, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1957, Reg Loss = 4.6333, Reconstruct Loss = 0.0018, Cls Loss = 0.1934, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1942, Reg Loss = 4.5823, Reconstruct Loss = 0.0019, Cls Loss = 0.1918, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1955

100%|██████████| 157/157 [00:03<00:00, 45.28it/s]


Epoch [2/100], Validation Loss: 1.0544, Validation Accuracy: 69.28%



Iteration 0: Loss = 0.1347, Reg Loss = 3.7626, Reconstruct Loss = 0.0000, Cls Loss = 0.1344, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1785, Reg Loss = 3.8550, Reconstruct Loss = 0.0013, Cls Loss = 0.1768, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1858, Reg Loss = 3.9788, Reconstruct Loss = 0.0009, Cls Loss = 0.1845, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1856, Reg Loss = 3.9707, Reconstruct Loss = 0.0009, Cls Loss = 0.1843, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1848, Reg Loss = 3.9170, Reconstruct Loss = 0.0014, Cls Loss = 0.1830, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1854, Reg Loss = 3.9589, Reconstruct Loss = 0.0015, Cls Loss = 0.1835, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1852, Reg Loss = 3.9716, Reconstruct Loss = 0.0015, Cls Loss = 0.1834, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1869, Reg Loss = 3.9285, Reconstruct Loss = 0.0014, Cl

100%|██████████| 157/157 [00:03<00:00, 45.01it/s]


Epoch [3/100], Validation Loss: 1.1782, Validation Accuracy: 68.83%



Iteration 0: Loss = 0.1141, Reg Loss = 4.0113, Reconstruct Loss = 0.0000, Cls Loss = 0.1137, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1861, Reg Loss = 4.4321, Reconstruct Loss = 0.0012, Cls Loss = 0.1845, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1864, Reg Loss = 4.8392, Reconstruct Loss = 0.0010, Cls Loss = 0.1849, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1874, Reg Loss = 4.7800, Reconstruct Loss = 0.0013, Cls Loss = 0.1856, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1876, Reg Loss = 4.5936, Reconstruct Loss = 0.0013, Cls Loss = 0.1858, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1871, Reg Loss = 4.5058, Reconstruct Loss = 0.0011, Cls Loss = 0.1855, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1841, Reg Loss = 4.3546, Reconstruct Loss = 0.0012, Cls Loss = 0.1825, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1850, Reg Loss = 4.2875, Reconstruct Loss = 0.0012, Cl

100%|██████████| 157/157 [00:03<00:00, 45.05it/s]


Epoch [4/100], Validation Loss: 1.0235, Validation Accuracy: 70.39%



Iteration 0: Loss = 0.2426, Reg Loss = 3.2431, Reconstruct Loss = 0.0000, Cls Loss = 0.2423, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1776, Reg Loss = 3.7515, Reconstruct Loss = 0.0025, Cls Loss = 0.1747, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1746, Reg Loss = 3.9439, Reconstruct Loss = 0.0020, Cls Loss = 0.1721, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1781, Reg Loss = 4.1747, Reconstruct Loss = 0.0020, Cls Loss = 0.1756, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1772, Reg Loss = 4.3032, Reconstruct Loss = 0.0019, Cls Loss = 0.1749, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1749, Reg Loss = 4.3563, Reconstruct Loss = 0.0015, Cls Loss = 0.1730, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1779, Reg Loss = 4.3706, Reconstruct Loss = 0.0015, Cls Loss = 0.1760, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1802, Reg Loss = 4.3992, Reconstruct Loss = 0.0017, Cl

100%|██████████| 157/157 [00:03<00:00, 45.24it/s]


Epoch [5/100], Validation Loss: 1.1978, Validation Accuracy: 67.76%



Iteration 0: Loss = 0.1634, Reg Loss = 4.5447, Reconstruct Loss = 0.0000, Cls Loss = 0.1630, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1708, Reg Loss = 4.3654, Reconstruct Loss = 0.0021, Cls Loss = 0.1683, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1738, Reg Loss = 4.4826, Reconstruct Loss = 0.0015, Cls Loss = 0.1718, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1735, Reg Loss = 4.5522, Reconstruct Loss = 0.0012, Cls Loss = 0.1719, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1764, Reg Loss = 4.6591, Reconstruct Loss = 0.0014, Cls Loss = 0.1745, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1746, Reg Loss = 4.5841, Reconstruct Loss = 0.0015, Cls Loss = 0.1727, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1728, Reg Loss = 4.5170, Reconstruct Loss = 0.0015, Cls Loss = 0.1709, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1724, Reg Loss = 4.5063, Reconstruct Loss = 0.0015, Cl

100%|██████████| 157/157 [00:03<00:00, 45.23it/s]


Epoch [6/100], Validation Loss: 1.3244, Validation Accuracy: 67.05%



Iteration 0: Loss = 0.1653, Reg Loss = 3.4554, Reconstruct Loss = 0.0000, Cls Loss = 0.1650, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1452, Reg Loss = 3.9786, Reconstruct Loss = 0.0005, Cls Loss = 0.1443, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1431, Reg Loss = 4.0759, Reconstruct Loss = 0.0003, Cls Loss = 0.1425, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1412, Reg Loss = 3.9189, Reconstruct Loss = 0.0006, Cls Loss = 0.1402, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1375, Reg Loss = 3.9634, Reconstruct Loss = 0.0008, Cls Loss = 0.1363, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1380, Reg Loss = 3.9915, Reconstruct Loss = 0.0008, Cls Loss = 0.1369, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1363, Reg Loss = 3.9767, Reconstruct Loss = 0.0010, Cls Loss = 0.1348, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1365, Reg Loss = 3.9664, Reconstruct Loss = 0.0012, Cl

100%|██████████| 157/157 [00:03<00:00, 45.35it/s]


Epoch [7/100], Validation Loss: 1.3772, Validation Accuracy: 62.95%



Iteration 0: Loss = 0.0788, Reg Loss = 4.1204, Reconstruct Loss = 0.0000, Cls Loss = 0.0784, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1244, Reg Loss = 3.9279, Reconstruct Loss = 0.0006, Cls Loss = 0.1234, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1220, Reg Loss = 3.7682, Reconstruct Loss = 0.0012, Cls Loss = 0.1205, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1251, Reg Loss = 3.7658, Reconstruct Loss = 0.0019, Cls Loss = 0.1228, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1258, Reg Loss = 3.7390, Reconstruct Loss = 0.0016, Cls Loss = 0.1238, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1277, Reg Loss = 3.7145, Reconstruct Loss = 0.0016, Cls Loss = 0.1257, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1278, Reg Loss = 3.6933, Reconstruct Loss = 0.0017, Cls Loss = 0.1257, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1277, Reg Loss = 3.6572, Reconstruct Loss = 0.0018, Cl

100%|██████████| 157/157 [00:03<00:00, 45.19it/s]


Epoch [8/100], Validation Loss: 1.2468, Validation Accuracy: 66.64%



Iteration 0: Loss = 0.0791, Reg Loss = 3.5544, Reconstruct Loss = 0.0000, Cls Loss = 0.0787, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1337, Reg Loss = 3.7126, Reconstruct Loss = 0.0025, Cls Loss = 0.1308, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1262, Reg Loss = 3.7842, Reconstruct Loss = 0.0020, Cls Loss = 0.1238, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1209, Reg Loss = 3.7439, Reconstruct Loss = 0.0016, Cls Loss = 0.1189, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1181, Reg Loss = 3.7576, Reconstruct Loss = 0.0015, Cls Loss = 0.1161, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1170, Reg Loss = 3.7289, Reconstruct Loss = 0.0017, Cls Loss = 0.1150, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1165, Reg Loss = 3.7452, Reconstruct Loss = 0.0014, Cls Loss = 0.1148, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1169, Reg Loss = 3.7342, Reconstruct Loss = 0.0017, Cl

100%|██████████| 157/157 [00:03<00:00, 45.11it/s]


Epoch [9/100], Validation Loss: 1.3767, Validation Accuracy: 66.59%



Iteration 0: Loss = 0.1517, Reg Loss = 5.7558, Reconstruct Loss = 0.0257, Cls Loss = 0.1254, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1058, Reg Loss = 4.2168, Reconstruct Loss = 0.0011, Cls Loss = 0.1042, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1060, Reg Loss = 4.2778, Reconstruct Loss = 0.0011, Cls Loss = 0.1044, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1072, Reg Loss = 4.2658, Reconstruct Loss = 0.0010, Cls Loss = 0.1058, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1071, Reg Loss = 4.2491, Reconstruct Loss = 0.0011, Cls Loss = 0.1056, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1075, Reg Loss = 4.2039, Reconstruct Loss = 0.0010, Cls Loss = 0.1061, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1076, Reg Loss = 4.2081, Reconstruct Loss = 0.0015, Cls Loss = 0.1057, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1068, Reg Loss = 4.1913, Reconstruct Loss = 0.0015, Cl

100%|██████████| 157/157 [00:03<00:00, 45.41it/s]


Epoch [10/100], Validation Loss: 2.3030, Validation Accuracy: 53.11%



Iteration 0: Loss = 0.0801, Reg Loss = 4.4213, Reconstruct Loss = 0.0000, Cls Loss = 0.0797, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0942, Reg Loss = 4.6890, Reconstruct Loss = 0.0014, Cls Loss = 0.0923, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0969, Reg Loss = 4.5254, Reconstruct Loss = 0.0007, Cls Loss = 0.0957, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0979, Reg Loss = 4.4757, Reconstruct Loss = 0.0007, Cls Loss = 0.0967, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0968, Reg Loss = 4.4728, Reconstruct Loss = 0.0009, Cls Loss = 0.0954, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0979, Reg Loss = 4.5327, Reconstruct Loss = 0.0013, Cls Loss = 0.0961, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1001, Reg Loss = 4.5075, Reconstruct Loss = 0.0011, Cls Loss = 0.0985, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0996, Reg Loss = 4.5123, Reconstruct Loss = 0.0013, C

100%|██████████| 157/157 [00:03<00:00, 45.29it/s]


Epoch [11/100], Validation Loss: 2.8663, Validation Accuracy: 49.08%



Iteration 0: Loss = 0.1016, Reg Loss = 4.3084, Reconstruct Loss = 0.0000, Cls Loss = 0.1012, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0975, Reg Loss = 4.5359, Reconstruct Loss = 0.0047, Cls Loss = 0.0924, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0905, Reg Loss = 4.5672, Reconstruct Loss = 0.0031, Cls Loss = 0.0870, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0895, Reg Loss = 4.5918, Reconstruct Loss = 0.0025, Cls Loss = 0.0866, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0887, Reg Loss = 4.5751, Reconstruct Loss = 0.0019, Cls Loss = 0.0863, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0881, Reg Loss = 4.5735, Reconstruct Loss = 0.0020, Cls Loss = 0.0856, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0902, Reg Loss = 4.6107, Reconstruct Loss = 0.0019, Cls Loss = 0.0878, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0905, Reg Loss = 4.6422, Reconstruct Loss = 0.0018, C

100%|██████████| 157/157 [00:03<00:00, 45.43it/s]


Epoch [12/100], Validation Loss: 2.4082, Validation Accuracy: 52.22%



Iteration 0: Loss = 0.0467, Reg Loss = 4.3684, Reconstruct Loss = 0.0000, Cls Loss = 0.0463, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0983, Reg Loss = 4.4749, Reconstruct Loss = 0.0010, Cls Loss = 0.0969, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0956, Reg Loss = 4.5928, Reconstruct Loss = 0.0010, Cls Loss = 0.0941, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0963, Reg Loss = 4.6654, Reconstruct Loss = 0.0013, Cls Loss = 0.0945, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0956, Reg Loss = 4.7321, Reconstruct Loss = 0.0015, Cls Loss = 0.0937, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0941, Reg Loss = 4.7219, Reconstruct Loss = 0.0016, Cls Loss = 0.0920, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0962, Reg Loss = 4.7868, Reconstruct Loss = 0.0016, Cls Loss = 0.0941, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0952, Reg Loss = 4.8066, Reconstruct Loss = 0.0016, C

100%|██████████| 157/157 [00:03<00:00, 45.12it/s]


Epoch [13/100], Validation Loss: 2.4902, Validation Accuracy: 49.91%



Iteration 0: Loss = 0.0754, Reg Loss = 5.0082, Reconstruct Loss = 0.0000, Cls Loss = 0.0749, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0863, Reg Loss = 5.4151, Reconstruct Loss = 0.0017, Cls Loss = 0.0840, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0825, Reg Loss = 5.1898, Reconstruct Loss = 0.0011, Cls Loss = 0.0808, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0794, Reg Loss = 5.1389, Reconstruct Loss = 0.0008, Cls Loss = 0.0781, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0823, Reg Loss = 5.0723, Reconstruct Loss = 0.0007, Cls Loss = 0.0811, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0834, Reg Loss = 5.0476, Reconstruct Loss = 0.0007, Cls Loss = 0.0822, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0853, Reg Loss = 5.0333, Reconstruct Loss = 0.0007, Cls Loss = 0.0841, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0856, Reg Loss = 5.0491, Reconstruct Loss = 0.0009, C

100%|██████████| 157/157 [00:03<00:00, 45.45it/s]


Epoch [14/100], Validation Loss: 3.6171, Validation Accuracy: 47.63%



Iteration 0: Loss = 0.1339, Reg Loss = 5.0289, Reconstruct Loss = 0.0000, Cls Loss = 0.1334, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0816, Reg Loss = 5.4325, Reconstruct Loss = 0.0021, Cls Loss = 0.0790, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0865, Reg Loss = 5.4447, Reconstruct Loss = 0.0014, Cls Loss = 0.0845, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0849, Reg Loss = 5.3973, Reconstruct Loss = 0.0011, Cls Loss = 0.0832, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0817, Reg Loss = 5.4147, Reconstruct Loss = 0.0012, Cls Loss = 0.0800, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0807, Reg Loss = 5.3802, Reconstruct Loss = 0.0013, Cls Loss = 0.0788, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0804, Reg Loss = 5.3399, Reconstruct Loss = 0.0012, Cls Loss = 0.0787, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0797, Reg Loss = 5.3648, Reconstruct Loss = 0.0013, C

100%|██████████| 157/157 [00:03<00:00, 45.37it/s]


Epoch [15/100], Validation Loss: 6.5786, Validation Accuracy: 35.68%



Iteration 0: Loss = 0.0475, Reg Loss = 5.6132, Reconstruct Loss = 0.0000, Cls Loss = 0.0469, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0792, Reg Loss = 5.8852, Reconstruct Loss = 0.0006, Cls Loss = 0.0781, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0789, Reg Loss = 5.8170, Reconstruct Loss = 0.0009, Cls Loss = 0.0774, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0782, Reg Loss = 5.7353, Reconstruct Loss = 0.0008, Cls Loss = 0.0768, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0758, Reg Loss = 5.7320, Reconstruct Loss = 0.0012, Cls Loss = 0.0740, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0757, Reg Loss = 5.7108, Reconstruct Loss = 0.0016, Cls Loss = 0.0735, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0750, Reg Loss = 5.7363, Reconstruct Loss = 0.0013, Cls Loss = 0.0731, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0738, Reg Loss = 5.7561, Reconstruct Loss = 0.0011, C

100%|██████████| 157/157 [00:03<00:00, 45.72it/s]


Epoch [16/100], Validation Loss: 4.8715, Validation Accuracy: 44.65%



Iteration 0: Loss = 0.0321, Reg Loss = 6.0423, Reconstruct Loss = 0.0000, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0638, Reg Loss = 6.1374, Reconstruct Loss = 0.0007, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0699, Reg Loss = 6.2679, Reconstruct Loss = 0.0012, Cls Loss = 0.0681, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0713, Reg Loss = 6.3222, Reconstruct Loss = 0.0017, Cls Loss = 0.0690, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0713, Reg Loss = 6.2867, Reconstruct Loss = 0.0014, Cls Loss = 0.0693, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0707, Reg Loss = 6.2735, Reconstruct Loss = 0.0012, Cls Loss = 0.0689, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0704, Reg Loss = 6.3064, Reconstruct Loss = 0.0012, Cls Loss = 0.0685, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0707, Reg Loss = 6.2651, Reconstruct Loss = 0.0011, C

100%|██████████| 157/157 [00:03<00:00, 45.48it/s]


Epoch [17/100], Validation Loss: 8.9900, Validation Accuracy: 27.56%



Iteration 0: Loss = 0.0232, Reg Loss = 6.9557, Reconstruct Loss = 0.0000, Cls Loss = 0.0225, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0748, Reg Loss = 6.7174, Reconstruct Loss = 0.0009, Cls Loss = 0.0733, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0775, Reg Loss = 6.6762, Reconstruct Loss = 0.0007, Cls Loss = 0.0762, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0765, Reg Loss = 6.6788, Reconstruct Loss = 0.0009, Cls Loss = 0.0749, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0755, Reg Loss = 6.7217, Reconstruct Loss = 0.0011, Cls Loss = 0.0738, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0744, Reg Loss = 6.6983, Reconstruct Loss = 0.0008, Cls Loss = 0.0729, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0725, Reg Loss = 6.7206, Reconstruct Loss = 0.0009, Cls Loss = 0.0710, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0712, Reg Loss = 6.7099, Reconstruct Loss = 0.0009, C

100%|██████████| 157/157 [00:03<00:00, 45.10it/s]


Epoch [18/100], Validation Loss: 6.3329, Validation Accuracy: 36.92%



Iteration 0: Loss = 0.0701, Reg Loss = 6.7957, Reconstruct Loss = 0.0000, Cls Loss = 0.0695, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0712, Reg Loss = 6.6055, Reconstruct Loss = 0.0012, Cls Loss = 0.0693, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0675, Reg Loss = 6.5224, Reconstruct Loss = 0.0013, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0671, Reg Loss = 6.5756, Reconstruct Loss = 0.0015, Cls Loss = 0.0650, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0646, Reg Loss = 6.5525, Reconstruct Loss = 0.0013, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0646, Reg Loss = 6.5061, Reconstruct Loss = 0.0012, Cls Loss = 0.0627, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0641, Reg Loss = 6.4901, Reconstruct Loss = 0.0012, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0647, Reg Loss = 6.4577, Reconstruct Loss = 0.0010, C

100%|██████████| 157/157 [00:03<00:00, 45.04it/s]


Epoch [19/100], Validation Loss: 12.1544, Validation Accuracy: 20.04%



Iteration 0: Loss = 0.0888, Reg Loss = 6.6433, Reconstruct Loss = 0.0000, Cls Loss = 0.0881, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0614, Reg Loss = 6.7243, Reconstruct Loss = 0.0011, Cls Loss = 0.0597, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0632, Reg Loss = 6.6603, Reconstruct Loss = 0.0013, Cls Loss = 0.0612, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0629, Reg Loss = 6.6573, Reconstruct Loss = 0.0014, Cls Loss = 0.0609, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0642, Reg Loss = 6.6177, Reconstruct Loss = 0.0010, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0633, Reg Loss = 6.5838, Reconstruct Loss = 0.0008, Cls Loss = 0.0618, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0625, Reg Loss = 6.6341, Reconstruct Loss = 0.0009, Cls Loss = 0.0609, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0622, Reg Loss = 6.6300, Reconstruct Loss = 0.0009, 

100%|██████████| 157/157 [00:03<00:00, 45.32it/s]


Epoch [20/100], Validation Loss: 10.1009, Validation Accuracy: 26.39%



Iteration 0: Loss = 0.0656, Reg Loss = 6.9106, Reconstruct Loss = 0.0000, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0645, Reg Loss = 7.1055, Reconstruct Loss = 0.0000, Cls Loss = 0.0638, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0666, Reg Loss = 7.0527, Reconstruct Loss = 0.0008, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0686, Reg Loss = 7.1268, Reconstruct Loss = 0.0009, Cls Loss = 0.0671, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0666, Reg Loss = 7.1040, Reconstruct Loss = 0.0009, Cls Loss = 0.0650, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0640, Reg Loss = 7.0703, Reconstruct Loss = 0.0008, Cls Loss = 0.0625, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0653, Reg Loss = 7.1047, Reconstruct Loss = 0.0010, Cls Loss = 0.0635, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0649, Reg Loss = 7.0804, Reconstruct Loss = 0.0011, 

100%|██████████| 157/157 [00:03<00:00, 44.84it/s]


Epoch [21/100], Validation Loss: 11.2324, Validation Accuracy: 24.94%



Iteration 0: Loss = 0.0991, Reg Loss = 7.1133, Reconstruct Loss = 0.0000, Cls Loss = 0.0984, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0652, Reg Loss = 7.2039, Reconstruct Loss = 0.0015, Cls Loss = 0.0630, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0664, Reg Loss = 7.2836, Reconstruct Loss = 0.0013, Cls Loss = 0.0644, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0668, Reg Loss = 7.1467, Reconstruct Loss = 0.0016, Cls Loss = 0.0646, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0686, Reg Loss = 7.1255, Reconstruct Loss = 0.0014, Cls Loss = 0.0665, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0679, Reg Loss = 7.1184, Reconstruct Loss = 0.0013, Cls Loss = 0.0658, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0672, Reg Loss = 7.0540, Reconstruct Loss = 0.0013, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0660, Reg Loss = 7.0337, Reconstruct Loss = 0.0012, 

100%|██████████| 157/157 [00:03<00:00, 44.40it/s]


Epoch [22/100], Validation Loss: 8.6391, Validation Accuracy: 28.85%



Iteration 0: Loss = 0.0714, Reg Loss = 7.1813, Reconstruct Loss = 0.0000, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0638, Reg Loss = 7.4742, Reconstruct Loss = 0.0024, Cls Loss = 0.0606, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0621, Reg Loss = 7.3893, Reconstruct Loss = 0.0023, Cls Loss = 0.0591, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0597, Reg Loss = 7.2089, Reconstruct Loss = 0.0019, Cls Loss = 0.0571, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0614, Reg Loss = 7.1321, Reconstruct Loss = 0.0018, Cls Loss = 0.0589, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0611, Reg Loss = 7.0929, Reconstruct Loss = 0.0016, Cls Loss = 0.0588, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0603, Reg Loss = 7.0677, Reconstruct Loss = 0.0013, Cls Loss = 0.0583, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0599, Reg Loss = 7.0741, Reconstruct Loss = 0.0013, C

100%|██████████| 157/157 [00:03<00:00, 45.29it/s]


Epoch [23/100], Validation Loss: 6.9019, Validation Accuracy: 36.10%



Iteration 0: Loss = 0.0310, Reg Loss = 6.7305, Reconstruct Loss = 0.0000, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0612, Reg Loss = 7.0416, Reconstruct Loss = 0.0011, Cls Loss = 0.0594, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0623, Reg Loss = 7.1013, Reconstruct Loss = 0.0016, Cls Loss = 0.0600, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0621, Reg Loss = 7.1235, Reconstruct Loss = 0.0016, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0618, Reg Loss = 7.1146, Reconstruct Loss = 0.0014, Cls Loss = 0.0597, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0625, Reg Loss = 7.1004, Reconstruct Loss = 0.0015, Cls Loss = 0.0603, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0626, Reg Loss = 7.0278, Reconstruct Loss = 0.0014, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0609, Reg Loss = 6.9773, Reconstruct Loss = 0.0014, C

100%|██████████| 157/157 [00:03<00:00, 44.70it/s]


Epoch [24/100], Validation Loss: 9.6402, Validation Accuracy: 28.50%



Iteration 0: Loss = 0.0354, Reg Loss = 7.1955, Reconstruct Loss = 0.0000, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0683, Reg Loss = 7.3775, Reconstruct Loss = 0.0020, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0636, Reg Loss = 7.4154, Reconstruct Loss = 0.0013, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0613, Reg Loss = 7.3713, Reconstruct Loss = 0.0012, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0608, Reg Loss = 7.2881, Reconstruct Loss = 0.0015, Cls Loss = 0.0586, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0599, Reg Loss = 7.2649, Reconstruct Loss = 0.0013, Cls Loss = 0.0578, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0593, Reg Loss = 7.2301, Reconstruct Loss = 0.0011, Cls Loss = 0.0575, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0609, Reg Loss = 7.2382, Reconstruct Loss = 0.0009, C

100%|██████████| 157/157 [00:03<00:00, 45.01it/s]


Epoch [25/100], Validation Loss: 12.3984, Validation Accuracy: 22.14%



Iteration 0: Loss = 0.1315, Reg Loss = 6.8171, Reconstruct Loss = 0.0000, Cls Loss = 0.1309, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0654, Reg Loss = 7.3709, Reconstruct Loss = 0.0009, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0621, Reg Loss = 7.2319, Reconstruct Loss = 0.0005, Cls Loss = 0.0609, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0634, Reg Loss = 7.2577, Reconstruct Loss = 0.0006, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0619, Reg Loss = 7.2887, Reconstruct Loss = 0.0007, Cls Loss = 0.0605, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0618, Reg Loss = 7.2513, Reconstruct Loss = 0.0006, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0629, Reg Loss = 7.2148, Reconstruct Loss = 0.0006, Cls Loss = 0.0616, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0621, Reg Loss = 7.1819, Reconstruct Loss = 0.0006, 

100%|██████████| 157/157 [00:03<00:00, 45.05it/s]


Epoch [26/100], Validation Loss: 8.5146, Validation Accuracy: 29.87%



Iteration 0: Loss = 0.0338, Reg Loss = 7.5511, Reconstruct Loss = 0.0000, Cls Loss = 0.0331, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0609, Reg Loss = 7.6698, Reconstruct Loss = 0.0023, Cls Loss = 0.0578, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0588, Reg Loss = 7.5835, Reconstruct Loss = 0.0018, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0583, Reg Loss = 7.5643, Reconstruct Loss = 0.0015, Cls Loss = 0.0560, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0579, Reg Loss = 7.5960, Reconstruct Loss = 0.0013, Cls Loss = 0.0557, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0573, Reg Loss = 7.5509, Reconstruct Loss = 0.0012, Cls Loss = 0.0554, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0574, Reg Loss = 7.5620, Reconstruct Loss = 0.0010, Cls Loss = 0.0556, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0563, Reg Loss = 7.5899, Reconstruct Loss = 0.0010, C

100%|██████████| 157/157 [00:03<00:00, 44.79it/s]


Epoch [27/100], Validation Loss: 15.1311, Validation Accuracy: 21.55%



Iteration 0: Loss = 0.0773, Reg Loss = 7.3604, Reconstruct Loss = 0.0000, Cls Loss = 0.0766, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0655, Reg Loss = 7.3517, Reconstruct Loss = 0.0003, Cls Loss = 0.0645, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0618, Reg Loss = 7.3458, Reconstruct Loss = 0.0007, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0602, Reg Loss = 7.3052, Reconstruct Loss = 0.0006, Cls Loss = 0.0589, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0596, Reg Loss = 7.3230, Reconstruct Loss = 0.0006, Cls Loss = 0.0583, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0581, Reg Loss = 7.3844, Reconstruct Loss = 0.0006, Cls Loss = 0.0567, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0570, Reg Loss = 7.3706, Reconstruct Loss = 0.0005, Cls Loss = 0.0558, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0560, Reg Loss = 7.3164, Reconstruct Loss = 0.0005, 

100%|██████████| 157/157 [00:03<00:00, 45.08it/s]


Epoch [28/100], Validation Loss: 8.7094, Validation Accuracy: 32.89%



Iteration 0: Loss = 0.0614, Reg Loss = 7.4523, Reconstruct Loss = 0.0000, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0550, Reg Loss = 7.3307, Reconstruct Loss = 0.0003, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0545, Reg Loss = 7.3674, Reconstruct Loss = 0.0003, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0529, Reg Loss = 7.4298, Reconstruct Loss = 0.0005, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0555, Reg Loss = 7.3031, Reconstruct Loss = 0.0005, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0563, Reg Loss = 7.3561, Reconstruct Loss = 0.0006, Cls Loss = 0.0550, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0557, Reg Loss = 7.4061, Reconstruct Loss = 0.0008, Cls Loss = 0.0541, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0547, Reg Loss = 7.4905, Reconstruct Loss = 0.0011, C

100%|██████████| 157/157 [00:03<00:00, 44.88it/s]


Epoch [29/100], Validation Loss: 9.3834, Validation Accuracy: 28.29%



Iteration 0: Loss = 0.0646, Reg Loss = 7.2637, Reconstruct Loss = 0.0000, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0551, Reg Loss = 7.5007, Reconstruct Loss = 0.0006, Cls Loss = 0.0538, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0540, Reg Loss = 7.3195, Reconstruct Loss = 0.0006, Cls Loss = 0.0527, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0554, Reg Loss = 7.3370, Reconstruct Loss = 0.0006, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0560, Reg Loss = 7.4307, Reconstruct Loss = 0.0007, Cls Loss = 0.0545, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0553, Reg Loss = 7.5200, Reconstruct Loss = 0.0009, Cls Loss = 0.0537, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0554, Reg Loss = 7.5472, Reconstruct Loss = 0.0009, Cls Loss = 0.0537, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0552, Reg Loss = 7.5101, Reconstruct Loss = 0.0008, C

100%|██████████| 157/157 [00:03<00:00, 45.50it/s]


Epoch [30/100], Validation Loss: 3.0647, Validation Accuracy: 56.46%



Iteration 0: Loss = 0.0547, Reg Loss = 7.3584, Reconstruct Loss = 0.0000, Cls Loss = 0.0539, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0533, Reg Loss = 7.7469, Reconstruct Loss = 0.0014, Cls Loss = 0.0511, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0549, Reg Loss = 7.5954, Reconstruct Loss = 0.0014, Cls Loss = 0.0527, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0541, Reg Loss = 7.5285, Reconstruct Loss = 0.0012, Cls Loss = 0.0521, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0551, Reg Loss = 7.5387, Reconstruct Loss = 0.0015, Cls Loss = 0.0529, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0560, Reg Loss = 7.5197, Reconstruct Loss = 0.0013, Cls Loss = 0.0539, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0555, Reg Loss = 7.5232, Reconstruct Loss = 0.0013, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0548, Reg Loss = 7.5034, Reconstruct Loss = 0.0012, C

100%|██████████| 157/157 [00:03<00:00, 45.17it/s]


Epoch [31/100], Validation Loss: 8.2322, Validation Accuracy: 33.41%



Iteration 0: Loss = 0.0196, Reg Loss = 6.9007, Reconstruct Loss = 0.0000, Cls Loss = 0.0189, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0539, Reg Loss = 7.8063, Reconstruct Loss = 0.0009, Cls Loss = 0.0522, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0564, Reg Loss = 7.7338, Reconstruct Loss = 0.0011, Cls Loss = 0.0546, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0570, Reg Loss = 7.6016, Reconstruct Loss = 0.0007, Cls Loss = 0.0555, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0560, Reg Loss = 7.5349, Reconstruct Loss = 0.0005, Cls Loss = 0.0547, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0575, Reg Loss = 7.5461, Reconstruct Loss = 0.0005, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0585, Reg Loss = 7.5484, Reconstruct Loss = 0.0005, Cls Loss = 0.0573, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0580, Reg Loss = 7.5464, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.14it/s]


Epoch [32/100], Validation Loss: 4.0208, Validation Accuracy: 50.26%



Iteration 0: Loss = 0.0549, Reg Loss = 7.0563, Reconstruct Loss = 0.0000, Cls Loss = 0.0542, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0533, Reg Loss = 7.3620, Reconstruct Loss = 0.0002, Cls Loss = 0.0524, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0524, Reg Loss = 7.3997, Reconstruct Loss = 0.0005, Cls Loss = 0.0512, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0529, Reg Loss = 7.4372, Reconstruct Loss = 0.0005, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0537, Reg Loss = 7.4653, Reconstruct Loss = 0.0003, Cls Loss = 0.0526, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0544, Reg Loss = 7.4964, Reconstruct Loss = 0.0005, Cls Loss = 0.0532, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0545, Reg Loss = 7.4416, Reconstruct Loss = 0.0005, Cls Loss = 0.0533, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0539, Reg Loss = 7.4446, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.45it/s]


Epoch [33/100], Validation Loss: 2.9741, Validation Accuracy: 59.62%



Iteration 0: Loss = 0.1018, Reg Loss = 7.2576, Reconstruct Loss = 0.0000, Cls Loss = 0.1011, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0598, Reg Loss = 7.6047, Reconstruct Loss = 0.0016, Cls Loss = 0.0574, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0521, Reg Loss = 7.6262, Reconstruct Loss = 0.0008, Cls Loss = 0.0505, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0523, Reg Loss = 7.6964, Reconstruct Loss = 0.0006, Cls Loss = 0.0509, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0528, Reg Loss = 7.6533, Reconstruct Loss = 0.0006, Cls Loss = 0.0514, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0523, Reg Loss = 7.6923, Reconstruct Loss = 0.0005, Cls Loss = 0.0511, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0530, Reg Loss = 7.7304, Reconstruct Loss = 0.0006, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0530, Reg Loss = 7.7084, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.38it/s]


Epoch [34/100], Validation Loss: 8.5389, Validation Accuracy: 33.36%



Iteration 0: Loss = 0.0204, Reg Loss = 7.7713, Reconstruct Loss = 0.0000, Cls Loss = 0.0196, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0522, Reg Loss = 7.6230, Reconstruct Loss = 0.0003, Cls Loss = 0.0511, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0529, Reg Loss = 7.5254, Reconstruct Loss = 0.0007, Cls Loss = 0.0514, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0532, Reg Loss = 7.5194, Reconstruct Loss = 0.0008, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0530, Reg Loss = 7.4401, Reconstruct Loss = 0.0007, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0519, Reg Loss = 7.3715, Reconstruct Loss = 0.0007, Cls Loss = 0.0504, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0506, Reg Loss = 7.3519, Reconstruct Loss = 0.0007, Cls Loss = 0.0492, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0516, Reg Loss = 7.3372, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.00it/s]


Epoch [35/100], Validation Loss: 5.9737, Validation Accuracy: 39.34%



Iteration 0: Loss = 0.1316, Reg Loss = 7.5264, Reconstruct Loss = 0.0000, Cls Loss = 0.1309, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0512, Reg Loss = 7.6500, Reconstruct Loss = 0.0002, Cls Loss = 0.0502, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0498, Reg Loss = 7.6472, Reconstruct Loss = 0.0003, Cls Loss = 0.0487, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0498, Reg Loss = 7.6272, Reconstruct Loss = 0.0006, Cls Loss = 0.0484, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0509, Reg Loss = 7.5810, Reconstruct Loss = 0.0005, Cls Loss = 0.0497, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0508, Reg Loss = 7.5447, Reconstruct Loss = 0.0006, Cls Loss = 0.0494, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0514, Reg Loss = 7.5242, Reconstruct Loss = 0.0006, Cls Loss = 0.0501, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0524, Reg Loss = 7.5034, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.91it/s]


Epoch [36/100], Validation Loss: 5.3848, Validation Accuracy: 41.64%



Iteration 0: Loss = 0.0984, Reg Loss = 7.4095, Reconstruct Loss = 0.0000, Cls Loss = 0.0977, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0552, Reg Loss = 7.1722, Reconstruct Loss = 0.0003, Cls Loss = 0.0543, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0518, Reg Loss = 7.1618, Reconstruct Loss = 0.0006, Cls Loss = 0.0505, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0520, Reg Loss = 7.2415, Reconstruct Loss = 0.0007, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0534, Reg Loss = 7.3108, Reconstruct Loss = 0.0007, Cls Loss = 0.0520, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0536, Reg Loss = 7.3156, Reconstruct Loss = 0.0007, Cls Loss = 0.0522, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0526, Reg Loss = 7.3566, Reconstruct Loss = 0.0006, Cls Loss = 0.0513, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0533, Reg Loss = 7.3698, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.02it/s]


Epoch [37/100], Validation Loss: 7.9469, Validation Accuracy: 32.86%



Iteration 0: Loss = 0.0234, Reg Loss = 7.5560, Reconstruct Loss = 0.0000, Cls Loss = 0.0227, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0479, Reg Loss = 7.5814, Reconstruct Loss = 0.0006, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0505, Reg Loss = 7.6395, Reconstruct Loss = 0.0006, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0498, Reg Loss = 7.6311, Reconstruct Loss = 0.0006, Cls Loss = 0.0485, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0493, Reg Loss = 7.6099, Reconstruct Loss = 0.0006, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0499, Reg Loss = 7.5516, Reconstruct Loss = 0.0007, Cls Loss = 0.0485, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0499, Reg Loss = 7.5318, Reconstruct Loss = 0.0007, Cls Loss = 0.0485, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0499, Reg Loss = 7.5542, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.00it/s]


Epoch [38/100], Validation Loss: 6.0575, Validation Accuracy: 41.61%



Iteration 0: Loss = 0.0158, Reg Loss = 7.2646, Reconstruct Loss = 0.0000, Cls Loss = 0.0151, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0500, Reg Loss = 7.4475, Reconstruct Loss = 0.0003, Cls Loss = 0.0490, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0504, Reg Loss = 7.5654, Reconstruct Loss = 0.0003, Cls Loss = 0.0494, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0534, Reg Loss = 7.6723, Reconstruct Loss = 0.0006, Cls Loss = 0.0520, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0530, Reg Loss = 7.7542, Reconstruct Loss = 0.0011, Cls Loss = 0.0512, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0535, Reg Loss = 7.8441, Reconstruct Loss = 0.0009, Cls Loss = 0.0518, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0527, Reg Loss = 7.8456, Reconstruct Loss = 0.0009, Cls Loss = 0.0510, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0533, Reg Loss = 7.8560, Reconstruct Loss = 0.0009, C

100%|██████████| 157/157 [00:03<00:00, 44.82it/s]


Epoch [39/100], Validation Loss: 6.5357, Validation Accuracy: 35.55%



Iteration 0: Loss = 0.1121, Reg Loss = 7.7646, Reconstruct Loss = 0.0000, Cls Loss = 0.1113, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0463, Reg Loss = 7.9028, Reconstruct Loss = 0.0005, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0470, Reg Loss = 7.8227, Reconstruct Loss = 0.0003, Cls Loss = 0.0459, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0476, Reg Loss = 7.8244, Reconstruct Loss = 0.0005, Cls Loss = 0.0463, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0471, Reg Loss = 7.7258, Reconstruct Loss = 0.0004, Cls Loss = 0.0459, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0473, Reg Loss = 7.6842, Reconstruct Loss = 0.0005, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0481, Reg Loss = 7.6338, Reconstruct Loss = 0.0005, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0484, Reg Loss = 7.5919, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.19it/s]


Epoch [40/100], Validation Loss: 5.9316, Validation Accuracy: 41.36%



Iteration 0: Loss = 0.0728, Reg Loss = 7.4697, Reconstruct Loss = 0.0000, Cls Loss = 0.0720, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0465, Reg Loss = 7.5910, Reconstruct Loss = 0.0014, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0498, Reg Loss = 7.5734, Reconstruct Loss = 0.0010, Cls Loss = 0.0481, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0505, Reg Loss = 7.7816, Reconstruct Loss = 0.0009, Cls Loss = 0.0489, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0483, Reg Loss = 7.8400, Reconstruct Loss = 0.0007, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0473, Reg Loss = 7.8350, Reconstruct Loss = 0.0007, Cls Loss = 0.0459, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0464, Reg Loss = 7.8294, Reconstruct Loss = 0.0007, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0471, Reg Loss = 7.8571, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.37it/s]


Epoch [41/100], Validation Loss: 4.0288, Validation Accuracy: 52.19%



Iteration 0: Loss = 0.0451, Reg Loss = 7.6239, Reconstruct Loss = 0.0000, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0577, Reg Loss = 7.9381, Reconstruct Loss = 0.0013, Cls Loss = 0.0556, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0564, Reg Loss = 7.7666, Reconstruct Loss = 0.0011, Cls Loss = 0.0545, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0532, Reg Loss = 7.7668, Reconstruct Loss = 0.0009, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0543, Reg Loss = 7.7074, Reconstruct Loss = 0.0008, Cls Loss = 0.0527, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0523, Reg Loss = 7.6741, Reconstruct Loss = 0.0009, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0510, Reg Loss = 7.6504, Reconstruct Loss = 0.0010, Cls Loss = 0.0493, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0510, Reg Loss = 7.6139, Reconstruct Loss = 0.0010, C

100%|██████████| 157/157 [00:03<00:00, 45.30it/s]


Epoch [42/100], Validation Loss: 4.6843, Validation Accuracy: 44.20%



Iteration 0: Loss = 0.0415, Reg Loss = 7.8942, Reconstruct Loss = 0.0000, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0438, Reg Loss = 7.5300, Reconstruct Loss = 0.0002, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0491, Reg Loss = 7.4392, Reconstruct Loss = 0.0005, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0503, Reg Loss = 7.5292, Reconstruct Loss = 0.0006, Cls Loss = 0.0489, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0502, Reg Loss = 7.6196, Reconstruct Loss = 0.0009, Cls Loss = 0.0486, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0499, Reg Loss = 7.5900, Reconstruct Loss = 0.0008, Cls Loss = 0.0484, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0492, Reg Loss = 7.6073, Reconstruct Loss = 0.0007, Cls Loss = 0.0477, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0499, Reg Loss = 7.6158, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.14it/s]


Epoch [43/100], Validation Loss: 5.4224, Validation Accuracy: 43.86%



Iteration 0: Loss = 0.0836, Reg Loss = 8.3287, Reconstruct Loss = 0.0000, Cls Loss = 0.0828, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0501, Reg Loss = 8.1025, Reconstruct Loss = 0.0002, Cls Loss = 0.0490, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0558, Reg Loss = 8.1708, Reconstruct Loss = 0.0005, Cls Loss = 0.0544, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0539, Reg Loss = 8.1569, Reconstruct Loss = 0.0005, Cls Loss = 0.0527, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0532, Reg Loss = 8.0970, Reconstruct Loss = 0.0005, Cls Loss = 0.0519, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0525, Reg Loss = 8.0322, Reconstruct Loss = 0.0004, Cls Loss = 0.0513, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0519, Reg Loss = 7.9779, Reconstruct Loss = 0.0004, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0527, Reg Loss = 7.9266, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.29it/s]


Epoch [44/100], Validation Loss: 5.2141, Validation Accuracy: 43.64%



Iteration 0: Loss = 0.0734, Reg Loss = 8.7285, Reconstruct Loss = 0.0000, Cls Loss = 0.0726, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0464, Reg Loss = 8.2833, Reconstruct Loss = 0.0007, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0509, Reg Loss = 8.2177, Reconstruct Loss = 0.0006, Cls Loss = 0.0495, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0503, Reg Loss = 8.1668, Reconstruct Loss = 0.0009, Cls Loss = 0.0486, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0495, Reg Loss = 8.1157, Reconstruct Loss = 0.0009, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0491, Reg Loss = 8.0848, Reconstruct Loss = 0.0007, Cls Loss = 0.0476, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0499, Reg Loss = 8.0706, Reconstruct Loss = 0.0007, Cls Loss = 0.0484, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0492, Reg Loss = 8.0374, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.92it/s]


Epoch [45/100], Validation Loss: 2.7773, Validation Accuracy: 58.32%



Iteration 0: Loss = 0.1203, Reg Loss = 8.1598, Reconstruct Loss = 0.0000, Cls Loss = 0.1194, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0424, Reg Loss = 7.9650, Reconstruct Loss = 0.0008, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0463, Reg Loss = 7.8906, Reconstruct Loss = 0.0010, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0455, Reg Loss = 7.9136, Reconstruct Loss = 0.0009, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0463, Reg Loss = 7.9123, Reconstruct Loss = 0.0008, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0480, Reg Loss = 7.8946, Reconstruct Loss = 0.0008, Cls Loss = 0.0464, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0488, Reg Loss = 7.8760, Reconstruct Loss = 0.0007, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0487, Reg Loss = 7.8650, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.28it/s]


Epoch [46/100], Validation Loss: 4.5437, Validation Accuracy: 47.29%



Iteration 0: Loss = 0.0209, Reg Loss = 7.1788, Reconstruct Loss = 0.0000, Cls Loss = 0.0202, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0418, Reg Loss = 7.5076, Reconstruct Loss = 0.0005, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0456, Reg Loss = 7.4089, Reconstruct Loss = 0.0004, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0455, Reg Loss = 7.4847, Reconstruct Loss = 0.0004, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0463, Reg Loss = 7.5873, Reconstruct Loss = 0.0004, Cls Loss = 0.0451, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0457, Reg Loss = 7.6838, Reconstruct Loss = 0.0005, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0456, Reg Loss = 7.7124, Reconstruct Loss = 0.0004, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0469, Reg Loss = 7.7533, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 44.89it/s]


Epoch [47/100], Validation Loss: 5.1616, Validation Accuracy: 45.47%



Iteration 0: Loss = 0.0278, Reg Loss = 7.5321, Reconstruct Loss = 0.0000, Cls Loss = 0.0270, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0434, Reg Loss = 7.9080, Reconstruct Loss = 0.0008, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0456, Reg Loss = 7.9955, Reconstruct Loss = 0.0009, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0476, Reg Loss = 8.0363, Reconstruct Loss = 0.0009, Cls Loss = 0.0459, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0462, Reg Loss = 8.0119, Reconstruct Loss = 0.0008, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0454, Reg Loss = 7.9681, Reconstruct Loss = 0.0007, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0458, Reg Loss = 7.9319, Reconstruct Loss = 0.0006, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0465, Reg Loss = 7.9656, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.60it/s]


Epoch [48/100], Validation Loss: 2.6102, Validation Accuracy: 58.38%



Iteration 0: Loss = 0.0434, Reg Loss = 7.4379, Reconstruct Loss = 0.0000, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0430, Reg Loss = 7.5664, Reconstruct Loss = 0.0010, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0474, Reg Loss = 7.3924, Reconstruct Loss = 0.0006, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0452, Reg Loss = 7.3563, Reconstruct Loss = 0.0006, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0451, Reg Loss = 7.3573, Reconstruct Loss = 0.0007, Cls Loss = 0.0437, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0461, Reg Loss = 7.3645, Reconstruct Loss = 0.0007, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0472, Reg Loss = 7.3660, Reconstruct Loss = 0.0007, Cls Loss = 0.0458, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0471, Reg Loss = 7.3675, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.91it/s]


Epoch [49/100], Validation Loss: 4.1334, Validation Accuracy: 49.40%



Iteration 0: Loss = 0.1030, Reg Loss = 7.7177, Reconstruct Loss = 0.0000, Cls Loss = 0.1023, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0478, Reg Loss = 7.7226, Reconstruct Loss = 0.0002, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0488, Reg Loss = 7.7511, Reconstruct Loss = 0.0003, Cls Loss = 0.0477, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0452, Reg Loss = 7.7350, Reconstruct Loss = 0.0004, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0456, Reg Loss = 7.6872, Reconstruct Loss = 0.0005, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0469, Reg Loss = 7.6961, Reconstruct Loss = 0.0007, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0464, Reg Loss = 7.7188, Reconstruct Loss = 0.0007, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0467, Reg Loss = 7.7059, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.29it/s]


Epoch [50/100], Validation Loss: 3.7953, Validation Accuracy: 50.77%



Iteration 0: Loss = 0.0433, Reg Loss = 7.2004, Reconstruct Loss = 0.0000, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0498, Reg Loss = 7.7248, Reconstruct Loss = 0.0002, Cls Loss = 0.0488, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0482, Reg Loss = 7.8490, Reconstruct Loss = 0.0007, Cls Loss = 0.0467, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0450, Reg Loss = 7.8583, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0444, Reg Loss = 7.8551, Reconstruct Loss = 0.0006, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0436, Reg Loss = 7.7860, Reconstruct Loss = 0.0005, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0451, Reg Loss = 7.7432, Reconstruct Loss = 0.0004, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0450, Reg Loss = 7.7187, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 44.68it/s]


Epoch [51/100], Validation Loss: 2.1888, Validation Accuracy: 62.49%



Iteration 0: Loss = 0.0302, Reg Loss = 7.5287, Reconstruct Loss = 0.0000, Cls Loss = 0.0294, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0448, Reg Loss = 7.8566, Reconstruct Loss = 0.0010, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0441, Reg Loss = 7.7700, Reconstruct Loss = 0.0005, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0434, Reg Loss = 7.7976, Reconstruct Loss = 0.0007, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0449, Reg Loss = 7.7826, Reconstruct Loss = 0.0008, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0454, Reg Loss = 7.7555, Reconstruct Loss = 0.0006, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0450, Reg Loss = 7.6945, Reconstruct Loss = 0.0005, Cls Loss = 0.0437, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0458, Reg Loss = 7.6894, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.61it/s]


Epoch [52/100], Validation Loss: 4.7427, Validation Accuracy: 44.38%



Iteration 0: Loss = 0.0630, Reg Loss = 8.1193, Reconstruct Loss = 0.0000, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0513, Reg Loss = 7.8338, Reconstruct Loss = 0.0008, Cls Loss = 0.0498, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0485, Reg Loss = 7.8434, Reconstruct Loss = 0.0005, Cls Loss = 0.0472, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0479, Reg Loss = 7.8381, Reconstruct Loss = 0.0005, Cls Loss = 0.0466, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0467, Reg Loss = 7.8775, Reconstruct Loss = 0.0007, Cls Loss = 0.0452, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0445, Reg Loss = 7.7875, Reconstruct Loss = 0.0007, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0437, Reg Loss = 7.7642, Reconstruct Loss = 0.0007, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0429, Reg Loss = 7.7480, Reconstruct Loss = 0.0008, C

100%|██████████| 157/157 [00:03<00:00, 44.85it/s]


Epoch [53/100], Validation Loss: 9.9346, Validation Accuracy: 28.70%



Iteration 0: Loss = 0.0234, Reg Loss = 7.8324, Reconstruct Loss = 0.0000, Cls Loss = 0.0227, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0417, Reg Loss = 7.8231, Reconstruct Loss = 0.0002, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0454, Reg Loss = 7.9264, Reconstruct Loss = 0.0005, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0475, Reg Loss = 7.9513, Reconstruct Loss = 0.0005, Cls Loss = 0.0463, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0468, Reg Loss = 7.9542, Reconstruct Loss = 0.0006, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0456, Reg Loss = 8.0449, Reconstruct Loss = 0.0007, Cls Loss = 0.0441, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0462, Reg Loss = 8.0479, Reconstruct Loss = 0.0006, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0465, Reg Loss = 8.0765, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.64it/s]


Epoch [54/100], Validation Loss: 4.8340, Validation Accuracy: 45.16%



Iteration 0: Loss = 0.0225, Reg Loss = 7.7705, Reconstruct Loss = 0.0000, Cls Loss = 0.0217, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0421, Reg Loss = 7.6309, Reconstruct Loss = 0.0008, Cls Loss = 0.0406, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0452, Reg Loss = 7.5838, Reconstruct Loss = 0.0007, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0435, Reg Loss = 7.5613, Reconstruct Loss = 0.0005, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0426, Reg Loss = 7.6191, Reconstruct Loss = 0.0005, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0434, Reg Loss = 7.6756, Reconstruct Loss = 0.0005, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0446, Reg Loss = 7.6541, Reconstruct Loss = 0.0005, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0450, Reg Loss = 7.6536, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.50it/s]


Epoch [55/100], Validation Loss: 6.1970, Validation Accuracy: 40.89%



Iteration 0: Loss = 0.0142, Reg Loss = 7.2841, Reconstruct Loss = 0.0000, Cls Loss = 0.0134, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0469, Reg Loss = 7.6482, Reconstruct Loss = 0.0006, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0460, Reg Loss = 7.6335, Reconstruct Loss = 0.0005, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0482, Reg Loss = 7.4686, Reconstruct Loss = 0.0004, Cls Loss = 0.0471, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0477, Reg Loss = 7.4221, Reconstruct Loss = 0.0004, Cls Loss = 0.0466, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0471, Reg Loss = 7.4217, Reconstruct Loss = 0.0004, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0476, Reg Loss = 7.4400, Reconstruct Loss = 0.0004, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0462, Reg Loss = 7.4907, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.86it/s]


Epoch [56/100], Validation Loss: 3.8038, Validation Accuracy: 53.46%



Iteration 0: Loss = 0.0200, Reg Loss = 7.4749, Reconstruct Loss = 0.0000, Cls Loss = 0.0193, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0384, Reg Loss = 7.4836, Reconstruct Loss = 0.0006, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0431, Reg Loss = 7.5885, Reconstruct Loss = 0.0004, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0438, Reg Loss = 7.6910, Reconstruct Loss = 0.0004, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0451, Reg Loss = 7.7413, Reconstruct Loss = 0.0004, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0466, Reg Loss = 7.7572, Reconstruct Loss = 0.0004, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0457, Reg Loss = 7.7341, Reconstruct Loss = 0.0004, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0461, Reg Loss = 7.7126, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.93it/s]


Epoch [57/100], Validation Loss: 4.2068, Validation Accuracy: 50.06%



Iteration 0: Loss = 0.0347, Reg Loss = 7.5010, Reconstruct Loss = 0.0000, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0451, Reg Loss = 7.8983, Reconstruct Loss = 0.0000, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0446, Reg Loss = 7.7846, Reconstruct Loss = 0.0003, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0436, Reg Loss = 7.7383, Reconstruct Loss = 0.0003, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0434, Reg Loss = 7.7559, Reconstruct Loss = 0.0004, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0428, Reg Loss = 7.7524, Reconstruct Loss = 0.0005, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0442, Reg Loss = 7.7867, Reconstruct Loss = 0.0005, Cls Loss = 0.0429, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0449, Reg Loss = 7.8108, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.98it/s]


Epoch [58/100], Validation Loss: 2.8579, Validation Accuracy: 61.42%



Iteration 0: Loss = 0.1189, Reg Loss = 7.6465, Reconstruct Loss = 0.0000, Cls Loss = 0.1181, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0490, Reg Loss = 8.2110, Reconstruct Loss = 0.0006, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0505, Reg Loss = 8.2125, Reconstruct Loss = 0.0006, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0476, Reg Loss = 8.1392, Reconstruct Loss = 0.0006, Cls Loss = 0.0461, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0478, Reg Loss = 8.0875, Reconstruct Loss = 0.0005, Cls Loss = 0.0464, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0469, Reg Loss = 8.0599, Reconstruct Loss = 0.0006, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0470, Reg Loss = 8.0892, Reconstruct Loss = 0.0006, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0465, Reg Loss = 8.1054, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.97it/s]


Epoch [59/100], Validation Loss: 2.6564, Validation Accuracy: 59.69%



Iteration 0: Loss = 0.0032, Reg Loss = 8.1015, Reconstruct Loss = 0.0000, Cls Loss = 0.0024, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0493, Reg Loss = 8.0876, Reconstruct Loss = 0.0010, Cls Loss = 0.0476, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0459, Reg Loss = 8.0584, Reconstruct Loss = 0.0009, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0448, Reg Loss = 8.0263, Reconstruct Loss = 0.0006, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0442, Reg Loss = 8.0421, Reconstruct Loss = 0.0006, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0452, Reg Loss = 8.0560, Reconstruct Loss = 0.0006, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0448, Reg Loss = 8.0722, Reconstruct Loss = 0.0005, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0459, Reg Loss = 8.0639, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 44.93it/s]


Epoch [60/100], Validation Loss: 3.7301, Validation Accuracy: 55.08%



Iteration 0: Loss = 0.0216, Reg Loss = 7.3716, Reconstruct Loss = 0.0000, Cls Loss = 0.0209, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0455, Reg Loss = 7.8539, Reconstruct Loss = 0.0007, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0457, Reg Loss = 7.6995, Reconstruct Loss = 0.0006, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0441, Reg Loss = 7.7218, Reconstruct Loss = 0.0007, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0432, Reg Loss = 7.6320, Reconstruct Loss = 0.0005, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0424, Reg Loss = 7.5853, Reconstruct Loss = 0.0005, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0428, Reg Loss = 7.5772, Reconstruct Loss = 0.0005, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0427, Reg Loss = 7.5988, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.76it/s]


Epoch [61/100], Validation Loss: 16.1722, Validation Accuracy: 19.87%



Iteration 0: Loss = 0.0800, Reg Loss = 7.4594, Reconstruct Loss = 0.0000, Cls Loss = 0.0792, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0488, Reg Loss = 7.9988, Reconstruct Loss = 0.0005, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0468, Reg Loss = 7.8855, Reconstruct Loss = 0.0004, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0467, Reg Loss = 7.9100, Reconstruct Loss = 0.0006, Cls Loss = 0.0453, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0457, Reg Loss = 7.9125, Reconstruct Loss = 0.0005, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0459, Reg Loss = 7.8947, Reconstruct Loss = 0.0005, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0450, Reg Loss = 7.8817, Reconstruct Loss = 0.0005, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0455, Reg Loss = 7.8987, Reconstruct Loss = 0.0005, 

100%|██████████| 157/157 [00:03<00:00, 45.30it/s]


Epoch [62/100], Validation Loss: 4.2985, Validation Accuracy: 45.48%



Iteration 0: Loss = 0.0946, Reg Loss = 7.7409, Reconstruct Loss = 0.0000, Cls Loss = 0.0938, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0499, Reg Loss = 8.1505, Reconstruct Loss = 0.0004, Cls Loss = 0.0487, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0488, Reg Loss = 8.3446, Reconstruct Loss = 0.0006, Cls Loss = 0.0474, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0451, Reg Loss = 8.4163, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0446, Reg Loss = 8.5083, Reconstruct Loss = 0.0006, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0450, Reg Loss = 8.5356, Reconstruct Loss = 0.0007, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0463, Reg Loss = 8.4699, Reconstruct Loss = 0.0007, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0463, Reg Loss = 8.4637, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.51it/s]


Epoch [63/100], Validation Loss: 6.8245, Validation Accuracy: 33.96%



Iteration 0: Loss = 0.0216, Reg Loss = 8.7312, Reconstruct Loss = 0.0000, Cls Loss = 0.0207, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0520, Reg Loss = 8.6096, Reconstruct Loss = 0.0008, Cls Loss = 0.0503, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0504, Reg Loss = 8.2941, Reconstruct Loss = 0.0005, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0500, Reg Loss = 8.1952, Reconstruct Loss = 0.0003, Cls Loss = 0.0488, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0486, Reg Loss = 8.2090, Reconstruct Loss = 0.0003, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0469, Reg Loss = 8.1916, Reconstruct Loss = 0.0004, Cls Loss = 0.0457, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0465, Reg Loss = 8.1825, Reconstruct Loss = 0.0003, Cls Loss = 0.0453, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0462, Reg Loss = 8.1412, Reconstruct Loss = 0.0003, C

100%|██████████| 157/157 [00:03<00:00, 44.65it/s]


Epoch [64/100], Validation Loss: 6.5954, Validation Accuracy: 38.52%



Iteration 0: Loss = 0.0476, Reg Loss = 7.9949, Reconstruct Loss = 0.0000, Cls Loss = 0.0468, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0373, Reg Loss = 8.2377, Reconstruct Loss = 0.0005, Cls Loss = 0.0360, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0390, Reg Loss = 8.5016, Reconstruct Loss = 0.0006, Cls Loss = 0.0375, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0426, Reg Loss = 8.5155, Reconstruct Loss = 0.0006, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0435, Reg Loss = 8.4612, Reconstruct Loss = 0.0007, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 8.4106, Reconstruct Loss = 0.0006, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0445, Reg Loss = 8.3406, Reconstruct Loss = 0.0006, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0448, Reg Loss = 8.2838, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.41it/s]


Epoch [65/100], Validation Loss: 7.1512, Validation Accuracy: 35.74%



Iteration 0: Loss = 0.0424, Reg Loss = 7.5082, Reconstruct Loss = 0.0000, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0543, Reg Loss = 7.7346, Reconstruct Loss = 0.0000, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0491, Reg Loss = 7.8086, Reconstruct Loss = 0.0007, Cls Loss = 0.0476, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0479, Reg Loss = 7.6794, Reconstruct Loss = 0.0005, Cls Loss = 0.0466, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0464, Reg Loss = 7.5829, Reconstruct Loss = 0.0004, Cls Loss = 0.0453, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0451, Reg Loss = 7.6111, Reconstruct Loss = 0.0004, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0453, Reg Loss = 7.6860, Reconstruct Loss = 0.0004, Cls Loss = 0.0441, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0455, Reg Loss = 7.7427, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.94it/s]


Epoch [66/100], Validation Loss: 4.4808, Validation Accuracy: 48.75%



Iteration 0: Loss = 0.0605, Reg Loss = 7.5216, Reconstruct Loss = 0.0000, Cls Loss = 0.0597, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0447, Reg Loss = 7.9428, Reconstruct Loss = 0.0003, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0450, Reg Loss = 7.7770, Reconstruct Loss = 0.0001, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0451, Reg Loss = 7.7880, Reconstruct Loss = 0.0002, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0440, Reg Loss = 7.8653, Reconstruct Loss = 0.0003, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0432, Reg Loss = 7.9032, Reconstruct Loss = 0.0002, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0434, Reg Loss = 7.9071, Reconstruct Loss = 0.0002, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0430, Reg Loss = 7.8655, Reconstruct Loss = 0.0002, C

100%|██████████| 157/157 [00:03<00:00, 44.98it/s]


Epoch [67/100], Validation Loss: 3.5971, Validation Accuracy: 51.79%



Iteration 0: Loss = 0.0204, Reg Loss = 7.3823, Reconstruct Loss = 0.0000, Cls Loss = 0.0197, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0402, Reg Loss = 8.0107, Reconstruct Loss = 0.0011, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0397, Reg Loss = 8.0279, Reconstruct Loss = 0.0005, Cls Loss = 0.0384, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0425, Reg Loss = 7.9970, Reconstruct Loss = 0.0005, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0425, Reg Loss = 7.9561, Reconstruct Loss = 0.0005, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0430, Reg Loss = 7.9147, Reconstruct Loss = 0.0005, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0425, Reg Loss = 7.8937, Reconstruct Loss = 0.0006, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0430, Reg Loss = 7.9173, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.94it/s]


Epoch [68/100], Validation Loss: 1.9620, Validation Accuracy: 67.63%



Iteration 0: Loss = 0.0689, Reg Loss = 7.3738, Reconstruct Loss = 0.0000, Cls Loss = 0.0682, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0416, Reg Loss = 7.5721, Reconstruct Loss = 0.0000, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0441, Reg Loss = 7.6408, Reconstruct Loss = 0.0003, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0430, Reg Loss = 7.7955, Reconstruct Loss = 0.0003, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0444, Reg Loss = 7.9210, Reconstruct Loss = 0.0004, Cls Loss = 0.0432, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0434, Reg Loss = 7.9344, Reconstruct Loss = 0.0004, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0427, Reg Loss = 7.9524, Reconstruct Loss = 0.0003, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0423, Reg Loss = 7.9300, Reconstruct Loss = 0.0003, C

100%|██████████| 157/157 [00:03<00:00, 44.92it/s]


Epoch [69/100], Validation Loss: 1.6436, Validation Accuracy: 71.60%



Checkpoint saved at epoch 68 with accuracy: 71.60%
Iteration 0: Loss = 0.0310, Reg Loss = 8.1785, Reconstruct Loss = 0.0000, Cls Loss = 0.0301, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0383, Reg Loss = 8.1830, Reconstruct Loss = 0.0000, Cls Loss = 0.0375, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0407, Reg Loss = 8.2077, Reconstruct Loss = 0.0005, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0394, Reg Loss = 8.1567, Reconstruct Loss = 0.0005, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0397, Reg Loss = 8.1062, Reconstruct Loss = 0.0006, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0397, Reg Loss = 8.0654, Reconstruct Loss = 0.0006, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0400, Reg Loss = 8.0318, Reconstruct Loss = 0.0005, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.04

100%|██████████| 157/157 [00:03<00:00, 45.33it/s]


Epoch [70/100], Validation Loss: 4.3978, Validation Accuracy: 51.70%



Iteration 0: Loss = 0.0223, Reg Loss = 7.8588, Reconstruct Loss = 0.0000, Cls Loss = 0.0215, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0432, Reg Loss = 7.8713, Reconstruct Loss = 0.0006, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0434, Reg Loss = 8.0407, Reconstruct Loss = 0.0009, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0448, Reg Loss = 7.9919, Reconstruct Loss = 0.0007, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0441, Reg Loss = 8.0039, Reconstruct Loss = 0.0007, Cls Loss = 0.0426, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0447, Reg Loss = 8.0433, Reconstruct Loss = 0.0007, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0437, Reg Loss = 8.0928, Reconstruct Loss = 0.0008, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0443, Reg Loss = 8.1462, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.47it/s]


Epoch [71/100], Validation Loss: 4.9192, Validation Accuracy: 47.28%



Iteration 0: Loss = 0.0295, Reg Loss = 7.8543, Reconstruct Loss = 0.0000, Cls Loss = 0.0287, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0415, Reg Loss = 8.2216, Reconstruct Loss = 0.0014, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0426, Reg Loss = 8.1918, Reconstruct Loss = 0.0012, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0463, Reg Loss = 8.2549, Reconstruct Loss = 0.0010, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0443, Reg Loss = 8.2412, Reconstruct Loss = 0.0008, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0452, Reg Loss = 8.1835, Reconstruct Loss = 0.0009, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0450, Reg Loss = 8.1842, Reconstruct Loss = 0.0009, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0451, Reg Loss = 8.2620, Reconstruct Loss = 0.0008, C

100%|██████████| 157/157 [00:03<00:00, 45.05it/s]


Epoch [72/100], Validation Loss: 3.2069, Validation Accuracy: 56.45%



Iteration 0: Loss = 0.0650, Reg Loss = 8.0606, Reconstruct Loss = 0.0000, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0462, Reg Loss = 7.9919, Reconstruct Loss = 0.0006, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0445, Reg Loss = 7.9917, Reconstruct Loss = 0.0007, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0447, Reg Loss = 8.0349, Reconstruct Loss = 0.0008, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0463, Reg Loss = 8.1120, Reconstruct Loss = 0.0008, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0444, Reg Loss = 8.1173, Reconstruct Loss = 0.0007, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0435, Reg Loss = 8.1164, Reconstruct Loss = 0.0007, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0433, Reg Loss = 8.0877, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 42.48it/s]


Epoch [73/100], Validation Loss: 2.7644, Validation Accuracy: 61.25%



Iteration 0: Loss = 0.0234, Reg Loss = 8.1467, Reconstruct Loss = 0.0000, Cls Loss = 0.0226, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0444, Reg Loss = 8.0619, Reconstruct Loss = 0.0005, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0437, Reg Loss = 8.2398, Reconstruct Loss = 0.0005, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0433, Reg Loss = 8.3804, Reconstruct Loss = 0.0007, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0443, Reg Loss = 8.3200, Reconstruct Loss = 0.0006, Cls Loss = 0.0429, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0438, Reg Loss = 8.2000, Reconstruct Loss = 0.0006, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0451, Reg Loss = 8.1402, Reconstruct Loss = 0.0007, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0449, Reg Loss = 8.1336, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.53it/s]


Epoch [74/100], Validation Loss: 3.2053, Validation Accuracy: 57.37%



Iteration 0: Loss = 0.0298, Reg Loss = 8.2724, Reconstruct Loss = 0.0000, Cls Loss = 0.0290, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0431, Reg Loss = 7.9788, Reconstruct Loss = 0.0002, Cls Loss = 0.0421, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0430, Reg Loss = 8.0458, Reconstruct Loss = 0.0006, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0416, Reg Loss = 7.9480, Reconstruct Loss = 0.0007, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0430, Reg Loss = 8.0226, Reconstruct Loss = 0.0007, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0430, Reg Loss = 8.0315, Reconstruct Loss = 0.0007, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0438, Reg Loss = 8.0020, Reconstruct Loss = 0.0006, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0433, Reg Loss = 8.0517, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.18it/s]


Epoch [75/100], Validation Loss: 2.1679, Validation Accuracy: 67.22%



Iteration 0: Loss = 0.0386, Reg Loss = 7.4488, Reconstruct Loss = 0.0000, Cls Loss = 0.0378, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0523, Reg Loss = 7.9471, Reconstruct Loss = 0.0002, Cls Loss = 0.0513, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0479, Reg Loss = 7.7663, Reconstruct Loss = 0.0004, Cls Loss = 0.0467, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0438, Reg Loss = 7.7455, Reconstruct Loss = 0.0004, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0439, Reg Loss = 7.7207, Reconstruct Loss = 0.0004, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0437, Reg Loss = 7.7254, Reconstruct Loss = 0.0004, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0435, Reg Loss = 7.7887, Reconstruct Loss = 0.0004, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0446, Reg Loss = 7.8195, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 44.98it/s]


Epoch [76/100], Validation Loss: 3.9991, Validation Accuracy: 53.01%



Iteration 0: Loss = 0.0835, Reg Loss = 8.0422, Reconstruct Loss = 0.0000, Cls Loss = 0.0827, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0392, Reg Loss = 8.1849, Reconstruct Loss = 0.0002, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0400, Reg Loss = 8.1739, Reconstruct Loss = 0.0002, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0417, Reg Loss = 8.1870, Reconstruct Loss = 0.0003, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0416, Reg Loss = 8.1738, Reconstruct Loss = 0.0005, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 8.1237, Reconstruct Loss = 0.0005, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0447, Reg Loss = 8.0985, Reconstruct Loss = 0.0005, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0452, Reg Loss = 8.0869, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [77/100], Validation Loss: 5.9603, Validation Accuracy: 43.48%



Iteration 0: Loss = 0.0152, Reg Loss = 8.1522, Reconstruct Loss = 0.0000, Cls Loss = 0.0143, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0406, Reg Loss = 8.3365, Reconstruct Loss = 0.0000, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0407, Reg Loss = 8.3623, Reconstruct Loss = 0.0004, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0416, Reg Loss = 8.3726, Reconstruct Loss = 0.0006, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0411, Reg Loss = 8.3813, Reconstruct Loss = 0.0005, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0408, Reg Loss = 8.3909, Reconstruct Loss = 0.0006, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0408, Reg Loss = 8.3332, Reconstruct Loss = 0.0005, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0420, Reg Loss = 8.3074, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.04it/s]


Epoch [78/100], Validation Loss: 7.1895, Validation Accuracy: 37.38%



Iteration 0: Loss = 0.0997, Reg Loss = 7.7986, Reconstruct Loss = 0.0000, Cls Loss = 0.0989, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0438, Reg Loss = 8.2770, Reconstruct Loss = 0.0007, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0446, Reg Loss = 8.1826, Reconstruct Loss = 0.0006, Cls Loss = 0.0432, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0458, Reg Loss = 8.0374, Reconstruct Loss = 0.0005, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0453, Reg Loss = 7.9969, Reconstruct Loss = 0.0006, Cls Loss = 0.0440, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 7.9788, Reconstruct Loss = 0.0005, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0443, Reg Loss = 7.9557, Reconstruct Loss = 0.0005, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0442, Reg Loss = 7.9671, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.44it/s]


Epoch [79/100], Validation Loss: 3.6477, Validation Accuracy: 50.67%



Iteration 0: Loss = 0.0323, Reg Loss = 7.5339, Reconstruct Loss = 0.0000, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0369, Reg Loss = 7.6449, Reconstruct Loss = 0.0005, Cls Loss = 0.0356, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0384, Reg Loss = 7.6718, Reconstruct Loss = 0.0005, Cls Loss = 0.0372, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0387, Reg Loss = 7.6626, Reconstruct Loss = 0.0004, Cls Loss = 0.0375, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0402, Reg Loss = 7.7162, Reconstruct Loss = 0.0005, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0409, Reg Loss = 7.7339, Reconstruct Loss = 0.0005, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0424, Reg Loss = 7.8057, Reconstruct Loss = 0.0005, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0425, Reg Loss = 7.8302, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.93it/s]


Epoch [80/100], Validation Loss: 4.1080, Validation Accuracy: 50.22%



Iteration 0: Loss = 0.0456, Reg Loss = 7.8536, Reconstruct Loss = 0.0000, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0468, Reg Loss = 8.0386, Reconstruct Loss = 0.0010, Cls Loss = 0.0451, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0445, Reg Loss = 8.0727, Reconstruct Loss = 0.0006, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0431, Reg Loss = 8.0312, Reconstruct Loss = 0.0005, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0437, Reg Loss = 7.9957, Reconstruct Loss = 0.0006, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0433, Reg Loss = 7.9605, Reconstruct Loss = 0.0005, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0430, Reg Loss = 7.9718, Reconstruct Loss = 0.0005, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0432, Reg Loss = 7.9522, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [81/100], Validation Loss: 1.7672, Validation Accuracy: 68.46%



Iteration 0: Loss = 0.0657, Reg Loss = 7.7986, Reconstruct Loss = 0.0000, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0423, Reg Loss = 8.1141, Reconstruct Loss = 0.0012, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0452, Reg Loss = 8.0872, Reconstruct Loss = 0.0009, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0464, Reg Loss = 7.9044, Reconstruct Loss = 0.0007, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0444, Reg Loss = 7.8183, Reconstruct Loss = 0.0006, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 7.7900, Reconstruct Loss = 0.0005, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0432, Reg Loss = 7.7877, Reconstruct Loss = 0.0004, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0441, Reg Loss = 7.8177, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.92it/s]


Epoch [82/100], Validation Loss: 2.6979, Validation Accuracy: 61.59%



Iteration 0: Loss = 0.1194, Reg Loss = 8.3093, Reconstruct Loss = 0.0000, Cls Loss = 0.1186, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0418, Reg Loss = 7.7981, Reconstruct Loss = 0.0000, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0421, Reg Loss = 7.8885, Reconstruct Loss = 0.0003, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0412, Reg Loss = 7.8375, Reconstruct Loss = 0.0004, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0425, Reg Loss = 7.9006, Reconstruct Loss = 0.0004, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0436, Reg Loss = 7.9324, Reconstruct Loss = 0.0005, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0434, Reg Loss = 7.9326, Reconstruct Loss = 0.0005, Cls Loss = 0.0421, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0430, Reg Loss = 7.9592, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.57it/s]


Epoch [83/100], Validation Loss: 2.2268, Validation Accuracy: 64.41%



Iteration 0: Loss = 0.0157, Reg Loss = 8.1173, Reconstruct Loss = 0.0000, Cls Loss = 0.0149, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0504, Reg Loss = 8.4413, Reconstruct Loss = 0.0006, Cls Loss = 0.0489, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0462, Reg Loss = 8.3528, Reconstruct Loss = 0.0004, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0480, Reg Loss = 8.3032, Reconstruct Loss = 0.0003, Cls Loss = 0.0469, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0451, Reg Loss = 8.3107, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0445, Reg Loss = 8.2112, Reconstruct Loss = 0.0006, Cls Loss = 0.0432, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0434, Reg Loss = 8.1279, Reconstruct Loss = 0.0005, Cls Loss = 0.0421, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0431, Reg Loss = 8.0780, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [84/100], Validation Loss: 1.9145, Validation Accuracy: 68.53%



Iteration 0: Loss = 0.0852, Reg Loss = 7.8163, Reconstruct Loss = 0.0000, Cls Loss = 0.0844, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0391, Reg Loss = 7.8429, Reconstruct Loss = 0.0000, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0431, Reg Loss = 8.0233, Reconstruct Loss = 0.0008, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0424, Reg Loss = 8.0559, Reconstruct Loss = 0.0010, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0396, Reg Loss = 8.0885, Reconstruct Loss = 0.0008, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0419, Reg Loss = 8.0558, Reconstruct Loss = 0.0008, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0430, Reg Loss = 8.0279, Reconstruct Loss = 0.0007, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0431, Reg Loss = 8.0342, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.41it/s]


Epoch [85/100], Validation Loss: 2.4159, Validation Accuracy: 63.63%



Iteration 0: Loss = 0.0074, Reg Loss = 7.7248, Reconstruct Loss = 0.0000, Cls Loss = 0.0066, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0359, Reg Loss = 8.1588, Reconstruct Loss = 0.0009, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0368, Reg Loss = 8.1276, Reconstruct Loss = 0.0009, Cls Loss = 0.0350, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0392, Reg Loss = 8.0342, Reconstruct Loss = 0.0007, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0392, Reg Loss = 8.0711, Reconstruct Loss = 0.0008, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0410, Reg Loss = 8.0709, Reconstruct Loss = 0.0008, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0417, Reg Loss = 8.0728, Reconstruct Loss = 0.0007, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0421, Reg Loss = 8.0922, Reconstruct Loss = 0.0008, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [86/100], Validation Loss: 3.4974, Validation Accuracy: 55.97%



Iteration 0: Loss = 0.0307, Reg Loss = 8.4204, Reconstruct Loss = 0.0000, Cls Loss = 0.0299, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0432, Reg Loss = 8.1913, Reconstruct Loss = 0.0002, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0410, Reg Loss = 8.1170, Reconstruct Loss = 0.0006, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0412, Reg Loss = 8.1500, Reconstruct Loss = 0.0006, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0416, Reg Loss = 8.2164, Reconstruct Loss = 0.0005, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0435, Reg Loss = 8.2060, Reconstruct Loss = 0.0005, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0425, Reg Loss = 8.1468, Reconstruct Loss = 0.0005, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0420, Reg Loss = 8.1354, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.06it/s]


Epoch [87/100], Validation Loss: 2.0604, Validation Accuracy: 66.13%



Iteration 0: Loss = 0.0203, Reg Loss = 7.7876, Reconstruct Loss = 0.0000, Cls Loss = 0.0195, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0447, Reg Loss = 7.9025, Reconstruct Loss = 0.0004, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0476, Reg Loss = 8.0018, Reconstruct Loss = 0.0003, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0470, Reg Loss = 8.0165, Reconstruct Loss = 0.0006, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0452, Reg Loss = 8.0370, Reconstruct Loss = 0.0006, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0453, Reg Loss = 8.0519, Reconstruct Loss = 0.0007, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0464, Reg Loss = 8.0081, Reconstruct Loss = 0.0006, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0458, Reg Loss = 8.0073, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.14it/s]


Epoch [88/100], Validation Loss: 3.8913, Validation Accuracy: 52.64%



Iteration 0: Loss = 0.0710, Reg Loss = 9.8477, Reconstruct Loss = 0.0235, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0446, Reg Loss = 7.9891, Reconstruct Loss = 0.0022, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0403, Reg Loss = 7.8726, Reconstruct Loss = 0.0014, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0400, Reg Loss = 7.8807, Reconstruct Loss = 0.0013, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0385, Reg Loss = 7.8466, Reconstruct Loss = 0.0010, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0392, Reg Loss = 7.8560, Reconstruct Loss = 0.0009, Cls Loss = 0.0375, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0400, Reg Loss = 7.9116, Reconstruct Loss = 0.0009, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0409, Reg Loss = 7.9342, Reconstruct Loss = 0.0007, C

100%|██████████| 157/157 [00:03<00:00, 45.43it/s]


Epoch [89/100], Validation Loss: 2.5653, Validation Accuracy: 65.09%



Iteration 0: Loss = 0.0650, Reg Loss = 8.5211, Reconstruct Loss = 0.0000, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0465, Reg Loss = 8.3424, Reconstruct Loss = 0.0002, Cls Loss = 0.0454, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0454, Reg Loss = 8.3535, Reconstruct Loss = 0.0006, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0427, Reg Loss = 8.1816, Reconstruct Loss = 0.0004, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0414, Reg Loss = 8.1467, Reconstruct Loss = 0.0004, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0426, Reg Loss = 8.0392, Reconstruct Loss = 0.0004, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0422, Reg Loss = 7.9619, Reconstruct Loss = 0.0004, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0421, Reg Loss = 7.9527, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 45.04it/s]


Epoch [90/100], Validation Loss: 6.0767, Validation Accuracy: 42.44%



Iteration 0: Loss = 0.0096, Reg Loss = 7.1565, Reconstruct Loss = 0.0000, Cls Loss = 0.0089, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0356, Reg Loss = 7.4596, Reconstruct Loss = 0.0004, Cls Loss = 0.0344, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0407, Reg Loss = 7.4901, Reconstruct Loss = 0.0002, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0403, Reg Loss = 7.5287, Reconstruct Loss = 0.0004, Cls Loss = 0.0392, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0413, Reg Loss = 7.6842, Reconstruct Loss = 0.0007, Cls Loss = 0.0399, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0421, Reg Loss = 7.7054, Reconstruct Loss = 0.0006, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0421, Reg Loss = 7.6782, Reconstruct Loss = 0.0005, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0415, Reg Loss = 7.6833, Reconstruct Loss = 0.0005, C

100%|██████████| 157/157 [00:03<00:00, 45.66it/s]


Epoch [91/100], Validation Loss: 2.9853, Validation Accuracy: 57.80%



Iteration 0: Loss = 0.0488, Reg Loss = 7.7258, Reconstruct Loss = 0.0000, Cls Loss = 0.0481, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0416, Reg Loss = 7.8493, Reconstruct Loss = 0.0005, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0384, Reg Loss = 7.8477, Reconstruct Loss = 0.0005, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0383, Reg Loss = 7.6688, Reconstruct Loss = 0.0004, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0386, Reg Loss = 7.6461, Reconstruct Loss = 0.0005, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0399, Reg Loss = 7.6636, Reconstruct Loss = 0.0004, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0394, Reg Loss = 7.6920, Reconstruct Loss = 0.0004, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0396, Reg Loss = 7.6847, Reconstruct Loss = 0.0004, C

100%|██████████| 157/157 [00:03<00:00, 44.79it/s]


Epoch [92/100], Validation Loss: 1.8608, Validation Accuracy: 67.91%



Iteration 0: Loss = 0.0173, Reg Loss = 7.3572, Reconstruct Loss = 0.0000, Cls Loss = 0.0166, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0458, Reg Loss = 7.8117, Reconstruct Loss = 0.0010, Cls Loss = 0.0441, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0428, Reg Loss = 7.7489, Reconstruct Loss = 0.0006, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0437, Reg Loss = 7.7637, Reconstruct Loss = 0.0007, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0442, Reg Loss = 7.8149, Reconstruct Loss = 0.0007, Cls Loss = 0.0427, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0446, Reg Loss = 7.7982, Reconstruct Loss = 0.0007, Cls Loss = 0.0432, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0434, Reg Loss = 7.8383, Reconstruct Loss = 0.0006, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0434, Reg Loss = 7.9050, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [93/100], Validation Loss: 4.2751, Validation Accuracy: 49.93%



Iteration 0: Loss = 0.0278, Reg Loss = 7.2487, Reconstruct Loss = 0.0000, Cls Loss = 0.0271, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0369, Reg Loss = 7.9351, Reconstruct Loss = 0.0010, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0416, Reg Loss = 7.8817, Reconstruct Loss = 0.0008, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0418, Reg Loss = 7.8827, Reconstruct Loss = 0.0007, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0411, Reg Loss = 7.8435, Reconstruct Loss = 0.0007, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0425, Reg Loss = 7.8447, Reconstruct Loss = 0.0006, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0425, Reg Loss = 7.8216, Reconstruct Loss = 0.0006, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0430, Reg Loss = 7.8081, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.98it/s]


Epoch [94/100], Validation Loss: 4.5663, Validation Accuracy: 46.80%



Iteration 0: Loss = 0.1088, Reg Loss = 7.5102, Reconstruct Loss = 0.0000, Cls Loss = 0.1080, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0391, Reg Loss = 7.7116, Reconstruct Loss = 0.0002, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0393, Reg Loss = 7.7606, Reconstruct Loss = 0.0006, Cls Loss = 0.0380, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0424, Reg Loss = 7.8512, Reconstruct Loss = 0.0007, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0430, Reg Loss = 7.8641, Reconstruct Loss = 0.0008, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0438, Reg Loss = 7.8636, Reconstruct Loss = 0.0008, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0435, Reg Loss = 7.8257, Reconstruct Loss = 0.0007, Cls Loss = 0.0420, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0420, Reg Loss = 7.7882, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.82it/s]


Epoch [95/100], Validation Loss: 1.0078, Validation Accuracy: 76.72%



Checkpoint saved at epoch 94 with accuracy: 76.72%
Iteration 0: Loss = 0.0256, Reg Loss = 8.0805, Reconstruct Loss = 0.0000, Cls Loss = 0.0248, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0436, Reg Loss = 7.7323, Reconstruct Loss = 0.0000, Cls Loss = 0.0429, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0434, Reg Loss = 7.7918, Reconstruct Loss = 0.0002, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0425, Reg Loss = 7.7992, Reconstruct Loss = 0.0003, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0419, Reg Loss = 7.7554, Reconstruct Loss = 0.0005, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0413, Reg Loss = 7.7365, Reconstruct Loss = 0.0006, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0411, Reg Loss = 7.7404, Reconstruct Loss = 0.0006, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.04

100%|██████████| 157/157 [00:03<00:00, 44.83it/s]


Epoch [96/100], Validation Loss: 5.9543, Validation Accuracy: 39.70%



Iteration 0: Loss = 0.0352, Reg Loss = 8.2207, Reconstruct Loss = 0.0000, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0390, Reg Loss = 8.1518, Reconstruct Loss = 0.0012, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0381, Reg Loss = 7.9533, Reconstruct Loss = 0.0007, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0380, Reg Loss = 7.9602, Reconstruct Loss = 0.0006, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0381, Reg Loss = 7.8662, Reconstruct Loss = 0.0005, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0387, Reg Loss = 7.8398, Reconstruct Loss = 0.0005, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0397, Reg Loss = 7.8856, Reconstruct Loss = 0.0005, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0396, Reg Loss = 7.9066, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 44.91it/s]


Epoch [97/100], Validation Loss: 3.5738, Validation Accuracy: 55.11%



Iteration 0: Loss = 0.0416, Reg Loss = 8.1364, Reconstruct Loss = 0.0000, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0410, Reg Loss = 8.0947, Reconstruct Loss = 0.0006, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0435, Reg Loss = 7.9915, Reconstruct Loss = 0.0005, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0410, Reg Loss = 8.0037, Reconstruct Loss = 0.0005, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0415, Reg Loss = 8.0064, Reconstruct Loss = 0.0005, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0423, Reg Loss = 8.0256, Reconstruct Loss = 0.0004, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0417, Reg Loss = 8.0356, Reconstruct Loss = 0.0003, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0414, Reg Loss = 8.0054, Reconstruct Loss = 0.0003, C

100%|██████████| 157/157 [00:03<00:00, 45.25it/s]


Epoch [98/100], Validation Loss: 1.9194, Validation Accuracy: 66.50%



Iteration 0: Loss = 0.0385, Reg Loss = 8.1310, Reconstruct Loss = 0.0000, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0360, Reg Loss = 8.1450, Reconstruct Loss = 0.0006, Cls Loss = 0.0345, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0362, Reg Loss = 8.1697, Reconstruct Loss = 0.0006, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0367, Reg Loss = 8.2303, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0375, Reg Loss = 8.1436, Reconstruct Loss = 0.0006, Cls Loss = 0.0362, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0382, Reg Loss = 8.1300, Reconstruct Loss = 0.0005, Cls Loss = 0.0369, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0390, Reg Loss = 8.1467, Reconstruct Loss = 0.0006, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0390, Reg Loss = 8.1410, Reconstruct Loss = 0.0006, C

100%|██████████| 157/157 [00:03<00:00, 45.00it/s]


Epoch [99/100], Validation Loss: 0.9585, Validation Accuracy: 80.11%



Checkpoint saved at epoch 98 with accuracy: 80.11%
Iteration 0: Loss = 0.0120, Reg Loss = 8.1267, Reconstruct Loss = 0.0000, Cls Loss = 0.0112, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0366, Reg Loss = 7.9520, Reconstruct Loss = 0.0006, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0363, Reg Loss = 7.7419, Reconstruct Loss = 0.0005, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0356, Reg Loss = 7.8346, Reconstruct Loss = 0.0006, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0371, Reg Loss = 7.8252, Reconstruct Loss = 0.0005, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0391, Reg Loss = 7.8316, Reconstruct Loss = 0.0005, Cls Loss = 0.0378, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0386, Reg Loss = 7.8599, Reconstruct Loss = 0.0005, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.03

100%|██████████| 157/157 [00:03<00:00, 44.65it/s]

Epoch [100/100], Validation Loss: 1.0561, Validation Accuracy: 78.84%








In [39]:
# Close the active wandb run now that training is done; the run history and
# run summary tables shown in this cell's output are printed by this call.
wandb.finish()

0,1
Cls Loss,█▇▇▅▅▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,██▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▃▁▁▁▁
Reconstruct Loss,█▅▅▁▅▅▅▄▄▅▁▁▂▁▃▂▃▂▂▂▂▃▂▂▂▂▂▁▂▂▃▃▂▂▂▃▇▃▂▃
Reg Loss,▃▁▂▂▂▆▆▆▆▇▆▇▇▆▇▇▇▇▇▇▇▇▇▇█▇████▇█▇▇██▇▇▇▇
Training accuracy,▁▂▃▄▅▇▇▇▇▇▇▇████████████████████████████
Validation Accuracy,▇▇▆▆▆▄▄▃▁▂▅▅▂▃▃▃▄▆▃▄▆▅▃▂▂▇▄▄▅▃▇▆▆▅▃▄█▃▅█
Validation Loss,▁▁▁▁▁▂▂▂▄▇▅▅█▅▂▂▃▄▃▃▂▂▄▄▃▁▃▃▂▂▄▁▂▂▂▄▁▃▁▁

0,1
Cls Loss,0.03999
Learning rate,0.001
Loss,0.04136
Reconstruct Loss,0.00058
Reg Loss,7.80665
Training accuracy,0.98698
Validation Accuracy,0.7884
Validation Loss,1.05606


### 7 Testing loop

In [40]:
# Path of the hypernetwork checkpoint to evaluate, located in the experiment's
# output directory (presumably the best checkpoint written during training —
# confirm against the training loop).
# Built with os.path.join, like the other paths in this notebook, so that a
# trailing separator in save_model_path cannot produce a doubled '/'.
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [41]:
# Display the resolved checkpoint path as a sanity check before loading it.
saved_hypernet_path

'toy/experiments_densenet/dense_5th_experiment/cifar10_nerf_best.pth'

In [42]:
# Build a fresh hypernetwork instance; the saved checkpoint weights are loaded
# into it in the next cell. `number_param` is defined in an earlier cell.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [43]:
# Read the saved checkpoint with its tensors mapped to the CPU, then copy the
# 'model_state_dict' entry into the test hypernetwork. load_state_dict is the
# last expression of the cell, so its result is displayed as the cell output.
# NOTE(review): torch.load unpickles the file — only load checkpoints that come
# from a trusted source.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [44]:
# Evaluate the loaded hypernetwork at every hidden dimension from 12 to 48
# (inclusive). For each width: build a DenseNet of that width, obtain a merged
# model from the hypernetwork via sample_merge_model, validate it, save its
# state dict, and append the metrics to a per-experiment results file.

# The results file does not depend on hidden_dim, so resolve its path once.
# It is opened in append mode below: a new file is created if it doesn't exist,
# and re-running this cell adds further rows to the same file.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, apply it
    # NOTE(review): `ema` is created in an earlier cell; if it wraps the training
    # hypernetwork rather than `hyper_model_test`, apply()/restore() have no
    # effect on the model evaluated here — confirm.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        # (K=100 is forwarded as-is; presumably the number of samples merged — confirm.)
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always restore the original weights, even if sampling or validation
        # raises: otherwise the EMA weights stay applied in the live kernel and a
        # re-run of this cell would call apply() on already-swapped weights.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Write the results. 'a' is used to append the results; a new file will be created if it doesn't exist.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 40.88it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 8.9631, Validation Accuracy: 36.18%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.50it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 5.7470, Validation Accuracy: 48.89%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.62it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 5.2920, Validation Accuracy: 49.75%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.82it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 9.0107, Validation Accuracy: 40.35%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.78it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 6.7997, Validation Accuracy: 38.42%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.75it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 6.6963, Validation Accuracy: 45.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.50it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 6.3075, Validation Accuracy: 48.19%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.54it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 8.3451, Validation Accuracy: 39.50%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.48it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 6.6597, Validation Accuracy: 42.52%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.40it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 5.9996, Validation Accuracy: 46.58%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.46it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 4.4435, Validation Accuracy: 53.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.69it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 7.2552, Validation Accuracy: 36.45%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.64it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 4.0813, Validation Accuracy: 52.04%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.45it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 3.1053, Validation Accuracy: 59.59%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.17it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 4.8966, Validation Accuracy: 49.71%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.07it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 4.8606, Validation Accuracy: 49.11%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.95it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 5.1713, Validation Accuracy: 45.46%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.47it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 3.6315, Validation Accuracy: 57.69%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 41.63it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 7.7839, Validation Accuracy: 39.87%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.76it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 3.7276, Validation Accuracy: 55.02%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.63it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 2.6439, Validation Accuracy: 63.68%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.19it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 3.4007, Validation Accuracy: 56.20%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.05it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 4.6943, Validation Accuracy: 51.23%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.59it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 4.3670, Validation Accuracy: 51.49%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.46it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 5.9282, Validation Accuracy: 42.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.19it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 3.7025, Validation Accuracy: 57.22%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 41.92it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 3.4243, Validation Accuracy: 55.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.23it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 2.3177, Validation Accuracy: 64.95%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.70it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 3.0878, Validation Accuracy: 62.05%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.55it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 4.1285, Validation Accuracy: 52.51%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.47it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 6.6551, Validation Accuracy: 40.64%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.24it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 4.8393, Validation Accuracy: 48.58%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 44.26it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 4.1625, Validation Accuracy: 53.15%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.49it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 8.2548, Validation Accuracy: 35.45%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.65it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 5.0075, Validation Accuracy: 46.05%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.20it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 3.9082, Validation Accuracy: 55.31%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.16it/s]

Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 2.1870, Validation Accuracy: 67.27%





