## Import

In [34]:
import os
import random

In [35]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [36]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [37]:
def find_max_dim(model_cls):
    """Return the largest dimension found among the model's learnable parameters.

    The result is the maximum over:
      * the number of entries in ``model_cls.learnable_parameter``,
      * the sizes of the first two axes of every 4-D tensor
        (e.g. convolution weights),
      * the length of every 1-D tensor (e.g. biases).

    Tensors of any other rank do not contribute.
    """
    learnable = model_cls.learnable_parameter

    # Seed with the parameter count so the result is never smaller than it
    candidates = [len(learnable)]

    for tensor in learnable.values():
        rank = len(tensor.shape)
        if rank == 4:
            # Only the two leading axes are considered; the last two are skipped
            candidates.extend(tensor.shape[:2])
        elif rank == 1:
            candidates.append(tensor.shape[0])

    return max(candidates)

### Initialize wandb

In [38]:
def initialize_wandb(config):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    The run is named ``<YYYYmmddHHMMSS>-<config.experiment.name>`` and is
    created in the "dense-inr-trial" project under the 'cifar10' group.

    Args:
        config: Configuration parameters for the run. Must expose
            ``experiment.name`` and be convertible with ``dict(config)``;
            the resulting mapping is passed to wandb as the run config.
    """
    # Local import kept after the docstring so the string above is a real
    # docstring; `time` is only needed to timestamp the run name.
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project="dense-inr-trial", name=run_name, config=dict(config), group='cifar10')

### Init model dictionary

In [39]:
def init_model_dict(args, device):
    """
    Build one target network per hidden dimension, plus the reference network.

    For every ``dim`` in ``args.dimensions.range`` a model is created with
    ``create_model`` (``hidden_dim=dim``), moved to ``device`` and handed to
    ``sample_coordinates``. When ``dim`` equals ``args.dimensions.start`` a
    second model is created with ``smooth=True``, switched to eval mode and
    stored as the ground-truth model for that dimension.

    Args:
        args: Configuration object providing ``dimensions.range``,
            ``dimensions.start`` and the ``model.*`` settings forwarded to
            ``create_model``.
        device: Device every created model is moved to.

    Returns:
        dim_dict: Maps ``f"{dim}"`` to the tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``;
            the trailing ``None`` fills the key-mask slot.
        gt_model_dict: Maps ``f"{dim}"`` of the starting dimension to its
            ground-truth model (empty if that dimension is not in the range).
    """
    def build(hidden_dim, **extra):
        # Every model shares the architecture settings; only hidden_dim and
        # the optional extras (e.g. smooth=True) differ between calls.
        cfg = args.model
        net = create_model(cfg.type,
                           layers=cfg.layers,
                           growth=cfg.growth,
                           compression=cfg.compression,
                           bottleneck=cfg.bottleneck,
                           drop_rate=cfg.drop_rate,
                           hidden_dim=hidden_dim,
                           path=cfg.pretrained_path,
                           **extra)
        return net.to(device)

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        label = f"{dim}"

        # Target network for this dimension and its sampled coordinate data
        candidate = build(dim)
        coords_tensor, keys_list, indices_list, size_list = sample_coordinates(candidate)
        dim_dict[label] = (candidate, coords_tensor, keys_list, indices_list, size_list, None)

        # Blank lines to separate the per-model console output
        print('\n')

        # Only the starting dimension (the pretrained model's dimension) gets a ground-truth model
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = build(dim, smooth=True)
        reference.eval()
        gt_model_dict[label] = reference

    return dim_dict, gt_model_dict

### Training function

In [40]:
# Function to train the model for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """
    Run one training epoch of the hypernetwork `model`.

    For every batch a hidden dimension is drawn at random from
    `args.dimensions.range`, the matching entry of `dim_dict` is looked up and
    sub-sampled with `sample_subset`, and `sample_weights` is called with the
    hypernetwork and that entry to obtain a target network together with the
    list of reconstructed weights. The target network classifies the batch.
    The total loss is the weighted sum of the classification loss, an L2-norm
    penalty over the reconstructed weights and, when `gt_model_dict` has an
    entry for the drawn dimension, a reconstruction loss against that model's
    `learnable_parameter` values.

    Running loss averages and the learning rate are logged to wandb (and
    printed) every `args.experiment.log_interval` batches; the epoch's
    training accuracy is logged to wandb once at the end.

    Args:
        model: Hypernetwork being trained (switched to train mode here).
        train_loader: Iterable of `(x, target)` batches; `len(train_loader)`
            is used to compute the wandb step.
        optimizer: Optimizer; zeroed and stepped once per batch.
        criterion: Classification loss, called as `criterion(predict, target)`.
        dim_dict: Maps `f"{hidden_dim}"` to the 6-tuple
            `(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)`.
        gt_model_dict: Maps `f"{hidden_dim}"` to a model exposing
            `learnable_parameter`; dimensions without an entry get a zero
            reconstruction loss.
        epoch_idx: Epoch index, used to offset the wandb step.
        ema: Optional object whose `update()` is called after each optimizer step.
        args: Configuration object. Despite the `None` default it is required:
            it is dereferenced unconditionally inside the batch loop.
        device: Device the input batch and targets are moved to.

    Returns:
        Tuple `(losses.avg, dim_dict, gt_model_dict, train_acc)`: the average
        tracked by the total-loss meter, the two dictionaries exactly as they
        were passed in, and the accuracy over all batches of this epoch.
    """
    # Set the model to training mode
    model.train()
    # NOTE(review): accumulated in the loop below but never read in this function
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Per-batch predictions and targets, concatenated after the loop for the training accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys
        # (the local names are rebound, the tuple stored in dim_dict is not modified by this assignment)
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add noise to the coordinates if specified:
        # uniform in [-coordinate_noise / 2, +coordinate_noise / 2)
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        # (presumably sample_weights also installs them into model_cls — confirm against neumeta.utils)
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass
        predict = model_cls(x)
        
        # Predicted class index per sample, kept for the epoch-level training accuracy

        pred = torch.argmax(predict, dim=-1)

        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target) 
        # Compute regularization loss: sum of the L2 norms of the reconstructed weights
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Ground-truth tensors for the selected keys, in learnable_parameter iteration order
            # (assumes this order lines up with reconstructed_weights — TODO confirm)
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # No ground truth for this dimension: use a zero tensor so the
            # weighted sum and the .item() call below still work
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear stale gradients on the target network's parameters
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # First backward pass; the graph is retained because a second backward call follows
        loss.backward(retain_graph=True)
        # Second backward pass: the gradients now stored on the target network's selected
        # parameters are supplied as the upstream gradients of reconstructed_weights
        # (presumably this is how the classification gradient reaches the hypernetwork — confirm)
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])
        
        # Clip the hypernetwork's gradients element-wise if specified
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)
            
        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the losses and learning rate to wandb
        if batch_idx % args.experiment.log_interval == 0:
            # The step is the global batch index across epochs
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")
    
    # Accuracy over every batch seen in this epoch
    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # Logged without an explicit step, unlike the per-interval losses above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Returns the average training loss, the per-dimension dictionary, the ground-truth dictionary and the training accuracy
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select the device (GPU if available, otherwise CPU)

In [41]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### 1 Parsing arguments for input

In [42]:
# Experiment configuration file, passed to parse_args via --config
CONFIG_PATH = 'neumeta/config/densenet_inr_train/dense_27th_experiment.yaml'
# Kept as a string because it is forwarded as a command-line style argument (--ratio)
RATIO = '1.0'
# Hypernetwork checkpoint, passed to parse_args via --resume_from
CHECKPOINT_PATH = 'toy/experiments_densenet/dense_27th_experiment/cifar10_nerf_best.pth'

In [43]:
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [44]:
args = parse_args(argv_train)  # Parse arguments
print_omegaconf(args)  # Print arguments

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                        dense_27th_experiment                                         |
|        experiment.num_epochs         |                                                 200                                                  |
|       experiment.log_interval        |                                                 100                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [45]:
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [46]:
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 64 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [47]:
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [48]:
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [49]:
# List each learnable parameter of the target model with its shape
# (the enumerate index `i` is not used by the loop body)
for i, (k, tensor) in enumerate(model.learnable_parameter.items()):
    print(k, tensor.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [50]:
# Print the maximum dimension of the model
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [51]:
# Validate the model for the starting dimension (its pretrained form)
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 42.12it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.93%





In [52]:
checkpoint = model.learnable_parameter
number_param = len(checkpoint)

In [53]:
# Print the keys of the parameters and the number of parameters
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [54]:
# Get the hypermodel
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [55]:
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=256, bias=True)
      (residual_blocks): ModuleList(
        (0-6): 7 x Linear(in_features=256, out_features=256, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
          (3): Parameter containing: [torch.float32 of size  (cuda:0)]
          (4): Parameter containing: [torch.float32 of size  (cuda:0)]
          (5): Parameter containing: [torch.float32 of size  (cuda:0)]
          (6): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=256, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize EMA to track a smoothed (exponential moving average) copy of the hypernetwork weights

In [56]:
# Initialize the EMA
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [57]:
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [58]:
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x000002559E19F190>


### 6 Training loop

#### 6.1 Initialize training parameters

In [59]:
# Initialize the starting epoch and best accuracy
start_epoch = 0
best_acc = 0.0

#### 6.2 Directory to save the model

In [60]:
# Create the directory to save the model
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [61]:
args.resume_from

'toy/experiments_densenet/dense_27th_experiment/cifar10_nerf_best.pth'

In [62]:
# args.resume_from = False

In [63]:
# Resume only when a checkpoint path was supplied.
# load_checkpoint receives the hypernetwork, the optimizer and the EMA object
# (presumably restoring their state — confirm against neumeta.utils).
# NOTE(review): `scheduler` is not passed to load_checkpoint, so its state is not
# restored by this cell — confirm the learning-rate schedule is as intended after resuming.
if args.resume_from:
        print(f"Resuming from checkpoint: {args.resume_from}")
        checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
        # Continue from the epoch and best accuracy recorded in the checkpoint
        start_epoch = checkpoint_info['epoch']
        best_acc = checkpoint_info['best_acc']
        print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
        # Note: If there are more elements to retrieve, do so here.

Resuming from checkpoint: toy/experiments_densenet/dense_27th_experiment/cifar10_nerf_best.pth
Resuming from epoch: 130, best accuracy: 70.22%


#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [64]:
# Initialize model dictionary
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exper

In [65]:
gt_model_dict['48']

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [66]:
# Validate the ground-truth model of the starting dimension (its pretrained form).
# The key is derived from the configuration instead of being hard-coded:
# init_model_dict registers the ground-truth model under f'{dim}' for
# dim == args.dimensions.start, so this lookup stays valid if the config changes.
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 43.85it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.92%





In [67]:
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [68]:
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [69]:
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [70]:
args.experiment.num_epochs

200

In [71]:
# Iterate over the epochs
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch; each batch uses a randomly chosen hidden dimension
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion, 
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema, 
                                                                     args=args, device=device)
    # Step the scheduler (once per epoch)
    scheduler.step()

    # Print the training loss and learning rate
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # If it's time to evaluate the model
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Evaluate with the EMA state when EMA is enabled
        # (presumably ema.apply() swaps the averaged weights into hyper_model — confirm against neumeta.utils.EMA)
        if ema:
            ema.apply()
        
        # Build the evaluation model from the hypernetwork's output.
        # NOTE: this rebinds the notebook-level `model`, which is also the input to the next evaluation.
        # (presumably sample_merge_model writes the generated weights into `model` — confirm)
        model = sample_merge_model(hyper_model, model, args) 
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # Undo ema.apply() so training continues from the non-averaged weights
        # (assumed from the apply/restore pairing — TODO confirm)
        if ema:
            ema.restore()

        # Log the validation loss and accuracy to wandb
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            # NOTE(review): this message prints the 0-based `epoch`, while the progress messages above print `epoch+1`
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.0296, Reg Loss = 7.9791, Reconstruct Loss = 0.0000, Cls Loss = 0.0288, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0342, Reg Loss = 7.8723, Reconstruct Loss = 0.0010, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0370, Reg Loss = 7.9378, Reconstruct Loss = 0.0019, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0355, Reg Loss = 7.9065, Reconstruct Loss = 0.0016, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0344, Reg Loss = 7.8539, Reconstruct Loss = 0.0013, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0348, Reg Loss = 7.8305, Reconstruct Loss = 0.0012, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0346, Reg Loss = 7.8058, Reconstruct Loss = 0.0011, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0354, Reg Loss = 7.8061, Reconstruct Loss = 0.0012, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Epoch [131/200], Training 

100%|██████████| 157/157 [00:03<00:00, 43.51it/s]


Epoch [131/200], Validation Loss: 2.1766, Validation Accuracy: 65.40%



Iteration 0: Loss = 0.0186, Reg Loss = 7.7551, Reconstruct Loss = 0.0000, Cls Loss = 0.0178, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0427, Reg Loss = 7.8543, Reconstruct Loss = 0.0023, Cls Loss = 0.0397, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0389, Reg Loss = 7.8416, Reconstruct Loss = 0.0018, Cls Loss = 0.0363, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0380, Reg Loss = 7.8317, Reconstruct Loss = 0.0017, Cls Loss = 0.0355, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0372, Reg Loss = 7.8258, Reconstruct Loss = 0.0016, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0365, Reg Loss = 7.8191, Reconstruct Loss = 0.0016, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0370, Reg Loss = 7.8137, Reconstruct Loss = 0.0015, Cls Loss = 0.0347, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0365, Reg Loss = 7.8190, Reconstruct Loss = 0.0015,

100%|██████████| 157/157 [00:03<00:00, 43.68it/s]


Epoch [132/200], Validation Loss: 2.2898, Validation Accuracy: 64.98%



Iteration 0: Loss = 0.0701, Reg Loss = 7.8105, Reconstruct Loss = 0.0000, Cls Loss = 0.0693, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0361, Reg Loss = 7.6942, Reconstruct Loss = 0.0009, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0349, Reg Loss = 7.7201, Reconstruct Loss = 0.0012, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0353, Reg Loss = 7.7190, Reconstruct Loss = 0.0013, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0363, Reg Loss = 7.7296, Reconstruct Loss = 0.0014, Cls Loss = 0.0341, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0362, Reg Loss = 7.7164, Reconstruct Loss = 0.0012, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0363, Reg Loss = 7.7302, Reconstruct Loss = 0.0014, Cls Loss = 0.0341, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0365, Reg Loss = 7.7327, Reconstruct Loss = 0.0013,

100%|██████████| 157/157 [00:03<00:00, 43.54it/s]


Epoch [133/200], Validation Loss: 2.3646, Validation Accuracy: 63.58%



Iteration 0: Loss = 0.0403, Reg Loss = 7.5855, Reconstruct Loss = 0.0000, Cls Loss = 0.0395, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0341, Reg Loss = 7.7108, Reconstruct Loss = 0.0007, Cls Loss = 0.0326, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0366, Reg Loss = 7.7200, Reconstruct Loss = 0.0010, Cls Loss = 0.0349, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0349, Reg Loss = 7.6921, Reconstruct Loss = 0.0009, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0352, Reg Loss = 7.6818, Reconstruct Loss = 0.0010, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0346, Reg Loss = 7.6751, Reconstruct Loss = 0.0010, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0343, Reg Loss = 7.6735, Reconstruct Loss = 0.0011, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0339, Reg Loss = 7.6722, Reconstruct Loss = 0.0011,

100%|██████████| 157/157 [00:03<00:00, 42.21it/s]


Epoch [134/200], Validation Loss: 2.0397, Validation Accuracy: 66.64%



Iteration 0: Loss = 0.0554, Reg Loss = 7.6218, Reconstruct Loss = 0.0000, Cls Loss = 0.0546, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0398, Reg Loss = 7.8027, Reconstruct Loss = 0.0013, Cls Loss = 0.0377, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0370, Reg Loss = 7.7969, Reconstruct Loss = 0.0013, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0348, Reg Loss = 7.7639, Reconstruct Loss = 0.0011, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0355, Reg Loss = 7.7503, Reconstruct Loss = 0.0012, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0352, Reg Loss = 7.7187, Reconstruct Loss = 0.0010, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0357, Reg Loss = 7.7051, Reconstruct Loss = 0.0010, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0352, Reg Loss = 7.6796, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.78it/s]


Epoch [135/200], Validation Loss: 1.8738, Validation Accuracy: 65.34%



Iteration 0: Loss = 0.0340, Reg Loss = 7.6487, Reconstruct Loss = 0.0000, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0348, Reg Loss = 7.6626, Reconstruct Loss = 0.0012, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0343, Reg Loss = 7.6394, Reconstruct Loss = 0.0011, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0334, Reg Loss = 7.6402, Reconstruct Loss = 0.0011, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0343, Reg Loss = 7.6265, Reconstruct Loss = 0.0011, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0347, Reg Loss = 7.6432, Reconstruct Loss = 0.0012, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0348, Reg Loss = 7.6401, Reconstruct Loss = 0.0010, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0347, Reg Loss = 7.6352, Reconstruct Loss = 0.0010,

100%|██████████| 157/157 [00:03<00:00, 42.92it/s]


Epoch [136/200], Validation Loss: 2.6699, Validation Accuracy: 62.56%



Iteration 0: Loss = 0.0141, Reg Loss = 7.6162, Reconstruct Loss = 0.0000, Cls Loss = 0.0134, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0354, Reg Loss = 7.5560, Reconstruct Loss = 0.0002, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0362, Reg Loss = 7.5939, Reconstruct Loss = 0.0006, Cls Loss = 0.0349, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0366, Reg Loss = 7.6172, Reconstruct Loss = 0.0007, Cls Loss = 0.0352, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0362, Reg Loss = 7.6301, Reconstruct Loss = 0.0008, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0355, Reg Loss = 7.6305, Reconstruct Loss = 0.0007, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0359, Reg Loss = 7.6529, Reconstruct Loss = 0.0009, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0357, Reg Loss = 7.6522, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.96it/s]


Epoch [137/200], Validation Loss: 2.6518, Validation Accuracy: 61.14%



Iteration 0: Loss = 0.0178, Reg Loss = 7.8418, Reconstruct Loss = 0.0000, Cls Loss = 0.0171, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0348, Reg Loss = 7.5826, Reconstruct Loss = 0.0006, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0341, Reg Loss = 7.6498, Reconstruct Loss = 0.0009, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0353, Reg Loss = 7.6603, Reconstruct Loss = 0.0010, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0346, Reg Loss = 7.6556, Reconstruct Loss = 0.0011, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0351, Reg Loss = 7.6428, Reconstruct Loss = 0.0010, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0345, Reg Loss = 7.6325, Reconstruct Loss = 0.0009, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0350, Reg Loss = 7.6278, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.88it/s]


Epoch [138/200], Validation Loss: 1.9537, Validation Accuracy: 66.06%



Iteration 0: Loss = 0.0340, Reg Loss = 7.3934, Reconstruct Loss = 0.0000, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0445, Reg Loss = 7.6122, Reconstruct Loss = 0.0013, Cls Loss = 0.0424, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0389, Reg Loss = 7.5545, Reconstruct Loss = 0.0010, Cls Loss = 0.0371, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0358, Reg Loss = 7.5314, Reconstruct Loss = 0.0009, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0355, Reg Loss = 7.5351, Reconstruct Loss = 0.0010, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0355, Reg Loss = 7.5283, Reconstruct Loss = 0.0009, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0350, Reg Loss = 7.5305, Reconstruct Loss = 0.0009, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0352, Reg Loss = 7.5336, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 43.05it/s]


Epoch [139/200], Validation Loss: 2.0765, Validation Accuracy: 62.15%



Iteration 0: Loss = 0.0618, Reg Loss = 9.4905, Reconstruct Loss = 0.0209, Cls Loss = 0.0400, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0398, Reg Loss = 7.4872, Reconstruct Loss = 0.0008, Cls Loss = 0.0382, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0355, Reg Loss = 7.4478, Reconstruct Loss = 0.0007, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0360, Reg Loss = 7.4589, Reconstruct Loss = 0.0009, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0355, Reg Loss = 7.4640, Reconstruct Loss = 0.0010, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0352, Reg Loss = 7.4521, Reconstruct Loss = 0.0008, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0348, Reg Loss = 7.4650, Reconstruct Loss = 0.0009, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0355, Reg Loss = 7.4710, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.91it/s]


Epoch [140/200], Validation Loss: 1.8166, Validation Accuracy: 67.82%



Iteration 0: Loss = 0.0388, Reg Loss = 7.2021, Reconstruct Loss = 0.0000, Cls Loss = 0.0381, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0366, Reg Loss = 7.3981, Reconstruct Loss = 0.0004, Cls Loss = 0.0355, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0342, Reg Loss = 7.3935, Reconstruct Loss = 0.0005, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0344, Reg Loss = 7.4209, Reconstruct Loss = 0.0007, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0340, Reg Loss = 7.4298, Reconstruct Loss = 0.0008, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0349, Reg Loss = 7.4165, Reconstruct Loss = 0.0007, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0343, Reg Loss = 7.4109, Reconstruct Loss = 0.0007, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0349, Reg Loss = 7.4058, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.53it/s]


Epoch [141/200], Validation Loss: 1.9503, Validation Accuracy: 65.27%



Iteration 0: Loss = 0.0294, Reg Loss = 7.0650, Reconstruct Loss = 0.0000, Cls Loss = 0.0287, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0395, Reg Loss = 7.4624, Reconstruct Loss = 0.0020, Cls Loss = 0.0368, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0386, Reg Loss = 7.4130, Reconstruct Loss = 0.0012, Cls Loss = 0.0367, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0391, Reg Loss = 7.4097, Reconstruct Loss = 0.0012, Cls Loss = 0.0371, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0379, Reg Loss = 7.3901, Reconstruct Loss = 0.0011, Cls Loss = 0.0361, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0374, Reg Loss = 7.3838, Reconstruct Loss = 0.0010, Cls Loss = 0.0356, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0367, Reg Loss = 7.3766, Reconstruct Loss = 0.0010, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0367, Reg Loss = 7.3645, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.77it/s]


Epoch [142/200], Validation Loss: 2.4743, Validation Accuracy: 61.67%



Iteration 0: Loss = 0.0264, Reg Loss = 7.0273, Reconstruct Loss = 0.0000, Cls Loss = 0.0257, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0334, Reg Loss = 7.2933, Reconstruct Loss = 0.0009, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0374, Reg Loss = 7.3322, Reconstruct Loss = 0.0011, Cls Loss = 0.0355, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0357, Reg Loss = 7.3558, Reconstruct Loss = 0.0011, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0351, Reg Loss = 7.3807, Reconstruct Loss = 0.0012, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0351, Reg Loss = 7.3964, Reconstruct Loss = 0.0012, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0347, Reg Loss = 7.4093, Reconstruct Loss = 0.0011, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0341, Reg Loss = 7.4139, Reconstruct Loss = 0.0010,

100%|██████████| 157/157 [00:03<00:00, 42.99it/s]


Epoch [143/200], Validation Loss: 1.5198, Validation Accuracy: 69.79%



Iteration 0: Loss = 0.0336, Reg Loss = 7.2748, Reconstruct Loss = 0.0000, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0360, Reg Loss = 7.4173, Reconstruct Loss = 0.0008, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0357, Reg Loss = 7.3702, Reconstruct Loss = 0.0005, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0361, Reg Loss = 7.3632, Reconstruct Loss = 0.0005, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0347, Reg Loss = 7.3685, Reconstruct Loss = 0.0006, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0342, Reg Loss = 7.3679, Reconstruct Loss = 0.0006, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0336, Reg Loss = 7.3570, Reconstruct Loss = 0.0005, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0342, Reg Loss = 7.3444, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.71it/s]


Epoch [144/200], Validation Loss: 2.4711, Validation Accuracy: 61.34%



Iteration 0: Loss = 0.0327, Reg Loss = 7.1112, Reconstruct Loss = 0.0000, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0358, Reg Loss = 7.3234, Reconstruct Loss = 0.0008, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0342, Reg Loss = 7.3161, Reconstruct Loss = 0.0007, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0362, Reg Loss = 7.3189, Reconstruct Loss = 0.0007, Cls Loss = 0.0347, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0361, Reg Loss = 7.3299, Reconstruct Loss = 0.0007, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0361, Reg Loss = 7.3466, Reconstruct Loss = 0.0007, Cls Loss = 0.0347, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0363, Reg Loss = 7.3473, Reconstruct Loss = 0.0007, Cls Loss = 0.0349, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0357, Reg Loss = 7.3528, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.59it/s]


Epoch [145/200], Validation Loss: 1.7134, Validation Accuracy: 69.34%



Iteration 0: Loss = 0.0140, Reg Loss = 7.3431, Reconstruct Loss = 0.0000, Cls Loss = 0.0133, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0372, Reg Loss = 7.3747, Reconstruct Loss = 0.0006, Cls Loss = 0.0359, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0356, Reg Loss = 7.4252, Reconstruct Loss = 0.0009, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0357, Reg Loss = 7.4426, Reconstruct Loss = 0.0009, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0360, Reg Loss = 7.4388, Reconstruct Loss = 0.0009, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0355, Reg Loss = 7.4366, Reconstruct Loss = 0.0008, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0349, Reg Loss = 7.4415, Reconstruct Loss = 0.0008, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0353, Reg Loss = 7.4325, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.58it/s]


Epoch [146/200], Validation Loss: 1.9297, Validation Accuracy: 67.91%



Iteration 0: Loss = 0.1173, Reg Loss = 7.6694, Reconstruct Loss = 0.0000, Cls Loss = 0.1165, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0385, Reg Loss = 7.4175, Reconstruct Loss = 0.0008, Cls Loss = 0.0370, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0384, Reg Loss = 7.4044, Reconstruct Loss = 0.0010, Cls Loss = 0.0367, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0360, Reg Loss = 7.3684, Reconstruct Loss = 0.0008, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0357, Reg Loss = 7.3534, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0349, Reg Loss = 7.3444, Reconstruct Loss = 0.0007, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0348, Reg Loss = 7.3401, Reconstruct Loss = 0.0007, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0348, Reg Loss = 7.3387, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.85it/s]


Epoch [147/200], Validation Loss: 2.0781, Validation Accuracy: 62.94%



Iteration 0: Loss = 0.0188, Reg Loss = 7.6196, Reconstruct Loss = 0.0000, Cls Loss = 0.0181, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0372, Reg Loss = 7.3260, Reconstruct Loss = 0.0005, Cls Loss = 0.0359, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0338, Reg Loss = 7.3397, Reconstruct Loss = 0.0008, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0351, Reg Loss = 7.3019, Reconstruct Loss = 0.0007, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0352, Reg Loss = 7.2912, Reconstruct Loss = 0.0007, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0355, Reg Loss = 7.2799, Reconstruct Loss = 0.0007, Cls Loss = 0.0341, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0356, Reg Loss = 7.2746, Reconstruct Loss = 0.0007, Cls Loss = 0.0341, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0354, Reg Loss = 7.2719, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.54it/s]


Epoch [148/200], Validation Loss: 1.6290, Validation Accuracy: 71.53%



Checkpoint saved at epoch 147 with accuracy: 71.53%
Iteration 0: Loss = 0.0202, Reg Loss = 7.0840, Reconstruct Loss = 0.0000, Cls Loss = 0.0195, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0355, Reg Loss = 7.3066, Reconstruct Loss = 0.0009, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0356, Reg Loss = 7.3548, Reconstruct Loss = 0.0011, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0346, Reg Loss = 7.3409, Reconstruct Loss = 0.0010, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0340, Reg Loss = 7.3275, Reconstruct Loss = 0.0008, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0340, Reg Loss = 7.3007, Reconstruct Loss = 0.0007, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0337, Reg Loss = 7.2927, Reconstruct Loss = 0.0007, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 700: Loss = 0

100%|██████████| 157/157 [00:03<00:00, 42.74it/s]


Epoch [149/200], Validation Loss: 1.9870, Validation Accuracy: 65.41%



Iteration 0: Loss = 0.0306, Reg Loss = 7.2356, Reconstruct Loss = 0.0000, Cls Loss = 0.0299, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0329, Reg Loss = 7.2516, Reconstruct Loss = 0.0009, Cls Loss = 0.0313, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0313, Reg Loss = 7.1887, Reconstruct Loss = 0.0007, Cls Loss = 0.0299, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0327, Reg Loss = 7.1586, Reconstruct Loss = 0.0006, Cls Loss = 0.0313, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0339, Reg Loss = 7.1473, Reconstruct Loss = 0.0005, Cls Loss = 0.0326, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0340, Reg Loss = 7.1515, Reconstruct Loss = 0.0006, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0342, Reg Loss = 7.1450, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0341, Reg Loss = 7.1462, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.65it/s]


Epoch [150/200], Validation Loss: 1.8651, Validation Accuracy: 66.43%



Iteration 0: Loss = 0.0083, Reg Loss = 6.8771, Reconstruct Loss = 0.0000, Cls Loss = 0.0076, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0335, Reg Loss = 7.1792, Reconstruct Loss = 0.0005, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0342, Reg Loss = 7.1849, Reconstruct Loss = 0.0006, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0331, Reg Loss = 7.2000, Reconstruct Loss = 0.0008, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0337, Reg Loss = 7.1898, Reconstruct Loss = 0.0008, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0334, Reg Loss = 7.1818, Reconstruct Loss = 0.0007, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0336, Reg Loss = 7.1843, Reconstruct Loss = 0.0007, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0334, Reg Loss = 7.1908, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.72it/s]


Epoch [151/200], Validation Loss: 1.8034, Validation Accuracy: 67.73%



Iteration 0: Loss = 0.0270, Reg Loss = 7.1847, Reconstruct Loss = 0.0000, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0350, Reg Loss = 7.2186, Reconstruct Loss = 0.0008, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0363, Reg Loss = 7.2069, Reconstruct Loss = 0.0010, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0358, Reg Loss = 7.1965, Reconstruct Loss = 0.0009, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0358, Reg Loss = 7.1735, Reconstruct Loss = 0.0008, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0357, Reg Loss = 7.1769, Reconstruct Loss = 0.0008, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0350, Reg Loss = 7.1648, Reconstruct Loss = 0.0007, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0347, Reg Loss = 7.1589, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.19it/s]


Epoch [152/200], Validation Loss: 1.7259, Validation Accuracy: 70.89%



Iteration 0: Loss = 0.0328, Reg Loss = 6.6488, Reconstruct Loss = 0.0000, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0403, Reg Loss = 7.1719, Reconstruct Loss = 0.0014, Cls Loss = 0.0381, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0369, Reg Loss = 7.1550, Reconstruct Loss = 0.0011, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0360, Reg Loss = 7.1375, Reconstruct Loss = 0.0008, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0348, Reg Loss = 7.1272, Reconstruct Loss = 0.0007, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0343, Reg Loss = 7.1623, Reconstruct Loss = 0.0009, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0344, Reg Loss = 7.1615, Reconstruct Loss = 0.0008, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0352, Reg Loss = 7.1536, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.75it/s]


Epoch [153/200], Validation Loss: 1.5086, Validation Accuracy: 69.36%



Iteration 0: Loss = 0.0630, Reg Loss = 6.9449, Reconstruct Loss = 0.0000, Cls Loss = 0.0623, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0413, Reg Loss = 7.1813, Reconstruct Loss = 0.0012, Cls Loss = 0.0394, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0371, Reg Loss = 7.2122, Reconstruct Loss = 0.0011, Cls Loss = 0.0353, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0350, Reg Loss = 7.2193, Reconstruct Loss = 0.0011, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0363, Reg Loss = 7.2327, Reconstruct Loss = 0.0011, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0362, Reg Loss = 7.2141, Reconstruct Loss = 0.0010, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0359, Reg Loss = 7.1988, Reconstruct Loss = 0.0009, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0362, Reg Loss = 7.1992, Reconstruct Loss = 0.0009,

100%|██████████| 157/157 [00:03<00:00, 42.89it/s]


Epoch [154/200], Validation Loss: 1.5764, Validation Accuracy: 68.88%



Iteration 0: Loss = 0.0481, Reg Loss = 6.8966, Reconstruct Loss = 0.0000, Cls Loss = 0.0474, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0368, Reg Loss = 7.2823, Reconstruct Loss = 0.0010, Cls Loss = 0.0351, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0386, Reg Loss = 7.2911, Reconstruct Loss = 0.0010, Cls Loss = 0.0369, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0362, Reg Loss = 7.2562, Reconstruct Loss = 0.0008, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0358, Reg Loss = 7.2592, Reconstruct Loss = 0.0008, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0361, Reg Loss = 7.2503, Reconstruct Loss = 0.0007, Cls Loss = 0.0347, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0364, Reg Loss = 7.2441, Reconstruct Loss = 0.0008, Cls Loss = 0.0349, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0364, Reg Loss = 7.2237, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.84it/s]


Epoch [155/200], Validation Loss: 1.9910, Validation Accuracy: 67.03%



Iteration 0: Loss = 0.0164, Reg Loss = 6.9830, Reconstruct Loss = 0.0000, Cls Loss = 0.0157, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0378, Reg Loss = 7.1806, Reconstruct Loss = 0.0003, Cls Loss = 0.0368, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0356, Reg Loss = 7.2323, Reconstruct Loss = 0.0005, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0350, Reg Loss = 7.2004, Reconstruct Loss = 0.0004, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0350, Reg Loss = 7.1887, Reconstruct Loss = 0.0005, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0350, Reg Loss = 7.1919, Reconstruct Loss = 0.0005, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0349, Reg Loss = 7.1893, Reconstruct Loss = 0.0005, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0345, Reg Loss = 7.1843, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.63it/s]


Epoch [156/200], Validation Loss: 2.1010, Validation Accuracy: 63.40%



Iteration 0: Loss = 0.0426, Reg Loss = 6.9658, Reconstruct Loss = 0.0000, Cls Loss = 0.0419, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0363, Reg Loss = 7.1502, Reconstruct Loss = 0.0012, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0375, Reg Loss = 7.1057, Reconstruct Loss = 0.0009, Cls Loss = 0.0359, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0361, Reg Loss = 7.0766, Reconstruct Loss = 0.0007, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0357, Reg Loss = 7.0735, Reconstruct Loss = 0.0006, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0365, Reg Loss = 7.0832, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0357, Reg Loss = 7.0860, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0346, Reg Loss = 7.0678, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 43.02it/s]


Epoch [157/200], Validation Loss: 1.6933, Validation Accuracy: 66.09%



Iteration 0: Loss = 0.0382, Reg Loss = 7.0262, Reconstruct Loss = 0.0000, Cls Loss = 0.0375, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0334, Reg Loss = 7.0801, Reconstruct Loss = 0.0005, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0340, Reg Loss = 7.0609, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0332, Reg Loss = 7.0678, Reconstruct Loss = 0.0004, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0329, Reg Loss = 7.0701, Reconstruct Loss = 0.0005, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0334, Reg Loss = 7.0823, Reconstruct Loss = 0.0006, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0333, Reg Loss = 7.0780, Reconstruct Loss = 0.0006, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0328, Reg Loss = 7.0603, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.80it/s]


Epoch [158/200], Validation Loss: 1.9851, Validation Accuracy: 66.71%



Iteration 0: Loss = 0.0391, Reg Loss = 6.7533, Reconstruct Loss = 0.0000, Cls Loss = 0.0384, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0371, Reg Loss = 6.8904, Reconstruct Loss = 0.0003, Cls Loss = 0.0361, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0363, Reg Loss = 6.9271, Reconstruct Loss = 0.0005, Cls Loss = 0.0351, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0359, Reg Loss = 6.9777, Reconstruct Loss = 0.0009, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0362, Reg Loss = 7.0068, Reconstruct Loss = 0.0010, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0367, Reg Loss = 6.9959, Reconstruct Loss = 0.0010, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0363, Reg Loss = 6.9809, Reconstruct Loss = 0.0008, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0362, Reg Loss = 6.9760, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.74it/s]


Epoch [159/200], Validation Loss: 1.6229, Validation Accuracy: 71.02%



Iteration 0: Loss = 0.0107, Reg Loss = 6.8870, Reconstruct Loss = 0.0000, Cls Loss = 0.0100, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0298, Reg Loss = 6.9983, Reconstruct Loss = 0.0006, Cls Loss = 0.0284, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0325, Reg Loss = 6.9621, Reconstruct Loss = 0.0007, Cls Loss = 0.0312, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0334, Reg Loss = 6.9698, Reconstruct Loss = 0.0008, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0327, Reg Loss = 6.9428, Reconstruct Loss = 0.0007, Cls Loss = 0.0313, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0330, Reg Loss = 6.9338, Reconstruct Loss = 0.0008, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0334, Reg Loss = 6.9286, Reconstruct Loss = 0.0008, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0334, Reg Loss = 6.9196, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.92it/s]


Epoch [160/200], Validation Loss: 2.0794, Validation Accuracy: 65.90%



Iteration 0: Loss = 0.0076, Reg Loss = 6.5132, Reconstruct Loss = 0.0000, Cls Loss = 0.0070, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0353, Reg Loss = 7.0102, Reconstruct Loss = 0.0015, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0364, Reg Loss = 6.9538, Reconstruct Loss = 0.0013, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0380, Reg Loss = 6.9244, Reconstruct Loss = 0.0011, Cls Loss = 0.0363, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0366, Reg Loss = 6.8731, Reconstruct Loss = 0.0009, Cls Loss = 0.0351, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0364, Reg Loss = 6.8753, Reconstruct Loss = 0.0009, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0361, Reg Loss = 6.8744, Reconstruct Loss = 0.0009, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0358, Reg Loss = 6.8684, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.74it/s]


Epoch [161/200], Validation Loss: 1.5073, Validation Accuracy: 69.14%



Iteration 0: Loss = 0.0629, Reg Loss = 6.7359, Reconstruct Loss = 0.0000, Cls Loss = 0.0623, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0381, Reg Loss = 6.9029, Reconstruct Loss = 0.0010, Cls Loss = 0.0364, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0339, Reg Loss = 6.7972, Reconstruct Loss = 0.0005, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0338, Reg Loss = 6.7742, Reconstruct Loss = 0.0004, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0341, Reg Loss = 6.7800, Reconstruct Loss = 0.0004, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0339, Reg Loss = 6.7989, Reconstruct Loss = 0.0005, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0339, Reg Loss = 6.8135, Reconstruct Loss = 0.0005, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0345, Reg Loss = 6.8243, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.96it/s]


Epoch [162/200], Validation Loss: 2.1052, Validation Accuracy: 63.81%



Iteration 0: Loss = 0.0680, Reg Loss = 6.5583, Reconstruct Loss = 0.0000, Cls Loss = 0.0673, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0330, Reg Loss = 6.8193, Reconstruct Loss = 0.0006, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0325, Reg Loss = 6.7883, Reconstruct Loss = 0.0004, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0345, Reg Loss = 6.8185, Reconstruct Loss = 0.0005, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0345, Reg Loss = 6.8227, Reconstruct Loss = 0.0005, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0350, Reg Loss = 6.8354, Reconstruct Loss = 0.0006, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0348, Reg Loss = 6.8352, Reconstruct Loss = 0.0006, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0351, Reg Loss = 6.8356, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.79it/s]


Epoch [163/200], Validation Loss: 1.9242, Validation Accuracy: 66.20%



Iteration 0: Loss = 0.0200, Reg Loss = 6.9824, Reconstruct Loss = 0.0000, Cls Loss = 0.0193, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0360, Reg Loss = 6.9019, Reconstruct Loss = 0.0009, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0357, Reg Loss = 6.8835, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0342, Reg Loss = 6.8593, Reconstruct Loss = 0.0006, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0349, Reg Loss = 6.8730, Reconstruct Loss = 0.0006, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0344, Reg Loss = 6.8598, Reconstruct Loss = 0.0006, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0340, Reg Loss = 6.8489, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0337, Reg Loss = 6.8547, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.71it/s]


Epoch [164/200], Validation Loss: 1.7802, Validation Accuracy: 68.30%



Iteration 0: Loss = 0.0251, Reg Loss = 6.6633, Reconstruct Loss = 0.0000, Cls Loss = 0.0244, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0344, Reg Loss = 6.8236, Reconstruct Loss = 0.0006, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0328, Reg Loss = 6.8258, Reconstruct Loss = 0.0007, Cls Loss = 0.0314, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0337, Reg Loss = 6.8155, Reconstruct Loss = 0.0006, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0340, Reg Loss = 6.8314, Reconstruct Loss = 0.0007, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0339, Reg Loss = 6.8244, Reconstruct Loss = 0.0006, Cls Loss = 0.0326, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0344, Reg Loss = 6.8169, Reconstruct Loss = 0.0006, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0338, Reg Loss = 6.8117, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.80it/s]


Epoch [165/200], Validation Loss: 1.7989, Validation Accuracy: 68.14%



Iteration 0: Loss = 0.0248, Reg Loss = 6.4595, Reconstruct Loss = 0.0000, Cls Loss = 0.0242, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0349, Reg Loss = 6.7435, Reconstruct Loss = 0.0003, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0336, Reg Loss = 6.7493, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0348, Reg Loss = 6.7548, Reconstruct Loss = 0.0005, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0345, Reg Loss = 6.7409, Reconstruct Loss = 0.0005, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0341, Reg Loss = 6.7398, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0338, Reg Loss = 6.7255, Reconstruct Loss = 0.0005, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0341, Reg Loss = 6.7347, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 39.79it/s]


Epoch [166/200], Validation Loss: 1.6244, Validation Accuracy: 69.37%



Iteration 0: Loss = 0.0403, Reg Loss = 6.4709, Reconstruct Loss = 0.0000, Cls Loss = 0.0396, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0344, Reg Loss = 6.6698, Reconstruct Loss = 0.0006, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0328, Reg Loss = 6.6897, Reconstruct Loss = 0.0006, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0335, Reg Loss = 6.6894, Reconstruct Loss = 0.0006, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0339, Reg Loss = 6.6874, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0342, Reg Loss = 6.7056, Reconstruct Loss = 0.0005, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0336, Reg Loss = 6.7247, Reconstruct Loss = 0.0006, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0334, Reg Loss = 6.7260, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.99it/s]


Epoch [167/200], Validation Loss: 1.6955, Validation Accuracy: 69.96%



Iteration 0: Loss = 0.0100, Reg Loss = 6.8167, Reconstruct Loss = 0.0000, Cls Loss = 0.0094, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0331, Reg Loss = 6.7690, Reconstruct Loss = 0.0005, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0328, Reg Loss = 6.7487, Reconstruct Loss = 0.0006, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0332, Reg Loss = 6.7469, Reconstruct Loss = 0.0006, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0333, Reg Loss = 6.7287, Reconstruct Loss = 0.0005, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0343, Reg Loss = 6.7088, Reconstruct Loss = 0.0005, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0340, Reg Loss = 6.7153, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0339, Reg Loss = 6.7118, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.98it/s]


Epoch [168/200], Validation Loss: 1.7746, Validation Accuracy: 67.25%



Iteration 0: Loss = 0.0098, Reg Loss = 6.6315, Reconstruct Loss = 0.0000, Cls Loss = 0.0091, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0344, Reg Loss = 6.5920, Reconstruct Loss = 0.0003, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0345, Reg Loss = 6.7031, Reconstruct Loss = 0.0009, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0342, Reg Loss = 6.6849, Reconstruct Loss = 0.0008, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0343, Reg Loss = 6.6745, Reconstruct Loss = 0.0007, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0344, Reg Loss = 6.6739, Reconstruct Loss = 0.0007, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0352, Reg Loss = 6.6800, Reconstruct Loss = 0.0006, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0354, Reg Loss = 6.6892, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.75it/s]


Epoch [169/200], Validation Loss: 1.7896, Validation Accuracy: 67.01%



Iteration 0: Loss = 0.0252, Reg Loss = 6.8243, Reconstruct Loss = 0.0000, Cls Loss = 0.0246, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0328, Reg Loss = 6.5841, Reconstruct Loss = 0.0003, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0324, Reg Loss = 6.6245, Reconstruct Loss = 0.0004, Cls Loss = 0.0314, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0335, Reg Loss = 6.6373, Reconstruct Loss = 0.0005, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0328, Reg Loss = 6.6533, Reconstruct Loss = 0.0006, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0330, Reg Loss = 6.6716, Reconstruct Loss = 0.0007, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0331, Reg Loss = 6.6670, Reconstruct Loss = 0.0006, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0335, Reg Loss = 6.6823, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.73it/s]


Epoch [170/200], Validation Loss: 1.6610, Validation Accuracy: 69.05%



Iteration 0: Loss = 0.1035, Reg Loss = 6.3608, Reconstruct Loss = 0.0000, Cls Loss = 0.1028, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0360, Reg Loss = 6.7252, Reconstruct Loss = 0.0007, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0343, Reg Loss = 6.6856, Reconstruct Loss = 0.0006, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0351, Reg Loss = 6.6910, Reconstruct Loss = 0.0006, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0345, Reg Loss = 6.7016, Reconstruct Loss = 0.0007, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0351, Reg Loss = 6.6934, Reconstruct Loss = 0.0006, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0344, Reg Loss = 6.7023, Reconstruct Loss = 0.0006, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0344, Reg Loss = 6.6926, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.87it/s]


Epoch [171/200], Validation Loss: 2.0321, Validation Accuracy: 66.55%



Iteration 0: Loss = 0.0831, Reg Loss = 6.7140, Reconstruct Loss = 0.0000, Cls Loss = 0.0825, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0386, Reg Loss = 6.8207, Reconstruct Loss = 0.0012, Cls Loss = 0.0368, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0353, Reg Loss = 6.7741, Reconstruct Loss = 0.0008, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0356, Reg Loss = 6.7975, Reconstruct Loss = 0.0010, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0355, Reg Loss = 6.7824, Reconstruct Loss = 0.0009, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0344, Reg Loss = 6.7632, Reconstruct Loss = 0.0009, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0336, Reg Loss = 6.7461, Reconstruct Loss = 0.0009, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0339, Reg Loss = 6.7330, Reconstruct Loss = 0.0008,

100%|██████████| 157/157 [00:03<00:00, 42.68it/s]


Epoch [172/200], Validation Loss: 1.5670, Validation Accuracy: 68.34%



Iteration 0: Loss = 0.0139, Reg Loss = 6.6880, Reconstruct Loss = 0.0000, Cls Loss = 0.0133, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0374, Reg Loss = 6.6144, Reconstruct Loss = 0.0005, Cls Loss = 0.0363, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0375, Reg Loss = 6.6606, Reconstruct Loss = 0.0007, Cls Loss = 0.0362, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0371, Reg Loss = 6.6377, Reconstruct Loss = 0.0006, Cls Loss = 0.0359, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0357, Reg Loss = 6.6359, Reconstruct Loss = 0.0006, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0359, Reg Loss = 6.6187, Reconstruct Loss = 0.0005, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0356, Reg Loss = 6.6009, Reconstruct Loss = 0.0005, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0350, Reg Loss = 6.5920, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.94it/s]


Epoch [173/200], Validation Loss: 1.9595, Validation Accuracy: 62.66%



Iteration 0: Loss = 0.0110, Reg Loss = 6.6417, Reconstruct Loss = 0.0000, Cls Loss = 0.0103, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0348, Reg Loss = 6.5222, Reconstruct Loss = 0.0005, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0323, Reg Loss = 6.5558, Reconstruct Loss = 0.0004, Cls Loss = 0.0312, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0306, Reg Loss = 6.5643, Reconstruct Loss = 0.0004, Cls Loss = 0.0296, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0321, Reg Loss = 6.5741, Reconstruct Loss = 0.0004, Cls Loss = 0.0310, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0322, Reg Loss = 6.5966, Reconstruct Loss = 0.0005, Cls Loss = 0.0310, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0329, Reg Loss = 6.5992, Reconstruct Loss = 0.0005, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0321, Reg Loss = 6.5973, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.96it/s]


Epoch [174/200], Validation Loss: 1.3936, Validation Accuracy: 69.80%



Iteration 0: Loss = 0.0276, Reg Loss = 6.5687, Reconstruct Loss = 0.0000, Cls Loss = 0.0269, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0330, Reg Loss = 6.5751, Reconstruct Loss = 0.0005, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0329, Reg Loss = 6.6395, Reconstruct Loss = 0.0007, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0329, Reg Loss = 6.6347, Reconstruct Loss = 0.0006, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0329, Reg Loss = 6.6183, Reconstruct Loss = 0.0005, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.6160, Reconstruct Loss = 0.0005, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0330, Reg Loss = 6.5927, Reconstruct Loss = 0.0004, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0333, Reg Loss = 6.5779, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.94it/s]


Epoch [175/200], Validation Loss: 1.7669, Validation Accuracy: 67.33%



Iteration 0: Loss = 0.0108, Reg Loss = 6.7540, Reconstruct Loss = 0.0000, Cls Loss = 0.0101, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0332, Reg Loss = 6.6439, Reconstruct Loss = 0.0009, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0320, Reg Loss = 6.6091, Reconstruct Loss = 0.0008, Cls Loss = 0.0306, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0323, Reg Loss = 6.5921, Reconstruct Loss = 0.0007, Cls Loss = 0.0310, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0324, Reg Loss = 6.5692, Reconstruct Loss = 0.0005, Cls Loss = 0.0312, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0328, Reg Loss = 6.5689, Reconstruct Loss = 0.0005, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0333, Reg Loss = 6.5781, Reconstruct Loss = 0.0006, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0346, Reg Loss = 6.6035, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.60it/s]


Epoch [176/200], Validation Loss: 1.6330, Validation Accuracy: 68.91%



Iteration 0: Loss = 0.0376, Reg Loss = 6.3544, Reconstruct Loss = 0.0000, Cls Loss = 0.0370, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0314, Reg Loss = 6.5685, Reconstruct Loss = 0.0004, Cls Loss = 0.0304, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0328, Reg Loss = 6.5532, Reconstruct Loss = 0.0004, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0338, Reg Loss = 6.5467, Reconstruct Loss = 0.0004, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0331, Reg Loss = 6.5448, Reconstruct Loss = 0.0004, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.5399, Reconstruct Loss = 0.0004, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0333, Reg Loss = 6.5512, Reconstruct Loss = 0.0004, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0331, Reg Loss = 6.5482, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.59it/s]


Epoch [177/200], Validation Loss: 1.5123, Validation Accuracy: 69.78%



Iteration 0: Loss = 0.0334, Reg Loss = 6.2825, Reconstruct Loss = 0.0000, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0346, Reg Loss = 6.6753, Reconstruct Loss = 0.0009, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0340, Reg Loss = 6.6620, Reconstruct Loss = 0.0008, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0324, Reg Loss = 6.6035, Reconstruct Loss = 0.0006, Cls Loss = 0.0311, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0333, Reg Loss = 6.5880, Reconstruct Loss = 0.0006, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0333, Reg Loss = 6.5862, Reconstruct Loss = 0.0007, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0334, Reg Loss = 6.5701, Reconstruct Loss = 0.0006, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0336, Reg Loss = 6.5592, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 43.05it/s]


Epoch [178/200], Validation Loss: 1.6970, Validation Accuracy: 69.06%



Iteration 0: Loss = 0.0694, Reg Loss = 6.5431, Reconstruct Loss = 0.0000, Cls Loss = 0.0688, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0330, Reg Loss = 6.5889, Reconstruct Loss = 0.0006, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0316, Reg Loss = 6.5600, Reconstruct Loss = 0.0006, Cls Loss = 0.0304, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0342, Reg Loss = 6.5613, Reconstruct Loss = 0.0006, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0340, Reg Loss = 6.5524, Reconstruct Loss = 0.0006, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.5533, Reconstruct Loss = 0.0006, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0329, Reg Loss = 6.5610, Reconstruct Loss = 0.0006, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0330, Reg Loss = 6.5687, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 43.01it/s]


Epoch [179/200], Validation Loss: 1.3048, Validation Accuracy: 72.11%



Checkpoint saved at epoch 178 with accuracy: 72.11%
Iteration 0: Loss = 0.0847, Reg Loss = 6.6929, Reconstruct Loss = 0.0000, Cls Loss = 0.0840, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0350, Reg Loss = 6.5359, Reconstruct Loss = 0.0003, Cls Loss = 0.0341, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0354, Reg Loss = 6.5563, Reconstruct Loss = 0.0004, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0351, Reg Loss = 6.5615, Reconstruct Loss = 0.0005, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0351, Reg Loss = 6.5347, Reconstruct Loss = 0.0004, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0355, Reg Loss = 6.5256, Reconstruct Loss = 0.0005, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0350, Reg Loss = 6.5123, Reconstruct Loss = 0.0004, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 700: Loss = 0

100%|██████████| 157/157 [00:03<00:00, 42.43it/s]


Epoch [180/200], Validation Loss: 1.9885, Validation Accuracy: 63.74%



Iteration 0: Loss = 0.0032, Reg Loss = 6.2080, Reconstruct Loss = 0.0000, Cls Loss = 0.0025, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0361, Reg Loss = 6.5243, Reconstruct Loss = 0.0006, Cls Loss = 0.0348, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0326, Reg Loss = 6.5388, Reconstruct Loss = 0.0005, Cls Loss = 0.0314, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0330, Reg Loss = 6.5046, Reconstruct Loss = 0.0004, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0333, Reg Loss = 6.5187, Reconstruct Loss = 0.0005, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0335, Reg Loss = 6.5066, Reconstruct Loss = 0.0004, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0340, Reg Loss = 6.5052, Reconstruct Loss = 0.0004, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0340, Reg Loss = 6.4882, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.23it/s]


Epoch [181/200], Validation Loss: 1.8185, Validation Accuracy: 65.06%



Iteration 0: Loss = 0.0256, Reg Loss = 6.6153, Reconstruct Loss = 0.0000, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0370, Reg Loss = 6.5105, Reconstruct Loss = 0.0005, Cls Loss = 0.0358, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0377, Reg Loss = 6.5521, Reconstruct Loss = 0.0007, Cls Loss = 0.0363, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0361, Reg Loss = 6.5183, Reconstruct Loss = 0.0005, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0355, Reg Loss = 6.5417, Reconstruct Loss = 0.0006, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0342, Reg Loss = 6.5255, Reconstruct Loss = 0.0006, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0345, Reg Loss = 6.5192, Reconstruct Loss = 0.0006, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0339, Reg Loss = 6.5259, Reconstruct Loss = 0.0007,

100%|██████████| 157/157 [00:03<00:00, 42.79it/s]


Epoch [182/200], Validation Loss: 1.6794, Validation Accuracy: 68.29%



Iteration 0: Loss = 0.0052, Reg Loss = 6.8094, Reconstruct Loss = 0.0000, Cls Loss = 0.0045, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0340, Reg Loss = 6.4761, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0356, Reg Loss = 6.5109, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0337, Reg Loss = 6.5139, Reconstruct Loss = 0.0007, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0332, Reg Loss = 6.5136, Reconstruct Loss = 0.0007, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.5098, Reconstruct Loss = 0.0007, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0332, Reg Loss = 6.5009, Reconstruct Loss = 0.0007, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0333, Reg Loss = 6.4912, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.58it/s]


Epoch [183/200], Validation Loss: 1.4918, Validation Accuracy: 70.29%



Iteration 0: Loss = 0.0454, Reg Loss = 6.2736, Reconstruct Loss = 0.0000, Cls Loss = 0.0448, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0379, Reg Loss = 6.4101, Reconstruct Loss = 0.0004, Cls Loss = 0.0369, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0343, Reg Loss = 6.4180, Reconstruct Loss = 0.0005, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0340, Reg Loss = 6.4134, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0349, Reg Loss = 6.3893, Reconstruct Loss = 0.0005, Cls Loss = 0.0338, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0340, Reg Loss = 6.3918, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0339, Reg Loss = 6.4081, Reconstruct Loss = 0.0005, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0337, Reg Loss = 6.4132, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.70it/s]


Epoch [184/200], Validation Loss: 1.7476, Validation Accuracy: 68.46%



Iteration 0: Loss = 0.0489, Reg Loss = 6.4975, Reconstruct Loss = 0.0000, Cls Loss = 0.0483, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0379, Reg Loss = 6.3808, Reconstruct Loss = 0.0005, Cls Loss = 0.0368, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0380, Reg Loss = 6.3639, Reconstruct Loss = 0.0004, Cls Loss = 0.0370, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0366, Reg Loss = 6.3565, Reconstruct Loss = 0.0004, Cls Loss = 0.0356, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0365, Reg Loss = 6.3519, Reconstruct Loss = 0.0004, Cls Loss = 0.0355, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0352, Reg Loss = 6.3675, Reconstruct Loss = 0.0004, Cls Loss = 0.0342, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0346, Reg Loss = 6.3647, Reconstruct Loss = 0.0004, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0343, Reg Loss = 6.3781, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 40.88it/s]


Epoch [185/200], Validation Loss: 1.6376, Validation Accuracy: 66.52%



Iteration 0: Loss = 0.0135, Reg Loss = 6.2823, Reconstruct Loss = 0.0000, Cls Loss = 0.0129, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0335, Reg Loss = 6.4363, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0329, Reg Loss = 6.3952, Reconstruct Loss = 0.0004, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0321, Reg Loss = 6.3728, Reconstruct Loss = 0.0003, Cls Loss = 0.0311, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0323, Reg Loss = 6.3672, Reconstruct Loss = 0.0003, Cls Loss = 0.0313, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0328, Reg Loss = 6.3709, Reconstruct Loss = 0.0004, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0329, Reg Loss = 6.3848, Reconstruct Loss = 0.0004, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0330, Reg Loss = 6.3853, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.97it/s]


Epoch [186/200], Validation Loss: 1.3827, Validation Accuracy: 70.51%



Iteration 0: Loss = 0.0244, Reg Loss = 6.1832, Reconstruct Loss = 0.0000, Cls Loss = 0.0238, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0337, Reg Loss = 6.3805, Reconstruct Loss = 0.0004, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0335, Reg Loss = 6.3589, Reconstruct Loss = 0.0003, Cls Loss = 0.0326, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0337, Reg Loss = 6.3602, Reconstruct Loss = 0.0003, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0343, Reg Loss = 6.3721, Reconstruct Loss = 0.0003, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0328, Reg Loss = 6.3607, Reconstruct Loss = 0.0003, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0325, Reg Loss = 6.3893, Reconstruct Loss = 0.0004, Cls Loss = 0.0314, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0331, Reg Loss = 6.3808, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 43.18it/s]


Epoch [187/200], Validation Loss: 1.7947, Validation Accuracy: 65.51%



Iteration 0: Loss = 0.0509, Reg Loss = 6.0013, Reconstruct Loss = 0.0000, Cls Loss = 0.0503, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0332, Reg Loss = 6.3259, Reconstruct Loss = 0.0003, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0338, Reg Loss = 6.3556, Reconstruct Loss = 0.0003, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0334, Reg Loss = 6.3904, Reconstruct Loss = 0.0004, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0333, Reg Loss = 6.3905, Reconstruct Loss = 0.0004, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.3967, Reconstruct Loss = 0.0004, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0326, Reg Loss = 6.3906, Reconstruct Loss = 0.0004, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0323, Reg Loss = 6.3840, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 43.06it/s]


Epoch [188/200], Validation Loss: 1.7400, Validation Accuracy: 67.50%



Iteration 0: Loss = 0.0239, Reg Loss = 5.9763, Reconstruct Loss = 0.0000, Cls Loss = 0.0233, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0322, Reg Loss = 6.3131, Reconstruct Loss = 0.0004, Cls Loss = 0.0311, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0330, Reg Loss = 6.2856, Reconstruct Loss = 0.0005, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0318, Reg Loss = 6.3092, Reconstruct Loss = 0.0005, Cls Loss = 0.0307, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0326, Reg Loss = 6.3118, Reconstruct Loss = 0.0005, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0331, Reg Loss = 6.2909, Reconstruct Loss = 0.0004, Cls Loss = 0.0321, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0327, Reg Loss = 6.2952, Reconstruct Loss = 0.0005, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0331, Reg Loss = 6.3103, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 43.07it/s]


Epoch [189/200], Validation Loss: 1.5644, Validation Accuracy: 67.59%



Iteration 0: Loss = 0.0146, Reg Loss = 5.9276, Reconstruct Loss = 0.0000, Cls Loss = 0.0140, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0346, Reg Loss = 6.3830, Reconstruct Loss = 0.0008, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0319, Reg Loss = 6.3438, Reconstruct Loss = 0.0006, Cls Loss = 0.0307, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0334, Reg Loss = 6.3158, Reconstruct Loss = 0.0005, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0331, Reg Loss = 6.3362, Reconstruct Loss = 0.0005, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0328, Reg Loss = 6.3270, Reconstruct Loss = 0.0005, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0322, Reg Loss = 6.3216, Reconstruct Loss = 0.0005, Cls Loss = 0.0311, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0322, Reg Loss = 6.3201, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.77it/s]


Epoch [190/200], Validation Loss: 1.7520, Validation Accuracy: 65.84%



Iteration 0: Loss = 0.0121, Reg Loss = 6.0286, Reconstruct Loss = 0.0000, Cls Loss = 0.0115, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0352, Reg Loss = 6.4340, Reconstruct Loss = 0.0006, Cls Loss = 0.0339, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0354, Reg Loss = 6.3566, Reconstruct Loss = 0.0005, Cls Loss = 0.0343, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0360, Reg Loss = 6.3388, Reconstruct Loss = 0.0005, Cls Loss = 0.0349, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0349, Reg Loss = 6.3442, Reconstruct Loss = 0.0005, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0342, Reg Loss = 6.3354, Reconstruct Loss = 0.0005, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0347, Reg Loss = 6.3230, Reconstruct Loss = 0.0004, Cls Loss = 0.0336, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0346, Reg Loss = 6.3265, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.79it/s]


Epoch [191/200], Validation Loss: 1.6456, Validation Accuracy: 69.08%



Iteration 0: Loss = 0.0091, Reg Loss = 6.0863, Reconstruct Loss = 0.0000, Cls Loss = 0.0085, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0334, Reg Loss = 6.2990, Reconstruct Loss = 0.0005, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0345, Reg Loss = 6.3053, Reconstruct Loss = 0.0005, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0344, Reg Loss = 6.2955, Reconstruct Loss = 0.0004, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0333, Reg Loss = 6.3037, Reconstruct Loss = 0.0004, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0329, Reg Loss = 6.3206, Reconstruct Loss = 0.0005, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0328, Reg Loss = 6.3117, Reconstruct Loss = 0.0004, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0334, Reg Loss = 6.3235, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 43.10it/s]


Epoch [192/200], Validation Loss: 1.6000, Validation Accuracy: 69.59%



Iteration 0: Loss = 0.0233, Reg Loss = 6.2235, Reconstruct Loss = 0.0000, Cls Loss = 0.0227, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0315, Reg Loss = 6.3410, Reconstruct Loss = 0.0005, Cls Loss = 0.0304, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0318, Reg Loss = 6.3657, Reconstruct Loss = 0.0006, Cls Loss = 0.0306, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0321, Reg Loss = 6.3782, Reconstruct Loss = 0.0007, Cls Loss = 0.0308, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0312, Reg Loss = 6.3556, Reconstruct Loss = 0.0006, Cls Loss = 0.0300, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0315, Reg Loss = 6.3324, Reconstruct Loss = 0.0005, Cls Loss = 0.0304, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0321, Reg Loss = 6.3070, Reconstruct Loss = 0.0005, Cls Loss = 0.0310, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0321, Reg Loss = 6.2930, Reconstruct Loss = 0.0005,

100%|██████████| 157/157 [00:03<00:00, 42.48it/s]


Epoch [193/200], Validation Loss: 1.7330, Validation Accuracy: 66.25%



Iteration 0: Loss = 0.0078, Reg Loss = 6.2219, Reconstruct Loss = 0.0000, Cls Loss = 0.0072, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0344, Reg Loss = 6.2973, Reconstruct Loss = 0.0005, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0344, Reg Loss = 6.2457, Reconstruct Loss = 0.0003, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0343, Reg Loss = 6.2399, Reconstruct Loss = 0.0003, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0332, Reg Loss = 6.2311, Reconstruct Loss = 0.0003, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0328, Reg Loss = 6.2253, Reconstruct Loss = 0.0004, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0329, Reg Loss = 6.2058, Reconstruct Loss = 0.0003, Cls Loss = 0.0319, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0337, Reg Loss = 6.1955, Reconstruct Loss = 0.0003,

100%|██████████| 157/157 [00:03<00:00, 43.09it/s]


Epoch [194/200], Validation Loss: 1.5441, Validation Accuracy: 66.69%



Iteration 0: Loss = 0.0281, Reg Loss = 6.1047, Reconstruct Loss = 0.0000, Cls Loss = 0.0275, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0363, Reg Loss = 6.2411, Reconstruct Loss = 0.0006, Cls Loss = 0.0352, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0362, Reg Loss = 6.2310, Reconstruct Loss = 0.0006, Cls Loss = 0.0350, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0356, Reg Loss = 6.1855, Reconstruct Loss = 0.0005, Cls Loss = 0.0345, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0354, Reg Loss = 6.1430, Reconstruct Loss = 0.0004, Cls Loss = 0.0344, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0345, Reg Loss = 6.1518, Reconstruct Loss = 0.0004, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0345, Reg Loss = 6.1682, Reconstruct Loss = 0.0005, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0345, Reg Loss = 6.1880, Reconstruct Loss = 0.0006,

100%|██████████| 157/157 [00:03<00:00, 42.71it/s]


Epoch [195/200], Validation Loss: 1.4597, Validation Accuracy: 68.84%



Iteration 0: Loss = 0.0518, Reg Loss = 5.7452, Reconstruct Loss = 0.0000, Cls Loss = 0.0512, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0329, Reg Loss = 6.1844, Reconstruct Loss = 0.0006, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0326, Reg Loss = 6.1386, Reconstruct Loss = 0.0003, Cls Loss = 0.0317, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0336, Reg Loss = 6.1334, Reconstruct Loss = 0.0003, Cls Loss = 0.0327, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0339, Reg Loss = 6.1315, Reconstruct Loss = 0.0004, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0342, Reg Loss = 6.1273, Reconstruct Loss = 0.0004, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0341, Reg Loss = 6.1315, Reconstruct Loss = 0.0004, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0337, Reg Loss = 6.1305, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 43.06it/s]


Epoch [196/200], Validation Loss: 1.5756, Validation Accuracy: 66.56%



Iteration 0: Loss = 0.0388, Reg Loss = 5.8328, Reconstruct Loss = 0.0000, Cls Loss = 0.0382, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0308, Reg Loss = 6.0027, Reconstruct Loss = 0.0001, Cls Loss = 0.0301, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0310, Reg Loss = 6.0306, Reconstruct Loss = 0.0002, Cls Loss = 0.0302, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0307, Reg Loss = 6.0050, Reconstruct Loss = 0.0001, Cls Loss = 0.0300, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0326, Reg Loss = 6.0089, Reconstruct Loss = 0.0001, Cls Loss = 0.0318, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0332, Reg Loss = 6.0205, Reconstruct Loss = 0.0001, Cls Loss = 0.0325, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0336, Reg Loss = 6.0419, Reconstruct Loss = 0.0002, Cls Loss = 0.0328, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0331, Reg Loss = 6.0422, Reconstruct Loss = 0.0002,

100%|██████████| 157/157 [00:03<00:00, 43.04it/s]


Epoch [197/200], Validation Loss: 1.5323, Validation Accuracy: 68.80%



Iteration 0: Loss = 0.0145, Reg Loss = 5.9510, Reconstruct Loss = 0.0000, Cls Loss = 0.0139, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0323, Reg Loss = 6.1088, Reconstruct Loss = 0.0004, Cls Loss = 0.0312, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0340, Reg Loss = 6.1163, Reconstruct Loss = 0.0004, Cls Loss = 0.0330, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0346, Reg Loss = 6.1502, Reconstruct Loss = 0.0005, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0350, Reg Loss = 6.1298, Reconstruct Loss = 0.0005, Cls Loss = 0.0340, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0357, Reg Loss = 6.1285, Reconstruct Loss = 0.0005, Cls Loss = 0.0346, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0348, Reg Loss = 6.1192, Reconstruct Loss = 0.0005, Cls Loss = 0.0337, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0347, Reg Loss = 6.0981, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.94it/s]


Epoch [198/200], Validation Loss: 1.4041, Validation Accuracy: 70.83%



Iteration 0: Loss = 0.0158, Reg Loss = 5.7267, Reconstruct Loss = 0.0000, Cls Loss = 0.0153, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0323, Reg Loss = 6.0012, Reconstruct Loss = 0.0002, Cls Loss = 0.0315, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0341, Reg Loss = 6.0144, Reconstruct Loss = 0.0003, Cls Loss = 0.0332, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0343, Reg Loss = 6.0131, Reconstruct Loss = 0.0003, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0342, Reg Loss = 6.0299, Reconstruct Loss = 0.0003, Cls Loss = 0.0333, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0343, Reg Loss = 6.0534, Reconstruct Loss = 0.0003, Cls Loss = 0.0334, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0339, Reg Loss = 6.0863, Reconstruct Loss = 0.0004, Cls Loss = 0.0329, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0337, Reg Loss = 6.0827, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.87it/s]


Epoch [199/200], Validation Loss: 1.3447, Validation Accuracy: 69.71%



Iteration 0: Loss = 0.0046, Reg Loss = 5.8738, Reconstruct Loss = 0.0000, Cls Loss = 0.0040, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0365, Reg Loss = 6.1273, Reconstruct Loss = 0.0003, Cls Loss = 0.0356, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0346, Reg Loss = 6.1375, Reconstruct Loss = 0.0005, Cls Loss = 0.0335, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0333, Reg Loss = 6.1056, Reconstruct Loss = 0.0004, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.0332, Reg Loss = 6.1009, Reconstruct Loss = 0.0004, Cls Loss = 0.0322, Learning rate = 1.0000e-04
Iteration 500: Loss = 0.0331, Reg Loss = 6.1053, Reconstruct Loss = 0.0004, Cls Loss = 0.0320, Learning rate = 1.0000e-04
Iteration 600: Loss = 0.0333, Reg Loss = 6.0845, Reconstruct Loss = 0.0004, Cls Loss = 0.0323, Learning rate = 1.0000e-04
Iteration 700: Loss = 0.0331, Reg Loss = 6.0974, Reconstruct Loss = 0.0004,

100%|██████████| 157/157 [00:03<00:00, 42.53it/s]

Epoch [200/200], Validation Loss: 1.5814, Validation Accuracy: 68.02%








In [72]:
# End the active wandb run; the run history and summary tables shown below
# are printed as this cell's output.
wandb.finish()

0,1
Cls Loss,▃▃▃▃▃▃▃▃▃▃█▃▃▃▃▃▃▃▃▃▁▃▃▃▂▁▂▃▃▃▃▃▃▃▃▃▂▃▃▃
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,▄▄█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Reconstruct Loss,█▇▇▁▁▆▆▆▁▃▁▆▅▁▇▄▄▅▅▆▅▄▄▆▄▄▃▁▅▄▄▄▃▁▄▄▃▃▁▂
Reg Loss,█▇▇█▇▇▆▆▆▆▆▅▆▆▅▅▅▅▅▅▃▄▄▄▄▄▄▄▄▃▃▃▃▃▃▁▁▂▂▂
Training accuracy,▁▁▄▂▄▂▃▄▄▁▆▄▄▂▅▄▆▄▄▄▂▆▄▃▆▅▄▃▅▄▄▅▅▆▃█▄▂▅▅
Validation Accuracy,▄▃▂▄▃▄▃▁█▄▆▅▂▄▄▄▆▄▅▅▇▅▅▆▅▆▅▆▆█▇▆▄▇▆▆▄▆▇▅
Validation Loss,▅▆▆▅▄█▄▅▃▄▇▃▄▂▄▃▃▂▃▄▅▂▅▃▃▄▁▃▂▃▄▃▂▃▁▃▂▃▂▂

0,1
Cls Loss,0.03206
Learning rate,0.0001
Loss,0.03312
Reconstruct Loss,0.00045
Reg Loss,6.09745
Training accuracy,0.9901
Validation Accuracy,0.6802
Validation Loss,1.58143


### 7 Testing loop

In [73]:
# Path of the hypernetwork checkpoint to evaluate in the testing cells below.
# Built with os.path.join (as the test loop does) rather than string
# concatenation, so a trailing separator in save_model_path cannot produce "//".
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [74]:
# Display the resolved checkpoint path as a sanity check before loading it.
saved_hypernet_path

'toy/experiments_densenet/dense_27th_experiment/cifar10_nerf_best.pth'

In [75]:
# Instantiate a fresh hypernetwork through the project helper; its weights are
# loaded from the saved checkpoint in a later cell.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [76]:
# Deserialize the saved checkpoint, mapping all tensors onto the CPU.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source (here: a file written under save_model_path).
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
# Copy the stored weights into the test hypernetwork. Left as the last
# expression so the key-matching report is shown as the cell output.
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [77]:
# Evaluate the loaded hypernetwork on every width in the sweep
# (hidden_dim = 12 ... 48 inclusive). For each width: build the target model,
# sample its weights from the hypernetwork, validate it, save the resulting
# state dict, and append the metrics to a per-experiment results file.

# The results file does not depend on hidden_dim, so resolve its path once.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, swap in the EMA weights for evaluation.
    # NOTE(review): `ema` is created outside this cell; if it tracks the
    # training hypernetwork rather than `hyper_model_test`, applying it here
    # does not change the weights being evaluated -- confirm.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always restore the original weights after applying EMA, even when
        # sampling or validation raises; otherwise the kernel would keep the
        # EMA weights swapped in for every cell run afterwards.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Write the results. 'a' is used to append the results; a new file will be created if it doesn't exist.
    # Note that re-running this cell therefore adds a second set of rows to the same file.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 41.13it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 1.7081, Validation Accuracy: 69.67%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.76it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 2.1276, Validation Accuracy: 65.57%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.16it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 1.6590, Validation Accuracy: 69.03%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.74it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 1.7688, Validation Accuracy: 67.21%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.68it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 1.7494, Validation Accuracy: 66.06%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.15it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 1.7536, Validation Accuracy: 69.89%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.91it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 1.7271, Validation Accuracy: 69.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 2.0852, Validation Accuracy: 64.32%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 1.9742, Validation Accuracy: 66.46%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.64it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 2.0327, Validation Accuracy: 66.38%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.21it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 2.2269, Validation Accuracy: 65.04%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.67it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 1.7365, Validation Accuracy: 69.25%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.66it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 1.7203, Validation Accuracy: 68.68%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.17it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 1.8274, Validation Accuracy: 66.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.37it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 1.9827, Validation Accuracy: 67.94%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.68it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 1.8372, Validation Accuracy: 67.14%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.20it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 1.8756, Validation Accuracy: 67.11%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.04it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 1.7863, Validation Accuracy: 68.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.79it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 1.7074, Validation Accuracy: 68.26%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.95it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 1.8176, Validation Accuracy: 67.15%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.51it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 1.7440, Validation Accuracy: 68.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.99it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 1.7588, Validation Accuracy: 69.14%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.13it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 1.7246, Validation Accuracy: 69.24%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.54it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 1.7692, Validation Accuracy: 68.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.51it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 1.9250, Validation Accuracy: 65.85%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.74it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 1.9390, Validation Accuracy: 66.74%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.58it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 1.8914, Validation Accuracy: 67.98%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.44it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 1.8607, Validation Accuracy: 67.63%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.77it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 1.9570, Validation Accuracy: 66.33%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.33it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 1.9986, Validation Accuracy: 66.82%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.44it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 1.9056, Validation Accuracy: 67.05%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.29it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 1.7248, Validation Accuracy: 68.58%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.44it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 1.7044, Validation Accuracy: 68.64%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.79it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 1.7596, Validation Accuracy: 68.91%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.99it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 1.8328, Validation Accuracy: 67.31%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.42it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 1.7939, Validation Accuracy: 67.92%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.55it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 1.4997, Validation Accuracy: 69.83%


