## Import

In [1]:
import os
import random

In [2]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [3]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [4]:
def find_max_dim(model_cls):
    """Return the largest size found among the model's learnable parameters.

    The search starts from the number of learnable parameters and is raised by
    the two leading dimensions of every 4-D tensor and by the length of every
    1-D tensor. Tensors of any other rank do not take part.
    """
    learnable = model_cls.learnable_parameter

    # Seed the running maximum with the parameter count itself
    largest = len(learnable)

    for _name, weight in learnable.items():
        shape = weight.shape
        rank = len(shape)
        if rank == 4:
            # 4-D tensor (e.g. a convolution weight): compare its first two dimensions
            largest = max(largest, shape[0], shape[1])
        elif rank == 1:
            # 1-D tensor (e.g. a bias): compare its length
            largest = max(largest, shape[0])

    return largest

### Initialize wandb

In [5]:
def initialize_wandb(config):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    The run is named ``<YYYYmmddHHMMSS>-<config.experiment.name>`` and is
    created in the "dense-inr-trial" project under the "cifar10" group.

    Args:
        config: Configuration for the run. It must expose ``experiment.name``
            and be convertible with ``dict(config)``; the resulting dictionary
            is passed to wandb as the run configuration.
    """
    # Imported after the docstring so that the string above is the function's
    # real docstring (visible through help() / __doc__) instead of a discarded
    # expression statement.
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project="dense-inr-trial", name=run_name, config=dict(config), group='cifar10')

### Init model dictionary

In [6]:
def init_model_dict(args, device):
    """
    Build one target network per hidden dimension listed in ``args.dimensions.range``.

    Every network is created from ``args.model`` with ``hidden_dim`` set to the
    current dimension, moved to ``device`` and stored together with the four
    values returned by ``sample_coordinates``. For the dimension equal to
    ``args.dimensions.start`` a second network is created with ``smooth=True``,
    switched to eval mode and kept as the ground truth model.

    Args:
        args: Configuration object; ``args.model`` and ``args.dimensions`` are read.
        device: Device every created model is moved to.

    Returns:
        dim_dict: Maps ``str(dim)`` to the tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``;
            the trailing ``None`` fills the key-mask slot.
        gt_model_dict: Maps ``str(dim)`` to the ground truth model; only the
            starting dimension gets an entry.
    """
    def _build(hidden_dim, **extra):
        # Single place that turns the model configuration into a network on `device`
        return create_model(args.model.type,
                            layers=args.model.layers,
                            growth=args.model.growth,
                            compression=args.model.compression,
                            bottleneck=args.model.bottleneck,
                            drop_rate=args.model.drop_rate,
                            hidden_dim=hidden_dim,
                            path=args.model.pretrained_path,
                            **extra).to(device)

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        dim_key = f"{dim}"

        # Target network for this dimension plus its sampled coordinate data
        candidate = _build(dim)
        coords_tensor, keys_list, indices_list, size_list = sample_coordinates(candidate)
        dim_dict[dim_key] = (candidate, coords_tensor, keys_list, indices_list, size_list, None)

        # Blank lines keep the per-dimension console output readable
        print('\n')

        # Only the starting dimension gets a ground truth counterpart
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = _build(dim, smooth=True)
        reference.eval()
        gt_model_dict[dim_key] = reference

    return dim_dict, gt_model_dict

### Training function

In [7]:
# Function to train the model for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """
    Train the hypernetwork for one pass over ``train_loader``.

    For every batch a hidden dimension is drawn at random from
    ``args.dimensions.range`` and the matching entry of ``dim_dict`` is used as
    the target network. ``sample_weights`` produces that network's weights from
    ``model``, the batch is classified with it, and the loss is the weighted sum
    of three terms (weights are read from ``args.hyper_model.loss_weight``):

    * ``ce_weight`` times the classification loss from ``criterion``;
    * ``reg_weight`` times the sum of the L2 norms of the generated weight tensors;
    * ``recon_weight`` times ``weighted_regression_loss`` against the ground truth
      weights, or zero when ``gt_model_dict`` has no model for the drawn dimension.

    The meters' ``avg`` values are logged to wandb and printed every
    ``args.experiment.log_interval`` batches; the epoch's training accuracy is
    logged to wandb once after the last batch (without an explicit step).

    Args:
        model: Hypernetwork to train; it is passed to ``sample_weights`` and its
            parameters are the ones subject to gradient clipping.
        train_loader: Sized iterable of ``(x, target)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Classification loss, called as ``criterion(predict, target)``.
        dim_dict: Maps ``str(hidden_dim)`` to a 6-tuple ``(model_cls, coords_tensor,
            keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``str(hidden_dim)`` to a ground truth model exposing
            ``learnable_parameter``.
        epoch_idx: Epoch index; only used to compute the wandb step.
        ema: Optional object whose ``update()`` is called after every optimizer step.
        args: Run configuration. Although it defaults to ``None`` it is
            dereferenced unconditionally, so callers must supply it.
        device: Device the batch tensors are moved to; also forwarded to ``sample_weights``.

    Returns:
        Tuple ``(losses.avg, dim_dict, gt_model_dict, train_acc)``: the ``avg`` of
        the total-loss meter, the two dictionary objects that were passed in, and
        the accuracy over all batches computed with ``accuracy_score``.
    """
    # Set the model to training mode
    model.train()
    # NOTE(review): total_loss is accumulated below but never read or returned
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Per-batch predictions and labels, concatenated after the loop for the training accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys.
        # The local names are rebound here; the tuple stored in dim_dict is not reassigned.
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add noise to the coordinates if specified: uniform in [-noise/2, noise/2) per element
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass
        predict = model_cls(x)

        # Record the predicted classes and the labels for the epoch-level training accuracy

        pred = torch.argmax(predict, dim=-1)

        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target)
        # Compute regularization loss: sum of the L2 norms of the generated weight tensors
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Ground truth tensors are taken in learnable_parameter order, filtered by selected_keys.
            # NOTE(review): assumes this order lines up with reconstructed_weights — confirm in sample_weights.
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # No ground truth for this dimension: the reconstruction term contributes zero
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear the gradients stored on the target network so the .grad values read below come from this batch only
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # Two-stage backward: first backpropagate the loss (the graph is retained for the second call),
        # then pass the gradients stored on the target network's selected parameters to the generated
        # weights as their upstream gradients.
        # NOTE(review): relies on sample_weights leaving the target parameters able to receive .grad and on
        # named_parameters() order matching reconstructed_weights — confirm in neumeta.utils.
        loss.backward(retain_graph=True)
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])

        # Clip the gradients if specified (element-wise value clipping on the hypernetwork's parameters)
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)

        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the losses and learning rate to wandb; the step is the batch index counted across epochs
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")

    # Accuracy over every batch of the epoch (predictions come from a different random dimension per batch)
    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # Logged without an explicit step, unlike the per-batch losses above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Returns the training loss, structure of network in each dimension, and the original structure of pretrained network
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select the device (GPU if available, otherwise CPU)

In [8]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### 1 Parsing arguments for input

In [9]:
# Values for the command-line style argument list that is handed to parse_args
CONFIG_PATH = "neumeta/config/densenet_inr_train/dense_7th_experiment.yaml"  # value for --config
RATIO = "1.0"  # value for --ratio; kept as a string because it travels through the argument list
CHECKPOINT_PATH = "toy/experiments_densenet/dense_7th_experiment/cifar10_nerf_best.pth"  # value for --resume_from

In [10]:
# Flag/value pairs, in the order they are handed to parse_args
argv_train = [
    '--config', CONFIG_PATH,
    '--ratio', RATIO,
    '--resume_from', CHECKPOINT_PATH,
]

In [11]:
args = parse_args(argv_train)  # Build the run configuration from the argument list
print_omegaconf(args)  # Print the resulting configuration as a key/value table

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                         dense_7th_experiment                                         |
|        experiment.num_epochs         |                                                  50                                                  |
|       experiment.log_interval        |                                                  50                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [12]:
# Apply the seed configured in args.experiment.seed so the run is reproducible
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [13]:
# Build the CIFAR-10 train/validation loaders with the configured batch size;
# 'strong_aug' is optional in the config and falls back to None when absent
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 64 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [14]:
# Target network at the starting hidden dimension, created with the configured pretrained path
model = create_model(
    args.model.type,
    layers=args.model.layers,
    growth=args.model.growth,
    compression=args.model.compression,
    bottleneck=args.model.bottleneck,
    drop_rate=args.model.drop_rate,
    hidden_dim=args.dimensions.start,
    path=args.model.pretrained_path,
).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [15]:
# Show the module tree of the target network
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [16]:
# List every learnable parameter of the target network together with its shape
# (the enumerate index was never used, so the mapping is iterated directly)
for k, tensor in model.learnable_parameter.items():
    print(k, tensor.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [17]:
# Report the largest dimension that find_max_dim finds among the target network's learnable parameters
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [18]:
# Baseline: validate the target network at the starting dimension before any hypernetwork training,
# so later validation results of the generated weights have a reference point
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 41.35it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.93%





In [19]:
# NOTE(review): despite its name, `checkpoint` holds the model's learnable-parameter mapping,
# not a checkpoint loaded from disk
checkpoint = model.learnable_parameter
# Number of learnable parameter tensors; handed to get_hypernetwork when the hypernetwork is created
number_param = len(checkpoint)

In [20]:
# Print the names of the learnable parameters (model.keys) and how many there are
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [21]:
# Build the hypernetwork from the configuration; number_param is the count of
# learnable parameter tensors of the target network
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [22]:
# Show the module tree of the hypernetwork
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=128, bias=True)
      (residual_blocks): ModuleList(
        (0-2): 3 x Linear(in_features=128, out_features=128, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=128, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize EMA to track a smoothed version of the hypernetwork weights

In [23]:
# Initialize the EMA over the hypernetwork with the configured decay (args.hyper_model.ema_decay)
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [24]:
# Training criterion, validation criterion, optimizer and LR scheduler for the hypernetwork,
# all created by get_optimizer from the configuration
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [25]:
# Show the four training components on separate lines for a quick sanity check
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.0001
    lr: 0.0001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x00000209D6438490>


### 6 Training loop

#### 6.1 Initialize training parameters

In [26]:
# Defaults for a fresh run: begin at epoch 0 with no best accuracy recorded yet
start_epoch, best_acc = 0, 0.0

#### 6.2 Directory to save the model

In [27]:
# Make sure the checkpoint output directory exists (no error if it is already there)
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [28]:
# Inspect the checkpoint path that was supplied through --resume_from
args.resume_from

'toy/experiments_densenet/dense_7th_experiment/cifar10_nerf_best.pth'

In [29]:
# Override the --resume_from value parsed earlier: with a falsy value the resume cell that
# follows is skipped and training starts from start_epoch = 0 with best_acc = 0.0.
# Remove this line to actually resume from the checkpoint.
args.resume_from = False

In [30]:
# Resume only when a checkpoint path is set; load_checkpoint receives the hypernetwork,
# the optimizer and the EMA object
if args.resume_from:
    print(f"Resuming from checkpoint: {args.resume_from}")
    checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
    # Continue from the epoch and best accuracy recorded in the checkpoint
    start_epoch = checkpoint_info['epoch']
    best_acc = checkpoint_info['best_acc']
    print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
    # Note: If there are more elements to retrieve, do so here.

#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [31]:
# Initialize model dictionary: one target network per configured dimension, plus the
# ground truth model for the starting dimension
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exper

In [32]:
# Show the ground truth model. init_model_dict stores it under the starting dimension,
# so the key is derived from the configuration instead of being hard-coded.
gt_model_dict[f"{args.dimensions.start}"]

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [33]:
# Validate the ground truth model of the starting dimension. The key is derived from the
# configuration (init_model_dict stores the model under the starting dimension) so this
# cell keeps working when args.dimensions.start changes.
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 157/157 [00:03<00:00, 43.28it/s]

Initial Permutated model Validation Loss: 0.3239, Validation Accuracy: 91.93%





In [34]:
# Show the per-dimension dictionary before shuffling.
# NOTE(review): this prints the full repr of every model in the dictionary; consider
# displaying only the keys to keep the notebook output short.
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [35]:
# Replace dim_dict with the result of shuffle_coordinates_all.
# NOTE(review): the name is rebound to its own transformed value, so re-running this cell
# applies the shuffle again to the already shuffled dictionary.
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [36]:
# Start the wandb run; its name combines the current time with args.experiment.name
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [37]:
# Upper bound of the epoch range used by the training loop
args.experiment.num_epochs

50

In [38]:
# Iterate over the epochs (start_epoch is 0 unless a checkpoint was resumed)
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch; every batch uses a randomly chosen hidden dimension
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion,
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema,
                                                                     args=args, device=device)
    # Step the scheduler once per epoch, after the optimizer updates of that epoch
    scheduler.step()

    # Print the training loss, accuracy (as a percentage, like the validation message) and learning rate
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}%, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # If it's time to evaluate the model
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Evaluate with the EMA weights when an EMA object is in use.
        # NOTE(review): apply() presumably swaps the smoothed weights into the hypernetwork and
        # restore() puts the training weights back — confirm in neumeta.utils.EMA.
        if ema:
            ema.apply()

        # Generate the target network's weights with the hypernetwork and validate the result,
        # which measures how well the hypernetwork reproduces a working model.
        # NOTE(review): `model` is rebound to the merged model, so each evaluation starts from
        # the model returned by the previous one.
        model = sample_merge_model(hyper_model, model, args)
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # Undo ema.apply() so training continues with the hypernetwork's own weights
        if ema:
            ema.restore()

        # Log the validation loss and accuracy to wandb
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            # The zero-based loop index is what gets stored in the checkpoint (unchanged)
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            # Report the epoch one-based so it matches the "Epoch [n/N]" messages above
            print(f"Checkpoint saved at epoch {epoch+1} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.1346, Reg Loss = 2.0720, Reconstruct Loss = 0.0000, Cls Loss = 0.1344, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.2450, Reg Loss = 1.7412, Reconstruct Loss = 0.0025, Cls Loss = 0.2423, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.2470, Reg Loss = 1.5820, Reconstruct Loss = 0.0018, Cls Loss = 0.2450, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.2416, Reg Loss = 1.4994, Reconstruct Loss = 0.0024, Cls Loss = 0.2391, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.2393, Reg Loss = 1.5248, Reconstruct Loss = 0.0032, Cls Loss = 0.2360, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.2377, Reg Loss = 1.5470, Reconstruct Loss = 0.0032, Cls Loss = 0.2344, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.2370, Reg Loss = 1.5614, Reconstruct Loss = 0.0026, Cls Loss = 0.2342, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.2346, Reg Loss = 1.5800, Reconstruct Loss = 0.0024, Cls Loss = 0.2321, Learning rate = 1.0000e-04
Iteration 400: Loss = 0.233

100%|██████████| 157/157 [00:03<00:00, 42.10it/s]


Epoch [1/50], Validation Loss: 1.3160, Validation Accuracy: 66.57%



Checkpoint saved at epoch 0 with accuracy: 66.57%
Iteration 0: Loss = 0.1282, Reg Loss = 1.5693, Reconstruct Loss = 0.0000, Cls Loss = 0.1281, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.2168, Reg Loss = 1.6033, Reconstruct Loss = 0.0032, Cls Loss = 0.2135, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.2211, Reg Loss = 1.5685, Reconstruct Loss = 0.0028, Cls Loss = 0.2181, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.2198, Reg Loss = 1.5530, Reconstruct Loss = 0.0023, Cls Loss = 0.2174, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.2207, Reg Loss = 1.5684, Reconstruct Loss = 0.0021, Cls Loss = 0.2184, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.2220, Reg Loss = 1.6360, Reconstruct Loss = 0.0021, Cls Loss = 0.2198, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.2204, Reg Loss = 1.6695, Reconstruct Loss = 0.0022, Cls Loss = 0.2181, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.2225,

100%|██████████| 157/157 [00:03<00:00, 42.54it/s]


Epoch [2/50], Validation Loss: 1.3173, Validation Accuracy: 66.71%



Checkpoint saved at epoch 1 with accuracy: 66.71%
Iteration 0: Loss = 0.1920, Reg Loss = 1.7586, Reconstruct Loss = 0.0000, Cls Loss = 0.1918, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.2046, Reg Loss = 1.7282, Reconstruct Loss = 0.0014, Cls Loss = 0.2029, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.2110, Reg Loss = 1.7273, Reconstruct Loss = 0.0011, Cls Loss = 0.2098, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.2110, Reg Loss = 1.7534, Reconstruct Loss = 0.0009, Cls Loss = 0.2099, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.2096, Reg Loss = 1.7913, Reconstruct Loss = 0.0014, Cls Loss = 0.2080, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.2099, Reg Loss = 1.7963, Reconstruct Loss = 0.0016, Cls Loss = 0.2081, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.2107, Reg Loss = 1.7834, Reconstruct Loss = 0.0016, Cls Loss = 0.2089, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.2131,

100%|██████████| 157/157 [00:03<00:00, 43.01it/s]


Epoch [3/50], Validation Loss: 1.3298, Validation Accuracy: 64.87%



Iteration 0: Loss = 0.1723, Reg Loss = 2.3073, Reconstruct Loss = 0.0000, Cls Loss = 0.1720, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.2026, Reg Loss = 2.1603, Reconstruct Loss = 0.0012, Cls Loss = 0.2012, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.2037, Reg Loss = 2.2353, Reconstruct Loss = 0.0010, Cls Loss = 0.2026, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.2058, Reg Loss = 2.2953, Reconstruct Loss = 0.0014, Cls Loss = 0.2041, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.2067, Reg Loss = 2.3127, Reconstruct Loss = 0.0015, Cls Loss = 0.2050, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.2060, Reg Loss = 2.3357, Reconstruct Loss = 0.0013, Cls Loss = 0.2044, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.2035, Reg Loss = 2.3540, Reconstruct Loss = 0.0014, Cls Loss = 0.2018, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.2045, Reg Loss = 2.3851, Reconstruct Loss = 0.0014, Cls

100%|██████████| 157/157 [00:03<00:00, 42.60it/s]


Epoch [4/50], Validation Loss: 1.3717, Validation Accuracy: 63.39%



Iteration 0: Loss = 0.2352, Reg Loss = 2.5764, Reconstruct Loss = 0.0000, Cls Loss = 0.2350, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1958, Reg Loss = 2.3673, Reconstruct Loss = 0.0022, Cls Loss = 0.1933, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1917, Reg Loss = 2.4214, Reconstruct Loss = 0.0020, Cls Loss = 0.1895, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1945, Reg Loss = 2.4715, Reconstruct Loss = 0.0021, Cls Loss = 0.1922, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1943, Reg Loss = 2.5088, Reconstruct Loss = 0.0020, Cls Loss = 0.1921, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1921, Reg Loss = 2.5408, Reconstruct Loss = 0.0016, Cls Loss = 0.1903, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1954, Reg Loss = 2.5301, Reconstruct Loss = 0.0016, Cls Loss = 0.1935, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1978, Reg Loss = 2.5517, Reconstruct Loss = 0.0018, Cls

100%|██████████| 157/157 [00:04<00:00, 32.56it/s]


Epoch [5/50], Validation Loss: 1.3653, Validation Accuracy: 63.46%



Iteration 0: Loss = 0.1789, Reg Loss = 2.7383, Reconstruct Loss = 0.0000, Cls Loss = 0.1786, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1886, Reg Loss = 2.5376, Reconstruct Loss = 0.0024, Cls Loss = 0.1860, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1967, Reg Loss = 2.5253, Reconstruct Loss = 0.0018, Cls Loss = 0.1946, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1962, Reg Loss = 2.4959, Reconstruct Loss = 0.0014, Cls Loss = 0.1946, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1985, Reg Loss = 2.5092, Reconstruct Loss = 0.0017, Cls Loss = 0.1966, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1974, Reg Loss = 2.5013, Reconstruct Loss = 0.0018, Cls Loss = 0.1954, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1965, Reg Loss = 2.5001, Reconstruct Loss = 0.0017, Cls Loss = 0.1945, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1987, Reg Loss = 2.5057, Reconstruct Loss = 0.0017, Cls

100%|██████████| 157/157 [00:03<00:00, 42.56it/s]


Epoch [6/50], Validation Loss: 1.3567, Validation Accuracy: 63.68%



Iteration 0: Loss = 0.2437, Reg Loss = 2.7165, Reconstruct Loss = 0.0000, Cls Loss = 0.2435, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1978, Reg Loss = 2.4632, Reconstruct Loss = 0.0007, Cls Loss = 0.1969, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1918, Reg Loss = 2.4905, Reconstruct Loss = 0.0003, Cls Loss = 0.1912, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1897, Reg Loss = 2.5579, Reconstruct Loss = 0.0008, Cls Loss = 0.1887, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1866, Reg Loss = 2.5571, Reconstruct Loss = 0.0009, Cls Loss = 0.1854, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1894, Reg Loss = 2.5514, Reconstruct Loss = 0.0009, Cls Loss = 0.1883, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1882, Reg Loss = 2.5713, Reconstruct Loss = 0.0011, Cls Loss = 0.1869, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1893, Reg Loss = 2.5755, Reconstruct Loss = 0.0012, Cls

100%|██████████| 157/157 [00:04<00:00, 33.10it/s]


Epoch [7/50], Validation Loss: 1.3537, Validation Accuracy: 63.85%



Iteration 0: Loss = 0.1101, Reg Loss = 2.4402, Reconstruct Loss = 0.0000, Cls Loss = 0.1099, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1924, Reg Loss = 2.5457, Reconstruct Loss = 0.0008, Cls Loss = 0.1914, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1862, Reg Loss = 2.4781, Reconstruct Loss = 0.0011, Cls Loss = 0.1849, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1862, Reg Loss = 2.4799, Reconstruct Loss = 0.0016, Cls Loss = 0.1844, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1861, Reg Loss = 2.4645, Reconstruct Loss = 0.0014, Cls Loss = 0.1844, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1852, Reg Loss = 2.4283, Reconstruct Loss = 0.0014, Cls Loss = 0.1835, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1845, Reg Loss = 2.4108, Reconstruct Loss = 0.0014, Cls Loss = 0.1829, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1844, Reg Loss = 2.3951, Reconstruct Loss = 0.0015, Cls

100%|██████████| 157/157 [00:03<00:00, 43.71it/s]


Epoch [8/50], Validation Loss: 1.3268, Validation Accuracy: 64.91%



Iteration 0: Loss = 0.1232, Reg Loss = 2.0560, Reconstruct Loss = 0.0000, Cls Loss = 0.1230, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.2006, Reg Loss = 2.3374, Reconstruct Loss = 0.0024, Cls Loss = 0.1980, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1941, Reg Loss = 2.3146, Reconstruct Loss = 0.0019, Cls Loss = 0.1921, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1884, Reg Loss = 2.2995, Reconstruct Loss = 0.0014, Cls Loss = 0.1867, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1846, Reg Loss = 2.3066, Reconstruct Loss = 0.0014, Cls Loss = 0.1829, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1816, Reg Loss = 2.3128, Reconstruct Loss = 0.0015, Cls Loss = 0.1799, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1799, Reg Loss = 2.3101, Reconstruct Loss = 0.0012, Cls Loss = 0.1784, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1799, Reg Loss = 2.3257, Reconstruct Loss = 0.0015, Cls

100%|██████████| 157/157 [00:03<00:00, 42.71it/s]


Epoch [9/50], Validation Loss: 1.3313, Validation Accuracy: 64.71%



Iteration 0: Loss = 0.1681, Reg Loss = 2.7788, Reconstruct Loss = 0.0254, Cls Loss = 0.1424, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1619, Reg Loss = 2.2763, Reconstruct Loss = 0.0010, Cls Loss = 0.1606, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1682, Reg Loss = 2.2634, Reconstruct Loss = 0.0010, Cls Loss = 0.1669, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1685, Reg Loss = 2.2755, Reconstruct Loss = 0.0009, Cls Loss = 0.1674, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1741, Reg Loss = 2.3168, Reconstruct Loss = 0.0009, Cls Loss = 0.1730, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1747, Reg Loss = 2.3279, Reconstruct Loss = 0.0009, Cls Loss = 0.1736, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1743, Reg Loss = 2.3235, Reconstruct Loss = 0.0012, Cls Loss = 0.1729, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1743, Reg Loss = 2.3462, Reconstruct Loss = 0.0013, Cls

100%|██████████| 157/157 [00:04<00:00, 31.80it/s]


Epoch [10/50], Validation Loss: 1.3253, Validation Accuracy: 64.98%



Iteration 0: Loss = 0.1121, Reg Loss = 2.4986, Reconstruct Loss = 0.0000, Cls Loss = 0.1119, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1666, Reg Loss = 2.4735, Reconstruct Loss = 0.0012, Cls Loss = 0.1652, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1706, Reg Loss = 2.4697, Reconstruct Loss = 0.0006, Cls Loss = 0.1698, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1748, Reg Loss = 2.4693, Reconstruct Loss = 0.0006, Cls Loss = 0.1739, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1719, Reg Loss = 2.4962, Reconstruct Loss = 0.0008, Cls Loss = 0.1709, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1703, Reg Loss = 2.4962, Reconstruct Loss = 0.0012, Cls Loss = 0.1688, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1704, Reg Loss = 2.4916, Reconstruct Loss = 0.0010, Cls Loss = 0.1692, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1708, Reg Loss = 2.4841, Reconstruct Loss = 0.0012, Cl

100%|██████████| 157/157 [00:04<00:00, 32.33it/s]


Epoch [11/50], Validation Loss: 1.3383, Validation Accuracy: 64.45%



Iteration 0: Loss = 0.2058, Reg Loss = 2.8837, Reconstruct Loss = 0.0000, Cls Loss = 0.2055, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1727, Reg Loss = 2.5450, Reconstruct Loss = 0.0041, Cls Loss = 0.1684, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1650, Reg Loss = 2.4737, Reconstruct Loss = 0.0029, Cls Loss = 0.1618, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1643, Reg Loss = 2.4342, Reconstruct Loss = 0.0024, Cls Loss = 0.1616, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1621, Reg Loss = 2.4365, Reconstruct Loss = 0.0018, Cls Loss = 0.1601, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1620, Reg Loss = 2.4313, Reconstruct Loss = 0.0019, Cls Loss = 0.1599, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1629, Reg Loss = 2.4414, Reconstruct Loss = 0.0018, Cls Loss = 0.1609, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1645, Reg Loss = 2.4319, Reconstruct Loss = 0.0017, Cl

100%|██████████| 157/157 [00:04<00:00, 32.62it/s]


Epoch [12/50], Validation Loss: 1.3741, Validation Accuracy: 63.35%



Iteration 0: Loss = 0.1744, Reg Loss = 2.6028, Reconstruct Loss = 0.0000, Cls Loss = 0.1741, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1739, Reg Loss = 2.5154, Reconstruct Loss = 0.0012, Cls Loss = 0.1724, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1709, Reg Loss = 2.4284, Reconstruct Loss = 0.0012, Cls Loss = 0.1694, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1725, Reg Loss = 2.4030, Reconstruct Loss = 0.0013, Cls Loss = 0.1709, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1698, Reg Loss = 2.4019, Reconstruct Loss = 0.0016, Cls Loss = 0.1679, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1684, Reg Loss = 2.3972, Reconstruct Loss = 0.0017, Cls Loss = 0.1665, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1698, Reg Loss = 2.3870, Reconstruct Loss = 0.0016, Cls Loss = 0.1680, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1694, Reg Loss = 2.3899, Reconstruct Loss = 0.0016, Cl

100%|██████████| 157/157 [00:04<00:00, 31.92it/s]


Epoch [13/50], Validation Loss: 1.3296, Validation Accuracy: 64.86%



Iteration 0: Loss = 0.1058, Reg Loss = 2.4024, Reconstruct Loss = 0.0000, Cls Loss = 0.1056, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1579, Reg Loss = 2.4608, Reconstruct Loss = 0.0016, Cls Loss = 0.1561, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1603, Reg Loss = 2.4852, Reconstruct Loss = 0.0011, Cls Loss = 0.1589, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1569, Reg Loss = 2.4799, Reconstruct Loss = 0.0007, Cls Loss = 0.1559, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1603, Reg Loss = 2.4474, Reconstruct Loss = 0.0007, Cls Loss = 0.1594, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1606, Reg Loss = 2.4490, Reconstruct Loss = 0.0007, Cls Loss = 0.1597, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1608, Reg Loss = 2.4384, Reconstruct Loss = 0.0007, Cls Loss = 0.1598, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1630, Reg Loss = 2.4303, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:04<00:00, 31.57it/s]


Epoch [14/50], Validation Loss: 1.3356, Validation Accuracy: 64.57%



Iteration 0: Loss = 0.2097, Reg Loss = 2.2355, Reconstruct Loss = 0.0000, Cls Loss = 0.2095, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1573, Reg Loss = 2.3685, Reconstruct Loss = 0.0014, Cls Loss = 0.1556, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1640, Reg Loss = 2.4345, Reconstruct Loss = 0.0010, Cls Loss = 0.1628, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1608, Reg Loss = 2.4345, Reconstruct Loss = 0.0008, Cls Loss = 0.1597, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1609, Reg Loss = 2.4616, Reconstruct Loss = 0.0009, Cls Loss = 0.1598, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1603, Reg Loss = 2.4746, Reconstruct Loss = 0.0010, Cls Loss = 0.1590, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1597, Reg Loss = 2.4663, Reconstruct Loss = 0.0009, Cls Loss = 0.1586, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1592, Reg Loss = 2.4864, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:04<00:00, 32.89it/s]


Epoch [15/50], Validation Loss: 1.3128, Validation Accuracy: 66.17%



Iteration 0: Loss = 0.1069, Reg Loss = 2.3238, Reconstruct Loss = 0.0000, Cls Loss = 0.1067, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1545, Reg Loss = 2.3468, Reconstruct Loss = 0.0005, Cls Loss = 0.1538, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1564, Reg Loss = 2.3533, Reconstruct Loss = 0.0008, Cls Loss = 0.1554, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1571, Reg Loss = 2.3901, Reconstruct Loss = 0.0007, Cls Loss = 0.1562, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1574, Reg Loss = 2.3928, Reconstruct Loss = 0.0011, Cls Loss = 0.1560, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1586, Reg Loss = 2.3736, Reconstruct Loss = 0.0014, Cls Loss = 0.1570, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1581, Reg Loss = 2.3698, Reconstruct Loss = 0.0012, Cls Loss = 0.1566, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1572, Reg Loss = 2.3566, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:03<00:00, 42.72it/s]


Epoch [16/50], Validation Loss: 1.3269, Validation Accuracy: 64.99%



Iteration 0: Loss = 0.0852, Reg Loss = 2.0322, Reconstruct Loss = 0.0000, Cls Loss = 0.0850, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1548, Reg Loss = 2.2926, Reconstruct Loss = 0.0003, Cls Loss = 0.1542, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1618, Reg Loss = 2.2907, Reconstruct Loss = 0.0009, Cls Loss = 0.1607, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1591, Reg Loss = 2.3264, Reconstruct Loss = 0.0014, Cls Loss = 0.1575, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1582, Reg Loss = 2.3255, Reconstruct Loss = 0.0012, Cls Loss = 0.1568, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1588, Reg Loss = 2.3154, Reconstruct Loss = 0.0010, Cls Loss = 0.1576, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1596, Reg Loss = 2.3124, Reconstruct Loss = 0.0010, Cls Loss = 0.1584, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1594, Reg Loss = 2.3009, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:04<00:00, 33.85it/s]


Epoch [17/50], Validation Loss: 1.3128, Validation Accuracy: 66.18%



Iteration 0: Loss = 0.1363, Reg Loss = 2.2980, Reconstruct Loss = 0.0000, Cls Loss = 0.1361, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1592, Reg Loss = 2.3245, Reconstruct Loss = 0.0011, Cls Loss = 0.1579, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1605, Reg Loss = 2.3090, Reconstruct Loss = 0.0008, Cls Loss = 0.1595, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1608, Reg Loss = 2.3012, Reconstruct Loss = 0.0011, Cls Loss = 0.1595, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1642, Reg Loss = 2.2737, Reconstruct Loss = 0.0012, Cls Loss = 0.1628, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1638, Reg Loss = 2.2632, Reconstruct Loss = 0.0010, Cls Loss = 0.1626, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1616, Reg Loss = 2.2656, Reconstruct Loss = 0.0010, Cls Loss = 0.1604, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1604, Reg Loss = 2.2675, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:04<00:00, 33.08it/s]


Epoch [18/50], Validation Loss: 1.3257, Validation Accuracy: 65.08%



Iteration 0: Loss = 0.1598, Reg Loss = 2.3052, Reconstruct Loss = 0.0000, Cls Loss = 0.1596, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1494, Reg Loss = 2.3153, Reconstruct Loss = 0.0011, Cls Loss = 0.1480, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1532, Reg Loss = 2.3210, Reconstruct Loss = 0.0013, Cls Loss = 0.1516, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1542, Reg Loss = 2.3033, Reconstruct Loss = 0.0017, Cls Loss = 0.1523, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1538, Reg Loss = 2.2811, Reconstruct Loss = 0.0015, Cls Loss = 0.1521, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1550, Reg Loss = 2.2776, Reconstruct Loss = 0.0013, Cls Loss = 0.1535, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1564, Reg Loss = 2.2671, Reconstruct Loss = 0.0014, Cls Loss = 0.1548, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1564, Reg Loss = 2.2738, Reconstruct Loss = 0.0012, Cl

100%|██████████| 157/157 [00:04<00:00, 32.39it/s]


Epoch [19/50], Validation Loss: 1.3145, Validation Accuracy: 66.11%



Iteration 0: Loss = 0.2788, Reg Loss = 2.1842, Reconstruct Loss = 0.0000, Cls Loss = 0.2785, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1540, Reg Loss = 2.2097, Reconstruct Loss = 0.0011, Cls Loss = 0.1527, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1505, Reg Loss = 2.2225, Reconstruct Loss = 0.0014, Cls Loss = 0.1489, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1525, Reg Loss = 2.2350, Reconstruct Loss = 0.0014, Cls Loss = 0.1509, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1535, Reg Loss = 2.2547, Reconstruct Loss = 0.0011, Cls Loss = 0.1523, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1526, Reg Loss = 2.2502, Reconstruct Loss = 0.0008, Cls Loss = 0.1515, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1514, Reg Loss = 2.2513, Reconstruct Loss = 0.0010, Cls Loss = 0.1502, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1518, Reg Loss = 2.2478, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:04<00:00, 32.92it/s]


Epoch [20/50], Validation Loss: 1.3138, Validation Accuracy: 66.24%



Iteration 0: Loss = 0.0951, Reg Loss = 2.1642, Reconstruct Loss = 0.0000, Cls Loss = 0.0948, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1461, Reg Loss = 2.1674, Reconstruct Loss = 0.0000, Cls Loss = 0.1459, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1454, Reg Loss = 2.1299, Reconstruct Loss = 0.0009, Cls Loss = 0.1443, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1477, Reg Loss = 2.1261, Reconstruct Loss = 0.0009, Cls Loss = 0.1465, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1520, Reg Loss = 2.1435, Reconstruct Loss = 0.0010, Cls Loss = 0.1508, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1495, Reg Loss = 2.1618, Reconstruct Loss = 0.0009, Cls Loss = 0.1484, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1509, Reg Loss = 2.1713, Reconstruct Loss = 0.0012, Cls Loss = 0.1495, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1508, Reg Loss = 2.1815, Reconstruct Loss = 0.0013, Cl

100%|██████████| 157/157 [00:04<00:00, 33.18it/s]


Epoch [21/50], Validation Loss: 1.3138, Validation Accuracy: 66.38%



Iteration 0: Loss = 0.1455, Reg Loss = 2.0939, Reconstruct Loss = 0.0000, Cls Loss = 0.1453, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1499, Reg Loss = 2.1663, Reconstruct Loss = 0.0015, Cls Loss = 0.1482, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1513, Reg Loss = 2.1856, Reconstruct Loss = 0.0013, Cls Loss = 0.1498, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1565, Reg Loss = 2.1770, Reconstruct Loss = 0.0016, Cls Loss = 0.1547, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1592, Reg Loss = 2.2003, Reconstruct Loss = 0.0015, Cls Loss = 0.1575, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1584, Reg Loss = 2.2166, Reconstruct Loss = 0.0014, Cls Loss = 0.1568, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1576, Reg Loss = 2.2161, Reconstruct Loss = 0.0015, Cls Loss = 0.1559, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1561, Reg Loss = 2.2103, Reconstruct Loss = 0.0013, Cl

100%|██████████| 157/157 [00:04<00:00, 33.43it/s]


Epoch [22/50], Validation Loss: 1.3167, Validation Accuracy: 66.11%



Iteration 0: Loss = 0.2031, Reg Loss = 2.1388, Reconstruct Loss = 0.0000, Cls Loss = 0.2029, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1582, Reg Loss = 2.2429, Reconstruct Loss = 0.0021, Cls Loss = 0.1559, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1503, Reg Loss = 2.2240, Reconstruct Loss = 0.0022, Cls Loss = 0.1478, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1472, Reg Loss = 2.2225, Reconstruct Loss = 0.0019, Cls Loss = 0.1451, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1501, Reg Loss = 2.2389, Reconstruct Loss = 0.0018, Cls Loss = 0.1480, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1501, Reg Loss = 2.2396, Reconstruct Loss = 0.0015, Cls Loss = 0.1483, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1501, Reg Loss = 2.2455, Reconstruct Loss = 0.0013, Cls Loss = 0.1486, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1512, Reg Loss = 2.2464, Reconstruct Loss = 0.0012, Cl

100%|██████████| 157/157 [00:04<00:00, 33.95it/s]


Epoch [23/50], Validation Loss: 1.3142, Validation Accuracy: 66.38%



Iteration 0: Loss = 0.1177, Reg Loss = 2.2130, Reconstruct Loss = 0.0000, Cls Loss = 0.1175, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1478, Reg Loss = 2.2834, Reconstruct Loss = 0.0017, Cls Loss = 0.1458, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1535, Reg Loss = 2.2913, Reconstruct Loss = 0.0022, Cls Loss = 0.1511, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1557, Reg Loss = 2.3178, Reconstruct Loss = 0.0022, Cls Loss = 0.1533, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1529, Reg Loss = 2.3276, Reconstruct Loss = 0.0019, Cls Loss = 0.1507, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1539, Reg Loss = 2.3214, Reconstruct Loss = 0.0019, Cls Loss = 0.1517, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1536, Reg Loss = 2.3228, Reconstruct Loss = 0.0018, Cls Loss = 0.1516, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1516, Reg Loss = 2.3147, Reconstruct Loss = 0.0017, Cl

100%|██████████| 157/157 [00:04<00:00, 33.13it/s]


Epoch [24/50], Validation Loss: 1.3144, Validation Accuracy: 66.49%



Iteration 0: Loss = 0.1291, Reg Loss = 2.0426, Reconstruct Loss = 0.0000, Cls Loss = 0.1289, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1596, Reg Loss = 2.2993, Reconstruct Loss = 0.0019, Cls Loss = 0.1575, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1602, Reg Loss = 2.3473, Reconstruct Loss = 0.0015, Cls Loss = 0.1585, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1553, Reg Loss = 2.3433, Reconstruct Loss = 0.0013, Cls Loss = 0.1537, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1527, Reg Loss = 2.3595, Reconstruct Loss = 0.0017, Cls Loss = 0.1507, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1529, Reg Loss = 2.3573, Reconstruct Loss = 0.0016, Cls Loss = 0.1511, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1513, Reg Loss = 2.3574, Reconstruct Loss = 0.0013, Cls Loss = 0.1498, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1514, Reg Loss = 2.3487, Reconstruct Loss = 0.0011, Cl

100%|██████████| 157/157 [00:04<00:00, 33.45it/s]


Epoch [25/50], Validation Loss: 1.3186, Validation Accuracy: 65.82%



Iteration 0: Loss = 0.2413, Reg Loss = 2.1496, Reconstruct Loss = 0.0000, Cls Loss = 0.2411, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1597, Reg Loss = 2.3674, Reconstruct Loss = 0.0010, Cls Loss = 0.1584, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1530, Reg Loss = 2.3682, Reconstruct Loss = 0.0005, Cls Loss = 0.1523, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1510, Reg Loss = 2.3715, Reconstruct Loss = 0.0007, Cls Loss = 0.1500, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1498, Reg Loss = 2.3798, Reconstruct Loss = 0.0009, Cls Loss = 0.1487, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1510, Reg Loss = 2.3739, Reconstruct Loss = 0.0008, Cls Loss = 0.1500, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1515, Reg Loss = 2.3653, Reconstruct Loss = 0.0007, Cls Loss = 0.1506, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1515, Reg Loss = 2.3592, Reconstruct Loss = 0.0007, Cl

100%|██████████| 157/157 [00:04<00:00, 33.34it/s]


Epoch [26/50], Validation Loss: 1.3124, Validation Accuracy: 66.80%



Checkpoint saved at epoch 25 with accuracy: 66.80%
Iteration 0: Loss = 0.0728, Reg Loss = 2.3965, Reconstruct Loss = 0.0000, Cls Loss = 0.0725, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1491, Reg Loss = 2.4323, Reconstruct Loss = 0.0018, Cls Loss = 0.1471, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1502, Reg Loss = 2.4244, Reconstruct Loss = 0.0019, Cls Loss = 0.1481, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1497, Reg Loss = 2.4168, Reconstruct Loss = 0.0015, Cls Loss = 0.1479, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1529, Reg Loss = 2.4097, Reconstruct Loss = 0.0014, Cls Loss = 0.1513, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1527, Reg Loss = 2.4005, Reconstruct Loss = 0.0012, Cls Loss = 0.1513, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1520, Reg Loss = 2.3981, Reconstruct Loss = 0.0011, Cls Loss = 0.1507, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.150

100%|██████████| 157/157 [00:04<00:00, 31.90it/s]


Epoch [27/50], Validation Loss: 1.3127, Validation Accuracy: 66.89%



Checkpoint saved at epoch 26 with accuracy: 66.89%
Iteration 0: Loss = 0.1359, Reg Loss = 2.5142, Reconstruct Loss = 0.0000, Cls Loss = 0.1356, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1563, Reg Loss = 2.4055, Reconstruct Loss = 0.0005, Cls Loss = 0.1555, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1507, Reg Loss = 2.4103, Reconstruct Loss = 0.0007, Cls Loss = 0.1497, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1476, Reg Loss = 2.3970, Reconstruct Loss = 0.0006, Cls Loss = 0.1468, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1495, Reg Loss = 2.3952, Reconstruct Loss = 0.0007, Cls Loss = 0.1486, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1475, Reg Loss = 2.3892, Reconstruct Loss = 0.0007, Cls Loss = 0.1466, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1458, Reg Loss = 2.3846, Reconstruct Loss = 0.0006, Cls Loss = 0.1450, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.145

100%|██████████| 157/157 [00:04<00:00, 32.33it/s]


Epoch [28/50], Validation Loss: 1.3166, Validation Accuracy: 66.58%



Iteration 0: Loss = 0.1000, Reg Loss = 2.3418, Reconstruct Loss = 0.0000, Cls Loss = 0.0997, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1392, Reg Loss = 2.3259, Reconstruct Loss = 0.0006, Cls Loss = 0.1384, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1383, Reg Loss = 2.3388, Reconstruct Loss = 0.0005, Cls Loss = 0.1375, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1383, Reg Loss = 2.3425, Reconstruct Loss = 0.0009, Cls Loss = 0.1372, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1383, Reg Loss = 2.3532, Reconstruct Loss = 0.0008, Cls Loss = 0.1373, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1408, Reg Loss = 2.3650, Reconstruct Loss = 0.0009, Cls Loss = 0.1396, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1416, Reg Loss = 2.3764, Reconstruct Loss = 0.0014, Cls Loss = 0.1400, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1417, Reg Loss = 2.3825, Reconstruct Loss = 0.0017, Cl

100%|██████████| 157/157 [00:04<00:00, 32.60it/s]


Epoch [29/50], Validation Loss: 1.3205, Validation Accuracy: 67.38%



Checkpoint saved at epoch 28 with accuracy: 67.38%
Iteration 0: Loss = 0.1138, Reg Loss = 2.3219, Reconstruct Loss = 0.0000, Cls Loss = 0.1135, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1417, Reg Loss = 2.4714, Reconstruct Loss = 0.0010, Cls Loss = 0.1404, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1393, Reg Loss = 2.4472, Reconstruct Loss = 0.0010, Cls Loss = 0.1381, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1432, Reg Loss = 2.4557, Reconstruct Loss = 0.0010, Cls Loss = 0.1420, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1419, Reg Loss = 2.4452, Reconstruct Loss = 0.0010, Cls Loss = 0.1407, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1414, Reg Loss = 2.4549, Reconstruct Loss = 0.0012, Cls Loss = 0.1399, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1417, Reg Loss = 2.4652, Reconstruct Loss = 0.0013, Cls Loss = 0.1401, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.139

100%|██████████| 157/157 [00:04<00:00, 32.06it/s]


Epoch [30/50], Validation Loss: 1.3278, Validation Accuracy: 67.74%



Checkpoint saved at epoch 29 with accuracy: 67.74%
Iteration 0: Loss = 0.0943, Reg Loss = 2.4015, Reconstruct Loss = 0.0000, Cls Loss = 0.0941, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1277, Reg Loss = 2.6175, Reconstruct Loss = 0.0022, Cls Loss = 0.1253, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1327, Reg Loss = 2.6194, Reconstruct Loss = 0.0023, Cls Loss = 0.1302, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1345, Reg Loss = 2.5862, Reconstruct Loss = 0.0019, Cls Loss = 0.1324, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1333, Reg Loss = 2.5951, Reconstruct Loss = 0.0021, Cls Loss = 0.1310, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1343, Reg Loss = 2.5873, Reconstruct Loss = 0.0018, Cls Loss = 0.1322, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1342, Reg Loss = 2.5742, Reconstruct Loss = 0.0018, Cls Loss = 0.1321, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.134

100%|██████████| 157/157 [00:04<00:00, 32.20it/s]


Epoch [31/50], Validation Loss: 1.3218, Validation Accuracy: 67.46%



Iteration 0: Loss = 0.0915, Reg Loss = 2.5034, Reconstruct Loss = 0.0000, Cls Loss = 0.0912, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1287, Reg Loss = 2.5331, Reconstruct Loss = 0.0014, Cls Loss = 0.1271, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1308, Reg Loss = 2.5501, Reconstruct Loss = 0.0016, Cls Loss = 0.1289, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1346, Reg Loss = 2.5301, Reconstruct Loss = 0.0011, Cls Loss = 0.1333, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1333, Reg Loss = 2.5402, Reconstruct Loss = 0.0008, Cls Loss = 0.1322, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1332, Reg Loss = 2.5549, Reconstruct Loss = 0.0008, Cls Loss = 0.1321, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1355, Reg Loss = 2.5645, Reconstruct Loss = 0.0007, Cls Loss = 0.1345, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1362, Reg Loss = 2.5711, Reconstruct Loss = 0.0007, Cl

100%|██████████| 157/157 [00:04<00:00, 33.93it/s]


Epoch [32/50], Validation Loss: 1.3238, Validation Accuracy: 67.54%



Iteration 0: Loss = 0.1064, Reg Loss = 2.4804, Reconstruct Loss = 0.0000, Cls Loss = 0.1062, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1375, Reg Loss = 2.5793, Reconstruct Loss = 0.0004, Cls Loss = 0.1368, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1405, Reg Loss = 2.5790, Reconstruct Loss = 0.0008, Cls Loss = 0.1395, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1377, Reg Loss = 2.5697, Reconstruct Loss = 0.0007, Cls Loss = 0.1368, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1355, Reg Loss = 2.5625, Reconstruct Loss = 0.0005, Cls Loss = 0.1348, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1331, Reg Loss = 2.5733, Reconstruct Loss = 0.0007, Cls Loss = 0.1322, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1325, Reg Loss = 2.5727, Reconstruct Loss = 0.0007, Cls Loss = 0.1315, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1325, Reg Loss = 2.5801, Reconstruct Loss = 0.0007, Cl

100%|██████████| 157/157 [00:04<00:00, 34.08it/s]


Epoch [33/50], Validation Loss: 1.3259, Validation Accuracy: 67.40%



Iteration 0: Loss = 0.1318, Reg Loss = 2.6867, Reconstruct Loss = 0.0000, Cls Loss = 0.1315, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1420, Reg Loss = 2.7532, Reconstruct Loss = 0.0026, Cls Loss = 0.1392, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1325, Reg Loss = 2.6716, Reconstruct Loss = 0.0013, Cls Loss = 0.1309, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1338, Reg Loss = 2.6463, Reconstruct Loss = 0.0010, Cls Loss = 0.1326, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1323, Reg Loss = 2.6447, Reconstruct Loss = 0.0009, Cls Loss = 0.1310, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1315, Reg Loss = 2.6246, Reconstruct Loss = 0.0008, Cls Loss = 0.1305, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1313, Reg Loss = 2.6329, Reconstruct Loss = 0.0009, Cls Loss = 0.1302, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1304, Reg Loss = 2.6338, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:03<00:00, 43.30it/s]


Epoch [34/50], Validation Loss: 1.3320, Validation Accuracy: 67.55%



Iteration 0: Loss = 0.1015, Reg Loss = 2.6886, Reconstruct Loss = 0.0000, Cls Loss = 0.1013, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1330, Reg Loss = 2.5959, Reconstruct Loss = 0.0005, Cls Loss = 0.1322, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1316, Reg Loss = 2.6428, Reconstruct Loss = 0.0009, Cls Loss = 0.1305, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1323, Reg Loss = 2.6857, Reconstruct Loss = 0.0010, Cls Loss = 0.1310, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1323, Reg Loss = 2.6981, Reconstruct Loss = 0.0010, Cls Loss = 0.1310, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1293, Reg Loss = 2.7126, Reconstruct Loss = 0.0010, Cls Loss = 0.1281, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1293, Reg Loss = 2.7202, Reconstruct Loss = 0.0009, Cls Loss = 0.1280, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1305, Reg Loss = 2.7233, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:03<00:00, 43.34it/s]


Epoch [35/50], Validation Loss: 1.3300, Validation Accuracy: 67.54%



Iteration 0: Loss = 0.1471, Reg Loss = 2.4956, Reconstruct Loss = 0.0000, Cls Loss = 0.1469, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1256, Reg Loss = 2.7253, Reconstruct Loss = 0.0004, Cls Loss = 0.1250, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1289, Reg Loss = 2.7196, Reconstruct Loss = 0.0004, Cls Loss = 0.1283, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1289, Reg Loss = 2.7297, Reconstruct Loss = 0.0006, Cls Loss = 0.1280, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1300, Reg Loss = 2.7150, Reconstruct Loss = 0.0006, Cls Loss = 0.1292, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1292, Reg Loss = 2.7270, Reconstruct Loss = 0.0007, Cls Loss = 0.1282, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1292, Reg Loss = 2.7211, Reconstruct Loss = 0.0007, Cls Loss = 0.1282, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1294, Reg Loss = 2.7192, Reconstruct Loss = 0.0008, Cl

100%|██████████| 157/157 [00:03<00:00, 43.24it/s]


Epoch [36/50], Validation Loss: 1.3316, Validation Accuracy: 67.55%



Iteration 0: Loss = 0.1894, Reg Loss = 2.5890, Reconstruct Loss = 0.0000, Cls Loss = 0.1892, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1277, Reg Loss = 2.5865, Reconstruct Loss = 0.0004, Cls Loss = 0.1270, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1290, Reg Loss = 2.6426, Reconstruct Loss = 0.0008, Cls Loss = 0.1279, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1273, Reg Loss = 2.6736, Reconstruct Loss = 0.0010, Cls Loss = 0.1260, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1278, Reg Loss = 2.6836, Reconstruct Loss = 0.0009, Cls Loss = 0.1267, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1275, Reg Loss = 2.6858, Reconstruct Loss = 0.0009, Cls Loss = 0.1263, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1260, Reg Loss = 2.6759, Reconstruct Loss = 0.0008, Cls Loss = 0.1249, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1269, Reg Loss = 2.6765, Reconstruct Loss = 0.0007, Cl

100%|██████████| 157/157 [00:03<00:00, 43.65it/s]


Epoch [37/50], Validation Loss: 1.3336, Validation Accuracy: 67.50%



Iteration 0: Loss = 0.0786, Reg Loss = 2.8001, Reconstruct Loss = 0.0000, Cls Loss = 0.0783, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1186, Reg Loss = 2.7885, Reconstruct Loss = 0.0008, Cls Loss = 0.1175, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1229, Reg Loss = 2.8183, Reconstruct Loss = 0.0008, Cls Loss = 0.1218, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1225, Reg Loss = 2.8118, Reconstruct Loss = 0.0007, Cls Loss = 0.1215, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1224, Reg Loss = 2.8060, Reconstruct Loss = 0.0008, Cls Loss = 0.1213, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1241, Reg Loss = 2.8024, Reconstruct Loss = 0.0009, Cls Loss = 0.1229, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1253, Reg Loss = 2.7948, Reconstruct Loss = 0.0009, Cls Loss = 0.1241, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1256, Reg Loss = 2.7915, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:03<00:00, 43.34it/s]


Epoch [38/50], Validation Loss: 1.3229, Validation Accuracy: 67.46%



Iteration 0: Loss = 0.1004, Reg Loss = 2.7653, Reconstruct Loss = 0.0000, Cls Loss = 0.1002, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1259, Reg Loss = 2.7792, Reconstruct Loss = 0.0004, Cls Loss = 0.1253, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1260, Reg Loss = 2.8082, Reconstruct Loss = 0.0004, Cls Loss = 0.1253, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1273, Reg Loss = 2.8296, Reconstruct Loss = 0.0006, Cls Loss = 0.1263, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1279, Reg Loss = 2.8601, Reconstruct Loss = 0.0012, Cls Loss = 0.1264, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1285, Reg Loss = 2.8348, Reconstruct Loss = 0.0010, Cls Loss = 0.1272, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1273, Reg Loss = 2.8284, Reconstruct Loss = 0.0011, Cls Loss = 0.1259, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1268, Reg Loss = 2.8204, Reconstruct Loss = 0.0010, Cl

100%|██████████| 157/157 [00:03<00:00, 43.50it/s]


Epoch [39/50], Validation Loss: 1.3308, Validation Accuracy: 67.54%



Iteration 0: Loss = 0.1233, Reg Loss = 2.7266, Reconstruct Loss = 0.0000, Cls Loss = 0.1230, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1246, Reg Loss = 2.7940, Reconstruct Loss = 0.0008, Cls Loss = 0.1236, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1198, Reg Loss = 2.7230, Reconstruct Loss = 0.0004, Cls Loss = 0.1191, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1180, Reg Loss = 2.7309, Reconstruct Loss = 0.0006, Cls Loss = 0.1171, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1181, Reg Loss = 2.7302, Reconstruct Loss = 0.0006, Cls Loss = 0.1172, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1177, Reg Loss = 2.7502, Reconstruct Loss = 0.0007, Cls Loss = 0.1167, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1196, Reg Loss = 2.7495, Reconstruct Loss = 0.0007, Cls Loss = 0.1186, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1204, Reg Loss = 2.7381, Reconstruct Loss = 0.0006, Cl

100%|██████████| 157/157 [00:03<00:00, 43.58it/s]


Epoch [40/50], Validation Loss: 1.3417, Validation Accuracy: 67.64%



Iteration 0: Loss = 0.1336, Reg Loss = 2.6300, Reconstruct Loss = 0.0000, Cls Loss = 0.1334, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1124, Reg Loss = 2.8734, Reconstruct Loss = 0.0016, Cls Loss = 0.1105, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1214, Reg Loss = 2.8038, Reconstruct Loss = 0.0012, Cls Loss = 0.1199, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1229, Reg Loss = 2.7884, Reconstruct Loss = 0.0010, Cls Loss = 0.1216, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1202, Reg Loss = 2.7733, Reconstruct Loss = 0.0009, Cls Loss = 0.1190, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1207, Reg Loss = 2.7628, Reconstruct Loss = 0.0009, Cls Loss = 0.1195, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1188, Reg Loss = 2.7636, Reconstruct Loss = 0.0009, Cls Loss = 0.1176, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1194, Reg Loss = 2.7557, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:03<00:00, 43.61it/s]


Epoch [41/50], Validation Loss: 1.3527, Validation Accuracy: 68.04%



Checkpoint saved at epoch 40 with accuracy: 68.04%
Iteration 0: Loss = 0.1271, Reg Loss = 2.5931, Reconstruct Loss = 0.0000, Cls Loss = 0.1269, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1245, Reg Loss = 2.8716, Reconstruct Loss = 0.0020, Cls Loss = 0.1222, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1223, Reg Loss = 2.8360, Reconstruct Loss = 0.0015, Cls Loss = 0.1205, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1201, Reg Loss = 2.8051, Reconstruct Loss = 0.0013, Cls Loss = 0.1186, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1215, Reg Loss = 2.8054, Reconstruct Loss = 0.0012, Cls Loss = 0.1201, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1205, Reg Loss = 2.8261, Reconstruct Loss = 0.0013, Cls Loss = 0.1188, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1187, Reg Loss = 2.8350, Reconstruct Loss = 0.0014, Cls Loss = 0.1170, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.118

100%|██████████| 157/157 [00:03<00:00, 43.71it/s]


Epoch [42/50], Validation Loss: 1.3437, Validation Accuracy: 67.77%



Iteration 0: Loss = 0.0663, Reg Loss = 2.6341, Reconstruct Loss = 0.0000, Cls Loss = 0.0660, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1078, Reg Loss = 2.6706, Reconstruct Loss = 0.0004, Cls Loss = 0.1071, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1099, Reg Loss = 2.7193, Reconstruct Loss = 0.0008, Cls Loss = 0.1089, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1160, Reg Loss = 2.7468, Reconstruct Loss = 0.0009, Cls Loss = 0.1148, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1161, Reg Loss = 2.7678, Reconstruct Loss = 0.0010, Cls Loss = 0.1148, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1164, Reg Loss = 2.7537, Reconstruct Loss = 0.0009, Cls Loss = 0.1152, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1149, Reg Loss = 2.7591, Reconstruct Loss = 0.0009, Cls Loss = 0.1138, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1157, Reg Loss = 2.7707, Reconstruct Loss = 0.0009, Cl

100%|██████████| 157/157 [00:03<00:00, 43.68it/s]


Epoch [43/50], Validation Loss: 1.3454, Validation Accuracy: 67.86%



Iteration 0: Loss = 0.2246, Reg Loss = 2.5539, Reconstruct Loss = 0.0000, Cls Loss = 0.2243, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1103, Reg Loss = 2.6385, Reconstruct Loss = 0.0004, Cls Loss = 0.1096, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1121, Reg Loss = 2.6756, Reconstruct Loss = 0.0007, Cls Loss = 0.1112, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1112, Reg Loss = 2.6483, Reconstruct Loss = 0.0006, Cls Loss = 0.1104, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1106, Reg Loss = 2.6449, Reconstruct Loss = 0.0006, Cls Loss = 0.1098, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1119, Reg Loss = 2.6347, Reconstruct Loss = 0.0005, Cls Loss = 0.1112, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1118, Reg Loss = 2.6416, Reconstruct Loss = 0.0005, Cls Loss = 0.1110, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1129, Reg Loss = 2.6511, Reconstruct Loss = 0.0006, Cl

100%|██████████| 157/157 [00:03<00:00, 43.27it/s]


Epoch [44/50], Validation Loss: 1.3319, Validation Accuracy: 67.84%



Iteration 0: Loss = 0.0927, Reg Loss = 2.4805, Reconstruct Loss = 0.0000, Cls Loss = 0.0924, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1048, Reg Loss = 2.5874, Reconstruct Loss = 0.0006, Cls Loss = 0.1040, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1115, Reg Loss = 2.5727, Reconstruct Loss = 0.0006, Cls Loss = 0.1106, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1118, Reg Loss = 2.6055, Reconstruct Loss = 0.0010, Cls Loss = 0.1105, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1086, Reg Loss = 2.5875, Reconstruct Loss = 0.0009, Cls Loss = 0.1074, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1071, Reg Loss = 2.5624, Reconstruct Loss = 0.0007, Cls Loss = 0.1061, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1071, Reg Loss = 2.5548, Reconstruct Loss = 0.0007, Cls Loss = 0.1061, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1058, Reg Loss = 2.5401, Reconstruct Loss = 0.0006, Cl

100%|██████████| 157/157 [00:03<00:00, 43.46it/s]


Epoch [45/50], Validation Loss: 1.3310, Validation Accuracy: 68.00%



Iteration 0: Loss = 0.1328, Reg Loss = 2.4825, Reconstruct Loss = 0.0000, Cls Loss = 0.1325, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1038, Reg Loss = 2.5254, Reconstruct Loss = 0.0010, Cls Loss = 0.1026, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1036, Reg Loss = 2.5456, Reconstruct Loss = 0.0012, Cls Loss = 0.1021, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1059, Reg Loss = 2.5511, Reconstruct Loss = 0.0011, Cls Loss = 0.1046, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1054, Reg Loss = 2.5452, Reconstruct Loss = 0.0010, Cls Loss = 0.1041, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1065, Reg Loss = 2.5259, Reconstruct Loss = 0.0010, Cls Loss = 0.1052, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1065, Reg Loss = 2.5066, Reconstruct Loss = 0.0009, Cls Loss = 0.1053, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1052, Reg Loss = 2.4745, Reconstruct Loss = 0.0008, Cl

100%|██████████| 157/157 [00:03<00:00, 43.47it/s]


Epoch [46/50], Validation Loss: 1.3199, Validation Accuracy: 67.56%



Iteration 0: Loss = 0.0500, Reg Loss = 2.4420, Reconstruct Loss = 0.0000, Cls Loss = 0.0497, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1005, Reg Loss = 2.5539, Reconstruct Loss = 0.0007, Cls Loss = 0.0995, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1024, Reg Loss = 2.5049, Reconstruct Loss = 0.0006, Cls Loss = 0.1016, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1010, Reg Loss = 2.4815, Reconstruct Loss = 0.0006, Cls Loss = 0.1001, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1017, Reg Loss = 2.4702, Reconstruct Loss = 0.0006, Cls Loss = 0.1009, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.1003, Reg Loss = 2.4754, Reconstruct Loss = 0.0007, Cls Loss = 0.0993, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.1002, Reg Loss = 2.4546, Reconstruct Loss = 0.0006, Cls Loss = 0.0993, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1009, Reg Loss = 2.4600, Reconstruct Loss = 0.0006, Cl

100%|██████████| 157/157 [00:04<00:00, 33.35it/s]


Epoch [47/50], Validation Loss: 1.3222, Validation Accuracy: 67.63%



Iteration 0: Loss = 0.0529, Reg Loss = 2.2794, Reconstruct Loss = 0.0000, Cls Loss = 0.0527, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.0996, Reg Loss = 2.3895, Reconstruct Loss = 0.0010, Cls Loss = 0.0984, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1005, Reg Loss = 2.4485, Reconstruct Loss = 0.0012, Cls Loss = 0.0990, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.1040, Reg Loss = 2.4532, Reconstruct Loss = 0.0012, Cls Loss = 0.1025, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.1002, Reg Loss = 2.4523, Reconstruct Loss = 0.0011, Cls Loss = 0.0988, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.0992, Reg Loss = 2.4295, Reconstruct Loss = 0.0010, Cls Loss = 0.0980, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0995, Reg Loss = 2.4215, Reconstruct Loss = 0.0008, Cls Loss = 0.0984, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.1001, Reg Loss = 2.4206, Reconstruct Loss = 0.0008, Cl

100%|██████████| 157/157 [00:04<00:00, 33.01it/s]


Epoch [48/50], Validation Loss: 1.3316, Validation Accuracy: 67.92%



Iteration 0: Loss = 0.1284, Reg Loss = 2.2471, Reconstruct Loss = 0.0000, Cls Loss = 0.1281, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.1014, Reg Loss = 2.4286, Reconstruct Loss = 0.0013, Cls Loss = 0.0998, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.1036, Reg Loss = 2.3494, Reconstruct Loss = 0.0009, Cls Loss = 0.1024, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.0988, Reg Loss = 2.3315, Reconstruct Loss = 0.0008, Cls Loss = 0.0978, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0971, Reg Loss = 2.3496, Reconstruct Loss = 0.0009, Cls Loss = 0.0960, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.0983, Reg Loss = 2.3570, Reconstruct Loss = 0.0009, Cls Loss = 0.0972, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0991, Reg Loss = 2.3503, Reconstruct Loss = 0.0009, Cls Loss = 0.0980, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.0977, Reg Loss = 2.3432, Reconstruct Loss = 0.0008, Cl

100%|██████████| 157/157 [00:03<00:00, 43.87it/s]


Epoch [49/50], Validation Loss: 1.3357, Validation Accuracy: 68.01%



Iteration 0: Loss = 0.1568, Reg Loss = 2.1524, Reconstruct Loss = 0.0000, Cls Loss = 0.1566, Learning rate = 1.0000e-04
Iteration 50: Loss = 0.0885, Reg Loss = 2.2040, Reconstruct Loss = 0.0003, Cls Loss = 0.0880, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0926, Reg Loss = 2.1866, Reconstruct Loss = 0.0004, Cls Loss = 0.0920, Learning rate = 1.0000e-04
Iteration 150: Loss = 0.0907, Reg Loss = 2.1892, Reconstruct Loss = 0.0004, Cls Loss = 0.0900, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0924, Reg Loss = 2.2087, Reconstruct Loss = 0.0006, Cls Loss = 0.0916, Learning rate = 1.0000e-04
Iteration 250: Loss = 0.0944, Reg Loss = 2.2331, Reconstruct Loss = 0.0008, Cls Loss = 0.0934, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0938, Reg Loss = 2.2265, Reconstruct Loss = 0.0008, Cls Loss = 0.0928, Learning rate = 1.0000e-04
Iteration 350: Loss = 0.0945, Reg Loss = 2.2210, Reconstruct Loss = 0.0008, Cl

100%|██████████| 157/157 [00:03<00:00, 43.54it/s]

Epoch [50/50], Validation Loss: 1.3372, Validation Accuracy: 67.94%








In [39]:
# Finish the Weights & Biases run; the run-history and run-summary tables shown below are this call's output.
wandb.finish()

0,1
Cls Loss,█▇█▇▇▆▆▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▂▁▁▁
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,▆▆▆▆▆▅▅▅▄▄▄▄▄▃▄█▃▃▅▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Reconstruct Loss,█▇▇█▆▅▆▆▅▆▅▃▂▄▄▅▄▄▅▆▅▅▄▆▁▆▅▄▂▄▄▄▃▅▄▆▅▃▄▄
Reg Loss,▁▁▁▂▂▅▆▆▆▇▆█▅▅▆▆▆▆▅▅▆▅▆▅▆▆▇▇▇██████▇▇▇▆▆
Training accuracy,▁▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇████
Validation Accuracy,▆▆▃▁▁▂▃▃▃▃▃▃▅▃▅▅▅▅▆▆▆▆▆▇█▇▇▇▇▇▇▇▇████▇▇█
Validation Loss,▁▂▃█▇▃▃▂▄█▄▁▃▁▃▁▁▁▁▁▁▁▁▃▂▃▃▃▃▃▃▄▆▅▅▃▂▂▃▄

0,1
Cls Loss,0.09288
Learning rate,0.0001
Loss,0.0938
Reconstruct Loss,0.0007
Reg Loss,2.19658
Training accuracy,0.97172
Validation Accuracy,0.6794
Validation Loss,1.33722


### 7 Testing loop

In [40]:
# Location of the best hypernetwork checkpoint inside the experiment's model directory.
saved_hypernet_path = f"{args.training.save_model_path}/cifar10_nerf_best.pth"

In [41]:
# Show the resolved checkpoint path as the cell output (sanity check before loading).
saved_hypernet_path

'toy/experiments_densenet/dense_7th_experiment/cifar10_nerf_best.pth'

In [42]:
# Build a separate hypernetwork instance for testing; the saved checkpoint weights are loaded into it in the next cell.
# `number_param` is defined earlier in the notebook — presumably the same value used to build the training hypernetwork; verify.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [43]:
# Read the checkpoint file onto the CPU and copy its 'model_state_dict' entry into the test hypernetwork.
# NOTE(review): torch.load unpickles the file, so only point this at checkpoints you trust;
# consider weights_only=True if the checkpoint is known to hold only tensors/plain containers — confirm first.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
# Kept as the cell's last expression so the key-matching report from load_state_dict is displayed.
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [44]:
# Testing loop: for every hidden dimension from 12 to 48 (inclusive), build a model of that
# width, fill it with weights sampled from the loaded hypernetwork, validate it, save its
# state dict, and append one line with the metrics to a per-experiment results file.

# The results file does not depend on hidden_dim, so its path is built once up front.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, apply it before sampling.
    # NOTE(review): `ema` is created earlier in the notebook; confirm it tracks
    # `hyper_model_test` rather than the training-time hypernetwork, otherwise
    # apply()/restore() do not affect the model evaluated here.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always restore the original weights, even if sampling or validation raises,
        # so a failed iteration does not leave EMA weights applied in the live kernel.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Write the results. 'a' appends, creating the file if it doesn't exist; note that
    # re-running this cell therefore adds a second set of lines to the same file.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 34.70it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 1.3607, Validation Accuracy: 66.51%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 34.29it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 1.3535, Validation Accuracy: 66.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 34.27it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 1.3687, Validation Accuracy: 67.84%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 34.49it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 1.3555, Validation Accuracy: 67.01%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 32.63it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 1.3737, Validation Accuracy: 66.86%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.56it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 1.3708, Validation Accuracy: 68.23%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 45.24it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 1.3588, Validation Accuracy: 67.60%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.73it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 1.3587, Validation Accuracy: 66.91%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.37it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 1.3861, Validation Accuracy: 68.27%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.04it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 1.4029, Validation Accuracy: 67.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.33it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 1.3711, Validation Accuracy: 67.14%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 33.44it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 1.4024, Validation Accuracy: 68.39%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 33.27it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 1.3568, Validation Accuracy: 67.72%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 35.91it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 1.3587, Validation Accuracy: 67.92%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 44.98it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 1.3556, Validation Accuracy: 67.75%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.18it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 1.3633, Validation Accuracy: 67.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.19it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 1.3827, Validation Accuracy: 68.47%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 44.76it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 1.3536, Validation Accuracy: 67.68%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 44.21it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 1.3669, Validation Accuracy: 67.58%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 41.72it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 1.3587, Validation Accuracy: 67.33%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.46it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 1.3576, Validation Accuracy: 67.57%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.99it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 1.3613, Validation Accuracy: 67.93%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 1.3677, Validation Accuracy: 67.70%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.03it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 1.3699, Validation Accuracy: 68.10%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.67it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 1.3587, Validation Accuracy: 67.61%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.67it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 1.3597, Validation Accuracy: 67.22%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.27it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 1.3651, Validation Accuracy: 67.74%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.34it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 1.3621, Validation Accuracy: 67.49%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.55it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 1.3488, Validation Accuracy: 67.78%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.42it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 1.3547, Validation Accuracy: 67.24%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.95it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 1.3466, Validation Accuracy: 66.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 41.96it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 1.3612, Validation Accuracy: 67.40%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.26it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 1.3617, Validation Accuracy: 67.31%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 33.72it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 1.3679, Validation Accuracy: 67.75%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:04<00:00, 34.81it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 1.3711, Validation Accuracy: 67.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 42.26it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 1.3794, Validation Accuracy: 68.06%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 157/157 [00:03<00:00, 43.33it/s]

Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 1.3303, Validation Accuracy: 67.46%





