## Import

In [12]:
import os
import random

In [13]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [14]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [15]:
def find_max_dim(model_cls):
    """Return the largest size found among the model's learnable tensors.

    The result is the maximum of:
      * the number of learnable tensors,
      * the first two dimensions of every rank-4 tensor,
      * the length of every rank-1 tensor.
    Tensors of any other rank do not contribute.

    Args:
        model_cls: Object exposing ``learnable_parameter``, a mapping from
            parameter name to an object with a ``shape``.

    Returns:
        The largest value among the candidates listed above.
    """
    params = model_cls.learnable_parameter

    # The number of learnable tensors is the floor of the result.
    candidates = [len(params)]

    for _name, tensor in params.items():
        rank = len(tensor.shape)
        if rank == 4:
            # Rank-4 tensors (e.g. conv kernels): only dims 0 and 1 count,
            # the two trailing dims are ignored.
            candidates.extend([tensor.shape[0], tensor.shape[1]])
        elif rank == 1:
            # Rank-1 tensors (e.g. biases)
            candidates.append(tensor.shape[0])

    return max(candidates)

### Initialize wandb

In [16]:
def initialize_wandb(config):
    """
    Initializes Weights and Biases (wandb) with the given configuration.

    The run is named ``<YYYYmmddHHMMSS>-<config.experiment.name>`` and is
    started in the "dense-inr-trial" project under the "cifar10" group.

    Args:
        config: Configuration for the run. Must expose ``experiment.name``
            and be convertible with ``dict(config)``.
    """
    # Kept as a function-local import, but placed after the docstring:
    # a string literal is only the docstring when it is the first statement.
    import time

    # Name the run using current time and configuration name
    run_name = f"{time.strftime('%Y%m%d%H%M%S')}-{config.experiment.name}"

    wandb.init(project="dense-inr-trial", name=run_name, config=dict(config), group='cifar10')

### Init model dictionary

In [17]:
def init_model_dict(args, device):
    """
    Build one target network per hidden dimension, plus the reference network for the starting dimension.

    Args:
        args: Configuration object. ``args.model.*`` supplies the network settings forwarded to
            ``create_model``; ``args.dimensions.range`` lists the hidden dimensions to build and
            ``args.dimensions.start`` is the dimension that also gets a reference network.
        device: Device every created network is moved to.

    Returns:
        dim_dict: Maps ``f"{dim}"`` to the tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``; the trailing
            ``None`` occupies the key-mask slot.
        gt_model_dict: Maps ``f"{dim}"`` to an eval-mode network created with ``smooth=True``.
            Only populated for ``dim == args.dimensions.start``.
    """
    def _build(hidden_dim, **extra):
        # Single place that forwards the architecture settings to create_model
        cfg = args.model
        network = create_model(cfg.type,
                               layers=cfg.layers,
                               growth=cfg.growth,
                               compression=cfg.compression,
                               bottleneck=cfg.bottleneck,
                               drop_rate=cfg.drop_rate,
                               hidden_dim=hidden_dim,
                               path=cfg.pretrained_path,
                               **extra)
        return network.to(device)

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        key = f"{dim}"

        # Network for this hidden dimension together with its sampled coordinate metadata
        network = _build(dim)
        coords_tensor, keys_list, indices_list, size_list = sample_coordinates(network)
        dim_dict[key] = (network, coords_tensor, keys_list, indices_list, size_list, None)

        # Blank line to separate the per-dimension log output
        print('\n')

        # Only the starting dimension (that of the pretrained model) gets a reference network
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = _build(dim, smooth=True)
        reference.eval()
        gt_model_dict[key] = reference

    return dim_dict, gt_model_dict

### Training function

In [18]:
# Function to train the hypernetwork for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """Run one pass over ``train_loader`` and step ``optimizer`` once per batch.

    For every batch a hidden dimension is drawn at random from
    ``args.dimensions.range`` and the matching ``dim_dict`` entry is used.
    Weights are produced with ``sample_weights`` and the batch is classified
    by the network that call returns. The loss is a weighted sum of:
      * the classification loss ``criterion(predict, target)``,
      * the sum of the L2 norms of the generated weights,
      * a reconstruction loss against the ground-truth weights, only when
        ``gt_model_dict`` has an entry for the drawn dimension (0 otherwise).

    Args:
        model: The hypernetwork. It is put in train mode, and its parameters
            are the ones that get gradient-clipped.
        train_loader: Iterable of ``(x, target)`` batches; ``len()`` of it is
            used to compute the wandb step.
        optimizer: Optimizer stepped once per batch.
        criterion: Classification loss, called as ``criterion(predict, target)``.
        dim_dict: Maps ``f"{hidden_dim}"`` to a 6-tuple
            ``(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``f"{hidden_dim}"`` to a model exposing
            ``learnable_parameter``; may lack entries for some dimensions.
        epoch_idx: Epoch index, used only to offset the wandb step.
        ema: Optional object with an ``update()`` method, called after every
            optimizer step.
        args: Configuration object. Required in practice: it is dereferenced
            unconditionally even though the default is ``None``.
        device: Device the batch is moved to; also forwarded to ``sample_weights``.

    Returns:
        tuple: ``(losses.avg, dim_dict, gt_model_dict, train_acc)``. The two
        dicts are the same objects that were passed in; ``train_acc`` is the
        accuracy over all batches of this epoch.
    """
    # Set the model to training mode
    model.train()
    # NOTE(review): total_loss is accumulated below but never read or returned
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Predictions and labels collected over the whole epoch for the training accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys.
        # The local names are rebound here; the tuple stored in dim_dict is not reassigned.
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add zero-centred uniform noise, scaled by coordinate_noise, to the coordinates if specified
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        # NOTE(review): presumably sample_weights also loads the generated weights into the returned
        # model_cls — confirm against neumeta.utils.sample_weights
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass
        predict = model_cls(x)
        
        # Record the predicted class of this training forward pass for the epoch-level accuracy

        pred = torch.argmax(predict, dim=-1)

        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target) 
        # Compute regularization loss: sum of the L2 norms of the generated weights
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Ground-truth tensors for the selected keys, in learnable_parameter iteration order
            # NOTE(review): assumes this order matches reconstructed_weights — confirm
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # Zero placeholder so the weighted sum and the .item() call below still work
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear any gradients left on the target network's parameters
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # Two-stage backward; the order of these two calls matters.
        # 1) Backpropagate the total loss, keeping the graph for the second call.
        # 2) Feed the .grad of the target network's selected parameters back through
        #    reconstructed_weights as their upstream gradients.
        # NOTE(review): presumably stage 1 leaves the classification gradient on model_cls's
        # parameters and stage 2 carries it into the hypernetwork — confirm against sample_weights
        loss.backward(retain_graph=True)
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])
        
        # Clip the hypernetwork's gradients by value if specified ('clip_grad' defaults to 0.0 = off)
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)
            
        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the meters' .avg values and the learning rate to wandb every log_interval batches;
        # the step is the global batch index across epochs
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")
    
    # Accuracy over every batch of the epoch; each batch may have used a different hidden dimension
    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # NOTE(review): logged without an explicit step, unlike the per-batch losses above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Returns the average training loss, the two model dictionaries as passed in, and the training accuracy
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select device (GPU if available, otherwise CPU)

In [19]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### 1 Parsing arguments for input

In [20]:
# Experiment configuration file, passed to parse_args as --config
CONFIG_PATH = 'neumeta/config/densenet_inr_train/dense_17th_experiment.yaml'
# Passed as --ratio; kept as a string because it is placed in an argv-style list
RATIO = '1.0'
# Checkpoint path passed as --resume_from
# NOTE(review): a later cell sets args.resume_from = False, so this checkpoint is not loaded in this run
CHECKPOINT_PATH = 'toy/experiments_densenet/dense_17th_experiment/cifar10_nerf_best.pth'

In [21]:
# Command-line style argument list handed to parse_args
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [22]:
args = parse_args(argv_train)  # Build the run configuration from the argument list
print_omegaconf(args)  # Print the resulting configuration as a key/value table

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                        dense_17th_experiment                                         |
|        experiment.num_epochs         |                                                  50                                                  |
|       experiment.log_interval        |                                                  50                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [23]:
# Seed the run with the value stored in the configuration (experiment.seed)
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [24]:
# CIFAR-10 loaders; batch size comes from the config, and 'strong_aug' falls back to None when absent
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 128 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [25]:
# Target network at the starting hidden dimension, created with the pretrained path from the config
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [26]:
# Display the module structure of the target network
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [27]:
# List every learnable tensor of the target network with its shape.
# The index from enumerate() was never used, so iterate over the items directly.
for k, tensor in model.learnable_parameter.items():
    print(k, tensor.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [28]:
# Print the largest size found among the target network's learnable tensors (see find_max_dim)
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [29]:
# Baseline check: validate the target network created above, before any hypernetwork training
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:03<00:00, 21.76it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.92%





In [30]:
# Mapping of the target network's learnable tensors (despite the name, nothing is loaded from disk here)
checkpoint = model.learnable_parameter
# Number of learnable tensors; passed to get_hypernetwork below
number_param = len(checkpoint)

In [31]:
# Print the parameter keys exposed by the model and how many tensors the hypernetwork has to produce
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [32]:
# Build the hypernetwork from the config; number_param is the count of learnable target tensors
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [33]:
# Display the module structure of the hypernetwork
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=128, bias=True)
      (residual_blocks): ModuleList(
        (0-6): 7 x Linear(in_features=128, out_features=128, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
          (3): Parameter containing: [torch.float32 of size  (cuda:0)]
          (4): Parameter containing: [torch.float32 of size  (cuda:0)]
          (5): Parameter containing: [torch.float32 of size  (cuda:0)]
          (6): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=128, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize EMA to track a smoothed version of the hypernetwork weights

In [34]:
# Initialize the EMA tracker for the hypernetwork, using the decay rate from the config
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [35]:
# Training loss, validation loss, optimizer and LR scheduler for the hypernetwork, as configured in args
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [36]:
# Show the objects returned by get_optimizer for a quick sanity check
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x00000198B1475B10>


### 6 Training loop

#### 6.1 Initialize training parameters

In [37]:
# Initialize the starting epoch and best accuracy
# (both are overwritten by the resume cell below when a checkpoint is loaded)
start_epoch = 0
best_acc = 0.0

#### 6.2 Directory to save the model

In [38]:
# Create the directory the checkpoints are written to; no error if it already exists
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [39]:
# Show the checkpoint path currently configured for resuming
args.resume_from

'toy/experiments_densenet/dense_17th_experiment/cifar10_nerf_best.pth'

In [40]:
# NOTE(review): this discards the --resume_from path supplied in argv_train, so the resume cell
# below is skipped and training starts from epoch 0. Remove this line to actually resume.
args.resume_from = False

In [41]:
# Restore the training state only when a checkpoint path is configured
if args.resume_from:
    print(f"Resuming from checkpoint: {args.resume_from}")
    checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)

    # Continue from the stored epoch and keep the best accuracy reached so far
    # NOTE(review): the training loop saves the 0-based index of the epoch that just finished;
    # confirm whether load_checkpoint already advances it, otherwise that epoch is repeated
    start_epoch = checkpoint_info['epoch']
    best_acc = checkpoint_info['best_acc']
    print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
    # Note: If there are more elements to retrieve, do so here.

#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [42]:
# Build one target network per hidden dimension, plus the reference network for the starting dimension
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exper

In [43]:
# Display the reference network. The key is derived from the config instead of a hard-coded '48',
# matching how init_model_dict stores it (f'{dim}' for dim == args.dimensions.start).
gt_model_dict[f"{args.dimensions.start}"]

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [44]:
# Validate the reference network built for the starting dimension.
# The key is derived from the config instead of a hard-coded '48', matching how
# init_model_dict stores it (f'{dim}' for dim == args.dimensions.start).
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:03<00:00, 23.65it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.93%





In [45]:
# Display the per-dimension dictionary (keys are the hidden dimensions as strings)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [46]:
# NOTE(review): dim_dict is reassigned from itself, so re-running this cell applies
# shuffle_coordinates_all again to the already-processed dictionary (not idempotent)
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [47]:
# Start the wandb run for this training session (see initialize_wandb)
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [48]:
# Show the number of epochs configured for training
args.experiment.num_epochs

50

In [49]:
# Iterate over the epochs
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork for one epoch (a random hidden dimension is drawn for every batch)
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion,
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema,
                                                                     args=args, device=device)
    # Step the scheduler
    scheduler.step()

    # Print the training loss and learning rate
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # If it's time to evaluate the model
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # NOTE(review): presumably swaps the EMA-averaged weights into hyper_model for evaluation —
        # confirm against neumeta.utils.EMA
        if ema:
            ema.apply()

        try:
            # Generate the target network's weights with the hypernetwork and validate the result
            model = sample_merge_model(hyper_model, model, args)
            val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)
        finally:
            # Always undo ema.apply(), even if validation raises, so a re-run of this cell
            # does not continue training from the swapped-in weights
            if ema:
                ema.restore()

        # Log the validation loss and accuracy to wandb
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            # The checkpoint still stores the 0-based epoch index, exactly as before
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            # Report the 1-based epoch so this message agrees with the "Epoch [n/N]" lines above
            print(f"Checkpoint saved at epoch {epoch+1} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.3421, Reg Loss = 1.1405, Reconstruct Loss = 0.0000, Cls Loss = 0.3420, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.2240, Reg Loss = 5.6286, Reconstruct Loss = 0.0050, Cls Loss = 0.2185, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.2169, Reg Loss = 5.5638, Reconstruct Loss = 0.0031, Cls Loss = 0.2133, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.2132, Reg Loss = 5.2961, Reconstruct Loss = 0.0032, Cls Loss = 0.2094, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.2104, Reg Loss = 4.9411, Reconstruct Loss = 0.0034, Cls Loss = 0.2064, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.2093, Reg Loss = 4.5866, Reconstruct Loss = 0.0031, Cls Loss = 0.2058, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.2073, Reg Loss = 4.4473, Reconstruct Loss = 0.0026, Cls Loss = 0.2043, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.2064, Reg Loss = 4.4340, Reconstruct Loss = 0.0023, Cls Loss = 0.2037, Learning rate = 1.0000e-03
Epoch [1/50], Training Loss

100%|██████████| 79/79 [00:04<00:00, 16.84it/s]


Epoch [1/50], Validation Loss: 3.9037, Validation Accuracy: 46.77%



Checkpoint saved at epoch 0 with accuracy: 46.77%
Iteration 0: Loss = 0.2151, Reg Loss = 5.3876, Reconstruct Loss = 0.0000, Cls Loss = 0.2145, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1904, Reg Loss = 4.3282, Reconstruct Loss = 0.0016, Cls Loss = 0.1884, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1843, Reg Loss = 4.3535, Reconstruct Loss = 0.0013, Cls Loss = 0.1826, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1867, Reg Loss = 4.9366, Reconstruct Loss = 0.0013, Cls Loss = 0.1848, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1871, Reg Loss = 5.1315, Reconstruct Loss = 0.0010, Cls Loss = 0.1856, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1871, Reg Loss = 5.1737, Reconstruct Loss = 0.0009, Cls Loss = 0.1857, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1867, Reg Loss = 5.1778, Reconstruct Loss = 0.0013, Cls Loss = 0.1848, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1851,

100%|██████████| 79/79 [00:04<00:00, 17.55it/s]


Epoch [2/50], Validation Loss: 1.1469, Validation Accuracy: 67.55%



Checkpoint saved at epoch 1 with accuracy: 67.55%
Iteration 0: Loss = 0.1762, Reg Loss = 4.3486, Reconstruct Loss = 0.0000, Cls Loss = 0.1757, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1900, Reg Loss = 4.9203, Reconstruct Loss = 0.0023, Cls Loss = 0.1872, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1881, Reg Loss = 5.3279, Reconstruct Loss = 0.0018, Cls Loss = 0.1858, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1825, Reg Loss = 5.3283, Reconstruct Loss = 0.0017, Cls Loss = 0.1803, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1801, Reg Loss = 5.1554, Reconstruct Loss = 0.0015, Cls Loss = 0.1781, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1781, Reg Loss = 5.0656, Reconstruct Loss = 0.0014, Cls Loss = 0.1763, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1759, Reg Loss = 5.0190, Reconstruct Loss = 0.0015, Cls Loss = 0.1740, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1753,

100%|██████████| 79/79 [00:04<00:00, 18.12it/s]


Epoch [3/50], Validation Loss: 1.1754, Validation Accuracy: 68.04%



Checkpoint saved at epoch 2 with accuracy: 68.04%
Iteration 0: Loss = 0.1772, Reg Loss = 4.1741, Reconstruct Loss = 0.0000, Cls Loss = 0.1768, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1856, Reg Loss = 4.4357, Reconstruct Loss = 0.0026, Cls Loss = 0.1825, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1781, Reg Loss = 4.6083, Reconstruct Loss = 0.0016, Cls Loss = 0.1761, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1750, Reg Loss = 4.5725, Reconstruct Loss = 0.0014, Cls Loss = 0.1731, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1737, Reg Loss = 4.5567, Reconstruct Loss = 0.0013, Cls Loss = 0.1719, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1717, Reg Loss = 4.5141, Reconstruct Loss = 0.0011, Cls Loss = 0.1701, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1720, Reg Loss = 4.4438, Reconstruct Loss = 0.0012, Cls Loss = 0.1703, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1711,

100%|██████████| 79/79 [00:04<00:00, 17.92it/s]


Epoch [4/50], Validation Loss: 1.1214, Validation Accuracy: 69.35%



Checkpoint saved at epoch 3 with accuracy: 69.35%
Iteration 0: Loss = 0.1759, Reg Loss = 4.2244, Reconstruct Loss = 0.0000, Cls Loss = 0.1754, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1750, Reg Loss = 4.2545, Reconstruct Loss = 0.0012, Cls Loss = 0.1734, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1735, Reg Loss = 4.5283, Reconstruct Loss = 0.0008, Cls Loss = 0.1723, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1706, Reg Loss = 4.7107, Reconstruct Loss = 0.0007, Cls Loss = 0.1695, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1680, Reg Loss = 4.5525, Reconstruct Loss = 0.0011, Cls Loss = 0.1664, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1690, Reg Loss = 4.4989, Reconstruct Loss = 0.0014, Cls Loss = 0.1672, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1678, Reg Loss = 4.4046, Reconstruct Loss = 0.0014, Cls Loss = 0.1660, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1680,

100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [5/50], Validation Loss: 0.9657, Validation Accuracy: 72.45%



Checkpoint saved at epoch 4 with accuracy: 72.45%
Iteration 0: Loss = 0.1132, Reg Loss = 4.5259, Reconstruct Loss = 0.0000, Cls Loss = 0.1127, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1666, Reg Loss = 4.9816, Reconstruct Loss = 0.0000, Cls Loss = 0.1661, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1557, Reg Loss = 4.6522, Reconstruct Loss = 0.0004, Cls Loss = 0.1547, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1584, Reg Loss = 4.4024, Reconstruct Loss = 0.0012, Cls Loss = 0.1568, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1604, Reg Loss = 4.3020, Reconstruct Loss = 0.0015, Cls Loss = 0.1585, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1595, Reg Loss = 4.2245, Reconstruct Loss = 0.0013, Cls Loss = 0.1577, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1598, Reg Loss = 4.1791, Reconstruct Loss = 0.0011, Cls Loss = 0.1582, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1606,

100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Epoch [6/50], Validation Loss: 0.9738, Validation Accuracy: 70.75%



Iteration 0: Loss = 0.1945, Reg Loss = 3.7753, Reconstruct Loss = 0.0000, Cls Loss = 0.1942, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1488, Reg Loss = 3.8812, Reconstruct Loss = 0.0012, Cls Loss = 0.1472, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1521, Reg Loss = 3.7361, Reconstruct Loss = 0.0011, Cls Loss = 0.1507, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1519, Reg Loss = 3.7944, Reconstruct Loss = 0.0013, Cls Loss = 0.1503, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1507, Reg Loss = 3.8354, Reconstruct Loss = 0.0012, Cls Loss = 0.1491, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1525, Reg Loss = 4.0787, Reconstruct Loss = 0.0012, Cls Loss = 0.1509, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1532, Reg Loss = 4.1860, Reconstruct Loss = 0.0012, Cls Loss = 0.1515, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1527, Reg Loss = 4.2270, Reconstruct Loss = 0.0013, Cls

100%|██████████| 79/79 [00:03<00:00, 23.01it/s]


Epoch [7/50], Validation Loss: 1.3005, Validation Accuracy: 65.42%



Iteration 0: Loss = 0.1443, Reg Loss = 4.1290, Reconstruct Loss = 0.0000, Cls Loss = 0.1439, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1437, Reg Loss = 4.0567, Reconstruct Loss = 0.0015, Cls Loss = 0.1418, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1438, Reg Loss = 3.9536, Reconstruct Loss = 0.0014, Cls Loss = 0.1420, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1457, Reg Loss = 4.0987, Reconstruct Loss = 0.0017, Cls Loss = 0.1435, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1451, Reg Loss = 4.0255, Reconstruct Loss = 0.0015, Cls Loss = 0.1432, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1431, Reg Loss = 4.0069, Reconstruct Loss = 0.0015, Cls Loss = 0.1412, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1417, Reg Loss = 3.9195, Reconstruct Loss = 0.0015, Cls Loss = 0.1398, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1404, Reg Loss = 3.8542, Reconstruct Loss = 0.0015, Cls

100%|██████████| 79/79 [00:03<00:00, 23.20it/s]


Epoch [8/50], Validation Loss: 1.4363, Validation Accuracy: 62.52%



Iteration 0: Loss = 0.2005, Reg Loss = 3.1924, Reconstruct Loss = 0.0000, Cls Loss = 0.2001, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1329, Reg Loss = 3.6664, Reconstruct Loss = 0.0020, Cls Loss = 0.1306, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1275, Reg Loss = 3.6418, Reconstruct Loss = 0.0014, Cls Loss = 0.1257, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1230, Reg Loss = 3.6891, Reconstruct Loss = 0.0017, Cls Loss = 0.1209, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1238, Reg Loss = 3.7136, Reconstruct Loss = 0.0017, Cls Loss = 0.1218, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1228, Reg Loss = 3.7594, Reconstruct Loss = 0.0013, Cls Loss = 0.1211, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1212, Reg Loss = 3.7652, Reconstruct Loss = 0.0013, Cls Loss = 0.1195, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1218, Reg Loss = 3.7883, Reconstruct Loss = 0.0014, Cls

100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Epoch [9/50], Validation Loss: 1.4296, Validation Accuracy: 64.14%



Iteration 0: Loss = 0.1267, Reg Loss = 5.2566, Reconstruct Loss = 0.0000, Cls Loss = 0.1261, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1135, Reg Loss = 4.6169, Reconstruct Loss = 0.0004, Cls Loss = 0.1127, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1145, Reg Loss = 4.4155, Reconstruct Loss = 0.0005, Cls Loss = 0.1136, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1142, Reg Loss = 4.3387, Reconstruct Loss = 0.0008, Cls Loss = 0.1129, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1149, Reg Loss = 4.2917, Reconstruct Loss = 0.0008, Cls Loss = 0.1137, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1141, Reg Loss = 4.2749, Reconstruct Loss = 0.0008, Cls Loss = 0.1129, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1140, Reg Loss = 4.2447, Reconstruct Loss = 0.0006, Cls Loss = 0.1130, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1138, Reg Loss = 4.2213, Reconstruct Loss = 0.0010, Cls

100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [10/50], Validation Loss: 1.6467, Validation Accuracy: 63.36%



Iteration 0: Loss = 0.0632, Reg Loss = 4.3110, Reconstruct Loss = 0.0000, Cls Loss = 0.0628, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1059, Reg Loss = 4.2084, Reconstruct Loss = 0.0015, Cls Loss = 0.1039, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1076, Reg Loss = 4.2780, Reconstruct Loss = 0.0012, Cls Loss = 0.1060, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1070, Reg Loss = 4.3173, Reconstruct Loss = 0.0009, Cls Loss = 0.1056, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1073, Reg Loss = 4.3089, Reconstruct Loss = 0.0011, Cls Loss = 0.1057, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1077, Reg Loss = 4.3477, Reconstruct Loss = 0.0012, Cls Loss = 0.1060, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1065, Reg Loss = 4.3700, Reconstruct Loss = 0.0012, Cls Loss = 0.1049, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1072, Reg Loss = 4.3642, Reconstruct Loss = 0.0012, Cl

100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [11/50], Validation Loss: 2.1162, Validation Accuracy: 62.05%



Iteration 0: Loss = 0.0852, Reg Loss = 4.4010, Reconstruct Loss = 0.0000, Cls Loss = 0.0848, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.1061, Reg Loss = 4.3275, Reconstruct Loss = 0.0025, Cls Loss = 0.1032, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.1047, Reg Loss = 4.3566, Reconstruct Loss = 0.0019, Cls Loss = 0.1024, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.1045, Reg Loss = 4.4029, Reconstruct Loss = 0.0014, Cls Loss = 0.1027, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1030, Reg Loss = 4.4509, Reconstruct Loss = 0.0011, Cls Loss = 0.1015, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1021, Reg Loss = 4.4781, Reconstruct Loss = 0.0011, Cls Loss = 0.1006, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1033, Reg Loss = 4.4970, Reconstruct Loss = 0.0010, Cls Loss = 0.1018, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.1035, Reg Loss = 4.5370, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:03<00:00, 23.18it/s]


Epoch [12/50], Validation Loss: 1.9118, Validation Accuracy: 63.03%



Iteration 0: Loss = 0.0727, Reg Loss = 4.7912, Reconstruct Loss = 0.0000, Cls Loss = 0.0722, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0966, Reg Loss = 4.7522, Reconstruct Loss = 0.0005, Cls Loss = 0.0956, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0989, Reg Loss = 4.6740, Reconstruct Loss = 0.0003, Cls Loss = 0.0982, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0998, Reg Loss = 4.6488, Reconstruct Loss = 0.0006, Cls Loss = 0.0987, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1014, Reg Loss = 4.6608, Reconstruct Loss = 0.0007, Cls Loss = 0.1003, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.1004, Reg Loss = 4.6317, Reconstruct Loss = 0.0006, Cls Loss = 0.0993, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0995, Reg Loss = 4.5955, Reconstruct Loss = 0.0008, Cls Loss = 0.0983, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0987, Reg Loss = 4.5633, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:03<00:00, 23.20it/s]


Epoch [13/50], Validation Loss: 1.8410, Validation Accuracy: 62.82%



Iteration 0: Loss = 0.0481, Reg Loss = 4.6328, Reconstruct Loss = 0.0000, Cls Loss = 0.0476, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0926, Reg Loss = 4.8869, Reconstruct Loss = 0.0006, Cls Loss = 0.0915, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0925, Reg Loss = 4.7207, Reconstruct Loss = 0.0007, Cls Loss = 0.0913, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0940, Reg Loss = 4.7691, Reconstruct Loss = 0.0014, Cls Loss = 0.0921, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0943, Reg Loss = 4.7123, Reconstruct Loss = 0.0013, Cls Loss = 0.0925, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0927, Reg Loss = 4.7070, Reconstruct Loss = 0.0012, Cls Loss = 0.0911, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0933, Reg Loss = 4.6936, Reconstruct Loss = 0.0012, Cls Loss = 0.0916, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0919, Reg Loss = 4.7138, Reconstruct Loss = 0.0011, Cl

100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [14/50], Validation Loss: 2.3366, Validation Accuracy: 59.02%



Iteration 0: Loss = 0.0840, Reg Loss = 5.1109, Reconstruct Loss = 0.0000, Cls Loss = 0.0835, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0850, Reg Loss = 4.8886, Reconstruct Loss = 0.0004, Cls Loss = 0.0841, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0843, Reg Loss = 4.9513, Reconstruct Loss = 0.0007, Cls Loss = 0.0831, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0855, Reg Loss = 4.9127, Reconstruct Loss = 0.0012, Cls Loss = 0.0838, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0864, Reg Loss = 4.8775, Reconstruct Loss = 0.0010, Cls Loss = 0.0849, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0851, Reg Loss = 4.9135, Reconstruct Loss = 0.0011, Cls Loss = 0.0836, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0841, Reg Loss = 4.9082, Reconstruct Loss = 0.0011, Cls Loss = 0.0825, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0840, Reg Loss = 4.9145, Reconstruct Loss = 0.0012, Cl

100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Epoch [15/50], Validation Loss: 2.2877, Validation Accuracy: 61.32%



Iteration 0: Loss = 0.0564, Reg Loss = 4.7908, Reconstruct Loss = 0.0000, Cls Loss = 0.0559, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0810, Reg Loss = 4.8508, Reconstruct Loss = 0.0010, Cls Loss = 0.0796, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0747, Reg Loss = 4.8803, Reconstruct Loss = 0.0009, Cls Loss = 0.0733, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0756, Reg Loss = 4.9119, Reconstruct Loss = 0.0008, Cls Loss = 0.0743, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0767, Reg Loss = 4.9337, Reconstruct Loss = 0.0007, Cls Loss = 0.0755, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0771, Reg Loss = 4.9484, Reconstruct Loss = 0.0010, Cls Loss = 0.0756, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0757, Reg Loss = 4.9568, Reconstruct Loss = 0.0009, Cls Loss = 0.0743, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0746, Reg Loss = 4.9298, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:03<00:00, 23.04it/s]


Epoch [16/50], Validation Loss: 3.0862, Validation Accuracy: 56.23%



Iteration 0: Loss = 0.0704, Reg Loss = 4.8671, Reconstruct Loss = 0.0000, Cls Loss = 0.0700, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0751, Reg Loss = 5.1179, Reconstruct Loss = 0.0019, Cls Loss = 0.0727, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0750, Reg Loss = 5.1275, Reconstruct Loss = 0.0014, Cls Loss = 0.0731, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0726, Reg Loss = 5.1246, Reconstruct Loss = 0.0011, Cls Loss = 0.0710, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0730, Reg Loss = 5.1196, Reconstruct Loss = 0.0010, Cls Loss = 0.0714, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0727, Reg Loss = 5.1127, Reconstruct Loss = 0.0012, Cls Loss = 0.0710, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0718, Reg Loss = 5.1084, Reconstruct Loss = 0.0010, Cls Loss = 0.0703, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0717, Reg Loss = 5.1164, Reconstruct Loss = 0.0011, Cl

100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [17/50], Validation Loss: 3.8617, Validation Accuracy: 56.16%



Iteration 0: Loss = 0.0561, Reg Loss = 5.1576, Reconstruct Loss = 0.0000, Cls Loss = 0.0556, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0672, Reg Loss = 5.1250, Reconstruct Loss = 0.0000, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0647, Reg Loss = 5.1007, Reconstruct Loss = 0.0007, Cls Loss = 0.0635, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0651, Reg Loss = 5.0882, Reconstruct Loss = 0.0009, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0650, Reg Loss = 5.0503, Reconstruct Loss = 0.0007, Cls Loss = 0.0637, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0664, Reg Loss = 5.0969, Reconstruct Loss = 0.0008, Cls Loss = 0.0651, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0669, Reg Loss = 5.1033, Reconstruct Loss = 0.0009, Cls Loss = 0.0655, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0675, Reg Loss = 5.1715, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [18/50], Validation Loss: 3.1220, Validation Accuracy: 57.33%



Iteration 0: Loss = 0.0894, Reg Loss = 6.0928, Reconstruct Loss = 0.0233, Cls Loss = 0.0656, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0635, Reg Loss = 5.1782, Reconstruct Loss = 0.0008, Cls Loss = 0.0621, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0652, Reg Loss = 5.1488, Reconstruct Loss = 0.0008, Cls Loss = 0.0640, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0659, Reg Loss = 5.1357, Reconstruct Loss = 0.0006, Cls Loss = 0.0648, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0644, Reg Loss = 5.1992, Reconstruct Loss = 0.0006, Cls Loss = 0.0632, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0645, Reg Loss = 5.2099, Reconstruct Loss = 0.0006, Cls Loss = 0.0634, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0636, Reg Loss = 5.2451, Reconstruct Loss = 0.0008, Cls Loss = 0.0623, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0638, Reg Loss = 5.2934, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [19/50], Validation Loss: 2.6079, Validation Accuracy: 59.58%



Iteration 0: Loss = 0.0400, Reg Loss = 5.7301, Reconstruct Loss = 0.0000, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0608, Reg Loss = 5.7415, Reconstruct Loss = 0.0003, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0611, Reg Loss = 5.6740, Reconstruct Loss = 0.0002, Cls Loss = 0.0604, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0643, Reg Loss = 5.6133, Reconstruct Loss = 0.0004, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0655, Reg Loss = 5.6351, Reconstruct Loss = 0.0007, Cls Loss = 0.0642, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0643, Reg Loss = 5.6331, Reconstruct Loss = 0.0008, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0641, Reg Loss = 5.6404, Reconstruct Loss = 0.0008, Cls Loss = 0.0627, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0645, Reg Loss = 5.6209, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [20/50], Validation Loss: 2.7819, Validation Accuracy: 58.54%



Iteration 0: Loss = 0.0362, Reg Loss = 5.3625, Reconstruct Loss = 0.0000, Cls Loss = 0.0357, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0635, Reg Loss = 5.7206, Reconstruct Loss = 0.0007, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0638, Reg Loss = 5.7329, Reconstruct Loss = 0.0004, Cls Loss = 0.0629, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0623, Reg Loss = 5.7979, Reconstruct Loss = 0.0004, Cls Loss = 0.0614, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0609, Reg Loss = 5.8097, Reconstruct Loss = 0.0005, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0606, Reg Loss = 5.8352, Reconstruct Loss = 0.0008, Cls Loss = 0.0592, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0604, Reg Loss = 5.8547, Reconstruct Loss = 0.0007, Cls Loss = 0.0591, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0609, Reg Loss = 5.8539, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:04<00:00, 17.44it/s]


Epoch [21/50], Validation Loss: 3.8138, Validation Accuracy: 52.05%



Iteration 0: Loss = 0.0640, Reg Loss = 5.4895, Reconstruct Loss = 0.0000, Cls Loss = 0.0635, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0622, Reg Loss = 5.9618, Reconstruct Loss = 0.0011, Cls Loss = 0.0605, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0645, Reg Loss = 5.9198, Reconstruct Loss = 0.0006, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0619, Reg Loss = 5.9061, Reconstruct Loss = 0.0004, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0605, Reg Loss = 5.9250, Reconstruct Loss = 0.0006, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0606, Reg Loss = 5.9487, Reconstruct Loss = 0.0005, Cls Loss = 0.0595, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0606, Reg Loss = 5.9762, Reconstruct Loss = 0.0007, Cls Loss = 0.0593, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0611, Reg Loss = 6.0009, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:03<00:00, 22.13it/s]


Epoch [22/50], Validation Loss: 5.7190, Validation Accuracy: 45.72%



Iteration 0: Loss = 0.0454, Reg Loss = 5.7689, Reconstruct Loss = 0.0000, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0600, Reg Loss = 6.2713, Reconstruct Loss = 0.0027, Cls Loss = 0.0567, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0592, Reg Loss = 6.2693, Reconstruct Loss = 0.0020, Cls Loss = 0.0566, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0603, Reg Loss = 6.2087, Reconstruct Loss = 0.0016, Cls Loss = 0.0581, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0599, Reg Loss = 6.1589, Reconstruct Loss = 0.0012, Cls Loss = 0.0581, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0601, Reg Loss = 6.1418, Reconstruct Loss = 0.0013, Cls Loss = 0.0582, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0592, Reg Loss = 6.1174, Reconstruct Loss = 0.0012, Cls Loss = 0.0575, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0594, Reg Loss = 6.1022, Reconstruct Loss = 0.0011, Cl

100%|██████████| 79/79 [00:04<00:00, 16.79it/s]


Epoch [23/50], Validation Loss: 4.5554, Validation Accuracy: 50.52%



Iteration 0: Loss = 0.0297, Reg Loss = 5.7655, Reconstruct Loss = 0.0000, Cls Loss = 0.0291, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0623, Reg Loss = 5.9736, Reconstruct Loss = 0.0007, Cls Loss = 0.0610, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0607, Reg Loss = 5.9951, Reconstruct Loss = 0.0007, Cls Loss = 0.0594, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0612, Reg Loss = 6.0844, Reconstruct Loss = 0.0009, Cls Loss = 0.0596, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0587, Reg Loss = 6.1176, Reconstruct Loss = 0.0007, Cls Loss = 0.0574, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0589, Reg Loss = 6.1303, Reconstruct Loss = 0.0006, Cls Loss = 0.0577, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0587, Reg Loss = 6.1934, Reconstruct Loss = 0.0007, Cls Loss = 0.0574, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0585, Reg Loss = 6.2431, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [24/50], Validation Loss: 4.4220, Validation Accuracy: 50.67%



Iteration 0: Loss = 0.0521, Reg Loss = 6.8256, Reconstruct Loss = 0.0000, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0609, Reg Loss = 6.5822, Reconstruct Loss = 0.0006, Cls Loss = 0.0596, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0574, Reg Loss = 6.6150, Reconstruct Loss = 0.0007, Cls Loss = 0.0561, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0562, Reg Loss = 6.5795, Reconstruct Loss = 0.0008, Cls Loss = 0.0547, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0551, Reg Loss = 6.5195, Reconstruct Loss = 0.0009, Cls Loss = 0.0535, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0553, Reg Loss = 6.4768, Reconstruct Loss = 0.0010, Cls Loss = 0.0536, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0551, Reg Loss = 6.4471, Reconstruct Loss = 0.0010, Cls Loss = 0.0534, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0553, Reg Loss = 6.4313, Reconstruct Loss = 0.0010, Cl

100%|██████████| 79/79 [00:04<00:00, 17.41it/s]


Epoch [25/50], Validation Loss: 4.8257, Validation Accuracy: 47.42%



Iteration 0: Loss = 0.0192, Reg Loss = 6.9491, Reconstruct Loss = 0.0000, Cls Loss = 0.0185, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0582, Reg Loss = 6.8078, Reconstruct Loss = 0.0013, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0578, Reg Loss = 6.7564, Reconstruct Loss = 0.0010, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0566, Reg Loss = 6.6980, Reconstruct Loss = 0.0008, Cls Loss = 0.0551, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0573, Reg Loss = 6.6567, Reconstruct Loss = 0.0007, Cls Loss = 0.0560, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0565, Reg Loss = 6.6785, Reconstruct Loss = 0.0007, Cls Loss = 0.0551, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0551, Reg Loss = 6.6027, Reconstruct Loss = 0.0007, Cls Loss = 0.0537, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0546, Reg Loss = 6.5894, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [26/50], Validation Loss: 5.0071, Validation Accuracy: 47.83%



Iteration 0: Loss = 0.0536, Reg Loss = 6.3735, Reconstruct Loss = 0.0000, Cls Loss = 0.0529, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0550, Reg Loss = 6.8324, Reconstruct Loss = 0.0013, Cls Loss = 0.0530, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0561, Reg Loss = 6.7701, Reconstruct Loss = 0.0009, Cls Loss = 0.0545, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0543, Reg Loss = 6.7263, Reconstruct Loss = 0.0006, Cls Loss = 0.0531, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0530, Reg Loss = 6.6904, Reconstruct Loss = 0.0006, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0534, Reg Loss = 6.6675, Reconstruct Loss = 0.0006, Cls Loss = 0.0521, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0530, Reg Loss = 6.6624, Reconstruct Loss = 0.0006, Cls Loss = 0.0517, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0529, Reg Loss = 6.6601, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [27/50], Validation Loss: 4.2795, Validation Accuracy: 50.34%



Iteration 0: Loss = 0.0626, Reg Loss = 6.5019, Reconstruct Loss = 0.0000, Cls Loss = 0.0620, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0507, Reg Loss = 6.5445, Reconstruct Loss = 0.0010, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0485, Reg Loss = 6.5327, Reconstruct Loss = 0.0005, Cls Loss = 0.0474, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0492, Reg Loss = 6.5698, Reconstruct Loss = 0.0005, Cls Loss = 0.0480, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0503, Reg Loss = 6.6530, Reconstruct Loss = 0.0003, Cls Loss = 0.0493, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0506, Reg Loss = 6.6936, Reconstruct Loss = 0.0004, Cls Loss = 0.0495, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0513, Reg Loss = 6.7384, Reconstruct Loss = 0.0006, Cls Loss = 0.0500, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0519, Reg Loss = 6.7385, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [28/50], Validation Loss: 7.4728, Validation Accuracy: 34.04%



Iteration 0: Loss = 0.0794, Reg Loss = 6.9210, Reconstruct Loss = 0.0000, Cls Loss = 0.0787, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0506, Reg Loss = 7.1063, Reconstruct Loss = 0.0008, Cls Loss = 0.0491, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0508, Reg Loss = 6.9628, Reconstruct Loss = 0.0006, Cls Loss = 0.0495, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0510, Reg Loss = 6.9109, Reconstruct Loss = 0.0005, Cls Loss = 0.0498, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0522, Reg Loss = 6.9188, Reconstruct Loss = 0.0005, Cls Loss = 0.0510, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0521, Reg Loss = 6.9364, Reconstruct Loss = 0.0006, Cls Loss = 0.0508, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0522, Reg Loss = 6.9064, Reconstruct Loss = 0.0005, Cls Loss = 0.0509, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0525, Reg Loss = 6.9212, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:03<00:00, 23.09it/s]


Epoch [29/50], Validation Loss: 4.5452, Validation Accuracy: 48.20%



Iteration 0: Loss = 0.0379, Reg Loss = 7.3951, Reconstruct Loss = 0.0000, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0478, Reg Loss = 7.1771, Reconstruct Loss = 0.0006, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0470, Reg Loss = 7.1844, Reconstruct Loss = 0.0007, Cls Loss = 0.0455, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0492, Reg Loss = 7.1876, Reconstruct Loss = 0.0005, Cls Loss = 0.0480, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0487, Reg Loss = 7.2178, Reconstruct Loss = 0.0004, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0492, Reg Loss = 7.2258, Reconstruct Loss = 0.0005, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0494, Reg Loss = 7.2464, Reconstruct Loss = 0.0006, Cls Loss = 0.0481, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0499, Reg Loss = 7.2674, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:04<00:00, 17.14it/s]


Epoch [30/50], Validation Loss: 6.6941, Validation Accuracy: 39.92%



Iteration 0: Loss = 0.0475, Reg Loss = 7.7074, Reconstruct Loss = 0.0000, Cls Loss = 0.0467, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0488, Reg Loss = 7.2141, Reconstruct Loss = 0.0004, Cls Loss = 0.0478, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0512, Reg Loss = 7.3088, Reconstruct Loss = 0.0005, Cls Loss = 0.0499, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0532, Reg Loss = 7.1942, Reconstruct Loss = 0.0004, Cls Loss = 0.0520, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0534, Reg Loss = 7.2068, Reconstruct Loss = 0.0008, Cls Loss = 0.0519, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0521, Reg Loss = 7.2485, Reconstruct Loss = 0.0010, Cls Loss = 0.0504, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0505, Reg Loss = 7.2309, Reconstruct Loss = 0.0008, Cls Loss = 0.0489, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0500, Reg Loss = 7.2473, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:04<00:00, 17.88it/s]


Epoch [31/50], Validation Loss: 5.7805, Validation Accuracy: 42.95%



Iteration 0: Loss = 0.0621, Reg Loss = 7.2851, Reconstruct Loss = 0.0000, Cls Loss = 0.0613, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0522, Reg Loss = 7.3991, Reconstruct Loss = 0.0010, Cls Loss = 0.0505, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0502, Reg Loss = 7.5071, Reconstruct Loss = 0.0010, Cls Loss = 0.0484, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0492, Reg Loss = 7.6041, Reconstruct Loss = 0.0007, Cls Loss = 0.0477, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0491, Reg Loss = 7.6415, Reconstruct Loss = 0.0008, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0491, Reg Loss = 7.5772, Reconstruct Loss = 0.0009, Cls Loss = 0.0474, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0485, Reg Loss = 7.5833, Reconstruct Loss = 0.0007, Cls Loss = 0.0470, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0489, Reg Loss = 7.5694, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:03<00:00, 22.83it/s]


Epoch [32/50], Validation Loss: 4.1922, Validation Accuracy: 50.21%



Iteration 0: Loss = 0.0388, Reg Loss = 7.9511, Reconstruct Loss = 0.0000, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0458, Reg Loss = 7.4790, Reconstruct Loss = 0.0005, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0462, Reg Loss = 7.5114, Reconstruct Loss = 0.0008, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0476, Reg Loss = 7.5713, Reconstruct Loss = 0.0010, Cls Loss = 0.0459, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0466, Reg Loss = 7.5661, Reconstruct Loss = 0.0008, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0462, Reg Loss = 7.5006, Reconstruct Loss = 0.0007, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0463, Reg Loss = 7.5195, Reconstruct Loss = 0.0007, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0464, Reg Loss = 7.5202, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:03<00:00, 22.84it/s]


Epoch [33/50], Validation Loss: 5.9347, Validation Accuracy: 44.77%



Iteration 0: Loss = 0.0437, Reg Loss = 7.7467, Reconstruct Loss = 0.0000, Cls Loss = 0.0429, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0511, Reg Loss = 7.6410, Reconstruct Loss = 0.0006, Cls Loss = 0.0498, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0493, Reg Loss = 7.5559, Reconstruct Loss = 0.0006, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0473, Reg Loss = 7.5855, Reconstruct Loss = 0.0006, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0475, Reg Loss = 7.5722, Reconstruct Loss = 0.0005, Cls Loss = 0.0462, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0467, Reg Loss = 7.6166, Reconstruct Loss = 0.0004, Cls Loss = 0.0455, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0462, Reg Loss = 7.6623, Reconstruct Loss = 0.0004, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0461, Reg Loss = 7.7069, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:03<00:00, 22.70it/s]


Epoch [34/50], Validation Loss: 5.6673, Validation Accuracy: 46.07%



Iteration 0: Loss = 0.0234, Reg Loss = 7.9355, Reconstruct Loss = 0.0000, Cls Loss = 0.0226, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0478, Reg Loss = 7.8790, Reconstruct Loss = 0.0005, Cls Loss = 0.0464, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0468, Reg Loss = 7.7857, Reconstruct Loss = 0.0005, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0449, Reg Loss = 7.7632, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0455, Reg Loss = 7.7864, Reconstruct Loss = 0.0008, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0457, Reg Loss = 7.7791, Reconstruct Loss = 0.0006, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0462, Reg Loss = 7.7649, Reconstruct Loss = 0.0006, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0464, Reg Loss = 7.7554, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:04<00:00, 16.96it/s]


Epoch [35/50], Validation Loss: 5.9480, Validation Accuracy: 44.31%



Iteration 0: Loss = 0.0350, Reg Loss = 7.4604, Reconstruct Loss = 0.0000, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0436, Reg Loss = 7.7459, Reconstruct Loss = 0.0007, Cls Loss = 0.0422, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0457, Reg Loss = 7.7965, Reconstruct Loss = 0.0005, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0455, Reg Loss = 7.8540, Reconstruct Loss = 0.0008, Cls Loss = 0.0439, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0457, Reg Loss = 7.8423, Reconstruct Loss = 0.0006, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0456, Reg Loss = 7.8672, Reconstruct Loss = 0.0006, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0458, Reg Loss = 7.8454, Reconstruct Loss = 0.0006, Cls Loss = 0.0444, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0449, Reg Loss = 7.8058, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:04<00:00, 17.37it/s]


Epoch [36/50], Validation Loss: 8.6143, Validation Accuracy: 34.12%



Iteration 0: Loss = 0.0225, Reg Loss = 7.4036, Reconstruct Loss = 0.0000, Cls Loss = 0.0218, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0486, Reg Loss = 7.6034, Reconstruct Loss = 0.0007, Cls Loss = 0.0471, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0472, Reg Loss = 7.7416, Reconstruct Loss = 0.0007, Cls Loss = 0.0457, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0464, Reg Loss = 7.7715, Reconstruct Loss = 0.0009, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0463, Reg Loss = 7.7932, Reconstruct Loss = 0.0008, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0461, Reg Loss = 7.7746, Reconstruct Loss = 0.0007, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0458, Reg Loss = 7.7619, Reconstruct Loss = 0.0008, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0458, Reg Loss = 7.7301, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:04<00:00, 16.69it/s]


Epoch [37/50], Validation Loss: 6.7470, Validation Accuracy: 38.68%



Iteration 0: Loss = 0.0642, Reg Loss = 7.3773, Reconstruct Loss = 0.0000, Cls Loss = 0.0634, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0470, Reg Loss = 7.3272, Reconstruct Loss = 0.0006, Cls Loss = 0.0457, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0460, Reg Loss = 7.5500, Reconstruct Loss = 0.0003, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0459, Reg Loss = 7.6204, Reconstruct Loss = 0.0005, Cls Loss = 0.0446, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0447, Reg Loss = 7.6227, Reconstruct Loss = 0.0005, Cls Loss = 0.0434, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0440, Reg Loss = 7.6346, Reconstruct Loss = 0.0005, Cls Loss = 0.0428, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0430, Reg Loss = 7.6179, Reconstruct Loss = 0.0005, Cls Loss = 0.0418, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0436, Reg Loss = 7.6242, Reconstruct Loss = 0.0004, Cl

100%|██████████| 79/79 [00:04<00:00, 17.22it/s]


Epoch [38/50], Validation Loss: 6.0403, Validation Accuracy: 42.92%



Iteration 0: Loss = 0.0427, Reg Loss = 7.7085, Reconstruct Loss = 0.0000, Cls Loss = 0.0419, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0472, Reg Loss = 7.5103, Reconstruct Loss = 0.0006, Cls Loss = 0.0458, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0464, Reg Loss = 7.6293, Reconstruct Loss = 0.0007, Cls Loss = 0.0449, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0460, Reg Loss = 7.6796, Reconstruct Loss = 0.0008, Cls Loss = 0.0445, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0449, Reg Loss = 7.6434, Reconstruct Loss = 0.0006, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0441, Reg Loss = 7.5771, Reconstruct Loss = 0.0005, Cls Loss = 0.0429, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0443, Reg Loss = 7.5747, Reconstruct Loss = 0.0005, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0439, Reg Loss = 7.5828, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:04<00:00, 17.03it/s]


Epoch [39/50], Validation Loss: 6.4368, Validation Accuracy: 43.88%



Iteration 0: Loss = 0.0491, Reg Loss = 7.6194, Reconstruct Loss = 0.0000, Cls Loss = 0.0483, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0426, Reg Loss = 7.9515, Reconstruct Loss = 0.0003, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0419, Reg Loss = 8.0968, Reconstruct Loss = 0.0003, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0423, Reg Loss = 8.1469, Reconstruct Loss = 0.0006, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0421, Reg Loss = 8.0460, Reconstruct Loss = 0.0004, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0421, Reg Loss = 8.0046, Reconstruct Loss = 0.0003, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0416, Reg Loss = 8.0021, Reconstruct Loss = 0.0004, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0414, Reg Loss = 7.9928, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:04<00:00, 18.91it/s]


Epoch [40/50], Validation Loss: 5.3065, Validation Accuracy: 47.94%



Iteration 0: Loss = 0.0508, Reg Loss = 7.8022, Reconstruct Loss = 0.0000, Cls Loss = 0.0500, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0405, Reg Loss = 7.7742, Reconstruct Loss = 0.0000, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0435, Reg Loss = 7.8904, Reconstruct Loss = 0.0004, Cls Loss = 0.0423, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0425, Reg Loss = 7.8949, Reconstruct Loss = 0.0004, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0428, Reg Loss = 7.9387, Reconstruct Loss = 0.0005, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0418, Reg Loss = 7.9003, Reconstruct Loss = 0.0005, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0427, Reg Loss = 7.9039, Reconstruct Loss = 0.0008, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0424, Reg Loss = 7.9123, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [41/50], Validation Loss: 4.3653, Validation Accuracy: 51.45%



Iteration 0: Loss = 0.0294, Reg Loss = 7.8069, Reconstruct Loss = 0.0000, Cls Loss = 0.0286, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0393, Reg Loss = 8.0168, Reconstruct Loss = 0.0000, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0427, Reg Loss = 8.0152, Reconstruct Loss = 0.0003, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0427, Reg Loss = 8.0597, Reconstruct Loss = 0.0004, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0424, Reg Loss = 8.0435, Reconstruct Loss = 0.0005, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0425, Reg Loss = 8.0384, Reconstruct Loss = 0.0006, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0430, Reg Loss = 8.0379, Reconstruct Loss = 0.0006, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0434, Reg Loss = 8.0575, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:04<00:00, 17.62it/s]


Epoch [42/50], Validation Loss: 4.6565, Validation Accuracy: 48.86%



Iteration 0: Loss = 0.0181, Reg Loss = 7.9136, Reconstruct Loss = 0.0000, Cls Loss = 0.0173, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0418, Reg Loss = 7.8727, Reconstruct Loss = 0.0008, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0407, Reg Loss = 7.9984, Reconstruct Loss = 0.0007, Cls Loss = 0.0392, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0417, Reg Loss = 8.0094, Reconstruct Loss = 0.0009, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0422, Reg Loss = 7.9901, Reconstruct Loss = 0.0008, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0423, Reg Loss = 7.9661, Reconstruct Loss = 0.0008, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0415, Reg Loss = 7.9609, Reconstruct Loss = 0.0008, Cls Loss = 0.0400, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0421, Reg Loss = 7.9357, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [43/50], Validation Loss: 3.9978, Validation Accuracy: 54.52%



Iteration 0: Loss = 0.0338, Reg Loss = 7.4746, Reconstruct Loss = 0.0000, Cls Loss = 0.0331, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0431, Reg Loss = 7.9119, Reconstruct Loss = 0.0008, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0420, Reg Loss = 7.7041, Reconstruct Loss = 0.0007, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0402, Reg Loss = 7.6830, Reconstruct Loss = 0.0006, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0402, Reg Loss = 7.7129, Reconstruct Loss = 0.0005, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0407, Reg Loss = 7.7395, Reconstruct Loss = 0.0005, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0410, Reg Loss = 7.7204, Reconstruct Loss = 0.0005, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0411, Reg Loss = 7.7729, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [44/50], Validation Loss: 3.5129, Validation Accuracy: 55.65%



Iteration 0: Loss = 0.0383, Reg Loss = 7.6474, Reconstruct Loss = 0.0000, Cls Loss = 0.0376, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0409, Reg Loss = 7.9307, Reconstruct Loss = 0.0011, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0415, Reg Loss = 8.1383, Reconstruct Loss = 0.0011, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0398, Reg Loss = 8.1422, Reconstruct Loss = 0.0009, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0405, Reg Loss = 8.1783, Reconstruct Loss = 0.0009, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0405, Reg Loss = 8.1534, Reconstruct Loss = 0.0008, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0408, Reg Loss = 8.1257, Reconstruct Loss = 0.0007, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0408, Reg Loss = 8.1298, Reconstruct Loss = 0.0007, Cl

100%|██████████| 79/79 [00:03<00:00, 23.04it/s]


Epoch [45/50], Validation Loss: 6.2450, Validation Accuracy: 43.43%



Iteration 0: Loss = 0.0344, Reg Loss = 7.6126, Reconstruct Loss = 0.0000, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0397, Reg Loss = 8.0189, Reconstruct Loss = 0.0023, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0416, Reg Loss = 7.9686, Reconstruct Loss = 0.0017, Cls Loss = 0.0391, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0424, Reg Loss = 7.9073, Reconstruct Loss = 0.0012, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0433, Reg Loss = 8.0090, Reconstruct Loss = 0.0010, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0433, Reg Loss = 8.0652, Reconstruct Loss = 0.0011, Cls Loss = 0.0414, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0428, Reg Loss = 8.0427, Reconstruct Loss = 0.0009, Cls Loss = 0.0410, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0419, Reg Loss = 8.0155, Reconstruct Loss = 0.0008, Cl

100%|██████████| 79/79 [00:04<00:00, 17.22it/s]


Epoch [46/50], Validation Loss: 5.8260, Validation Accuracy: 43.32%



Iteration 0: Loss = 0.0699, Reg Loss = 8.2176, Reconstruct Loss = 0.0000, Cls Loss = 0.0691, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0420, Reg Loss = 8.3767, Reconstruct Loss = 0.0010, Cls Loss = 0.0402, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0423, Reg Loss = 8.3212, Reconstruct Loss = 0.0011, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0423, Reg Loss = 8.3082, Reconstruct Loss = 0.0011, Cls Loss = 0.0404, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0423, Reg Loss = 8.2245, Reconstruct Loss = 0.0010, Cls Loss = 0.0405, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0425, Reg Loss = 8.2561, Reconstruct Loss = 0.0010, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0429, Reg Loss = 8.2210, Reconstruct Loss = 0.0009, Cls Loss = 0.0412, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0422, Reg Loss = 8.2119, Reconstruct Loss = 0.0009, Cl

100%|██████████| 79/79 [00:04<00:00, 16.87it/s]


Epoch [47/50], Validation Loss: 5.7284, Validation Accuracy: 44.72%



Iteration 0: Loss = 0.0682, Reg Loss = 7.6684, Reconstruct Loss = 0.0000, Cls Loss = 0.0674, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0425, Reg Loss = 8.1597, Reconstruct Loss = 0.0008, Cls Loss = 0.0409, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0410, Reg Loss = 8.0810, Reconstruct Loss = 0.0007, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0401, Reg Loss = 8.1067, Reconstruct Loss = 0.0005, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0411, Reg Loss = 8.0993, Reconstruct Loss = 0.0004, Cls Loss = 0.0399, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0410, Reg Loss = 8.1278, Reconstruct Loss = 0.0005, Cls Loss = 0.0398, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0403, Reg Loss = 8.1506, Reconstruct Loss = 0.0005, Cls Loss = 0.0390, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0396, Reg Loss = 8.1650, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:03<00:00, 22.36it/s]


Epoch [48/50], Validation Loss: 9.3770, Validation Accuracy: 31.07%



Iteration 0: Loss = 0.0411, Reg Loss = 7.6816, Reconstruct Loss = 0.0000, Cls Loss = 0.0403, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0382, Reg Loss = 8.1985, Reconstruct Loss = 0.0011, Cls Loss = 0.0363, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0391, Reg Loss = 8.2333, Reconstruct Loss = 0.0008, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0397, Reg Loss = 8.2028, Reconstruct Loss = 0.0008, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0392, Reg Loss = 8.2104, Reconstruct Loss = 0.0010, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0401, Reg Loss = 8.2691, Reconstruct Loss = 0.0009, Cls Loss = 0.0384, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0405, Reg Loss = 8.2497, Reconstruct Loss = 0.0007, Cls Loss = 0.0389, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0403, Reg Loss = 8.2799, Reconstruct Loss = 0.0006, Cl

100%|██████████| 79/79 [00:04<00:00, 17.17it/s]


Epoch [49/50], Validation Loss: 9.0800, Validation Accuracy: 34.58%



Iteration 0: Loss = 0.0308, Reg Loss = 8.7396, Reconstruct Loss = 0.0000, Cls Loss = 0.0299, Learning rate = 1.0000e-03
Iteration 50: Loss = 0.0427, Reg Loss = 8.3587, Reconstruct Loss = 0.0008, Cls Loss = 0.0411, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0409, Reg Loss = 8.3380, Reconstruct Loss = 0.0005, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 150: Loss = 0.0408, Reg Loss = 8.3217, Reconstruct Loss = 0.0005, Cls Loss = 0.0394, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0403, Reg Loss = 8.3797, Reconstruct Loss = 0.0007, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 250: Loss = 0.0401, Reg Loss = 8.3813, Reconstruct Loss = 0.0006, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0405, Reg Loss = 8.4161, Reconstruct Loss = 0.0006, Cls Loss = 0.0391, Learning rate = 1.0000e-03
Iteration 350: Loss = 0.0403, Reg Loss = 8.4126, Reconstruct Loss = 0.0005, Cl

100%|██████████| 79/79 [00:03<00:00, 22.12it/s]

Epoch [50/50], Validation Loss: 7.5291, Validation Accuracy: 37.71%








In [50]:
# End the active Weights & Biases run; the run history / run summary tables
# shown in this cell's output are printed by this call.
wandb.finish()

0,1
Cls Loss,█▇▆▆▆▄▅▄▄▄▃▃▃▃▃▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,██▇▆▇▆▅▅▅▃▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂
Reconstruct Loss,██▁▄▅▁▄▄▁▄▃▃▃▄▃▃▃▃▃▂▂▅▄▃▃▂▂▂▂▂▃▂▂▃▂▂▁▂▂▂
Reg Loss,▃▂▂▂▂▁▁▂▂▂▂▂▃▂▃▃▃▃▄▄▆▅▅▅▅▆▆▅▆▆▇▇▇▇█▇▇█▇█
Training accuracy,▁▂▂▂▂▃▄▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████
Validation Accuracy,▄▇▇▇█▆▇▆▆▆▆▅▅▅▆▅▃▄▄▄▄▂▄▂▃▃▄▃▂▂▃▄▄▄▅▃▃▃▁▂
Validation Loss,▃▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▅▄▄▄▆▄▆▅▅▅▅▇▆▆▅▄▄▄▅▅▅█▆

0,1
Cls Loss,0.03889
Learning rate,0.001
Loss,0.04028
Reconstruct Loss,0.00055
Reg Loss,8.41263
Training accuracy,0.98826
Validation Accuracy,0.3771
Validation Loss,7.5291


### 7 Testing loop

In [51]:
# Path of the best hypernetwork checkpoint inside the experiment's save directory.
# Built with os.path.join (as the other paths in this notebook are) instead of
# concatenating a hard-coded '/' separator.
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [52]:
# Display the resolved checkpoint path as a sanity check before it is loaded.
saved_hypernet_path

'toy/experiments_densenet/dense_17th_experiment/cifar10_nerf_best.pth'

In [53]:
# Build a separate hypernetwork instance for evaluation; the saved checkpoint
# weights are loaded into it in a later cell.
# NOTE(review): assumes `args` and `number_param` are the same values used to
# build the hypernetwork that produced the checkpoint — confirm.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [54]:
# Read the saved hypernetwork checkpoint from disk, mapping its tensors to the CPU.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
# Load the stored weights into the evaluation hypernetwork. This is the last
# expression of the cell, so its return value (the key-matching report) is displayed.
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [55]:
# Evaluate the loaded hypernetwork across a sweep of hidden dimensions.
# For every hidden_dim the loop:
#   1. builds a target network of that width,
#   2. fills it with weights sampled from `hyper_model_test`,
#   3. validates it on `val_loader`,
#   4. saves the resulting state dict and appends the metrics to a results file.

# The results file does not depend on hidden_dim, so resolve its path once.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, swap in the EMA weights for the evaluation.
    # NOTE(review): `ema` is created earlier in the notebook; if it tracks the
    # training hypernetwork rather than `hyper_model_test`, apply()/restore()
    # do not affect the weights sampled below — confirm which model it wraps.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always restore the original weights, even if sampling or validation
        # raises, so an interrupted run does not leave the EMA weights swapped in.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Append the results; 'a' creates the file if it doesn't exist yet.
    # Re-running this cell therefore adds a second set of lines to the same file.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.23it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 1.3131, Validation Accuracy: 68.57%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 2.5721, Validation Accuracy: 59.37%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 1.3499, Validation Accuracy: 69.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 1.3691, Validation Accuracy: 67.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 4.3721, Validation Accuracy: 55.56%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.68it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 0.9360, Validation Accuracy: 71.39%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.29it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 1.0781, Validation Accuracy: 67.18%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 1.1088, Validation Accuracy: 70.17%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.63it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 1.2863, Validation Accuracy: 69.93%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.67it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 1.0270, Validation Accuracy: 69.95%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.60it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 0.9843, Validation Accuracy: 70.65%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.88it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 0.9930, Validation Accuracy: 72.53%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.03it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 1.3377, Validation Accuracy: 68.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 24.01it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 1.4556, Validation Accuracy: 68.06%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.83it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 1.1756, Validation Accuracy: 68.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.95it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 1.2027, Validation Accuracy: 69.15%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.92it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 1.0870, Validation Accuracy: 69.73%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.28it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 1.5608, Validation Accuracy: 66.78%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 19.47it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 1.6679, Validation Accuracy: 65.88%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 21.85it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 1.7611, Validation Accuracy: 64.57%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.28it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 2.6346, Validation Accuracy: 60.45%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.64it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 1.7065, Validation Accuracy: 65.55%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.64it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 1.2679, Validation Accuracy: 68.31%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.58it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 1.6181, Validation Accuracy: 65.92%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 21.98it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 1.3413, Validation Accuracy: 68.30%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.09it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 1.5810, Validation Accuracy: 67.13%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.54it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 1.6187, Validation Accuracy: 68.18%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.15it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 2.2894, Validation Accuracy: 62.23%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.39it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 2.0519, Validation Accuracy: 64.05%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 1.9936, Validation Accuracy: 63.70%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.92it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 2.0921, Validation Accuracy: 63.19%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 2.1297, Validation Accuracy: 62.81%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 1.3356, Validation Accuracy: 68.62%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.06it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 1.0941, Validation Accuracy: 70.00%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.24it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 1.2603, Validation Accuracy: 68.88%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.20it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 1.7457, Validation Accuracy: 65.51%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.84it/s]

Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 1.0535, Validation Accuracy: 71.76%





