## Import

In [1]:
import os
import random

In [2]:
import torch
import torch.nn as nn

import wandb

from sklearn.metrics import accuracy_score

In [3]:
from neumeta.models import create_densenet_model as create_model
from neumeta.utils import (
    parse_args, print_omegaconf,
    load_checkpoint, save_checkpoint,
    set_seed,
    get_dataset,
    sample_coordinates, sample_subset, shuffle_coordinates_all,
    get_hypernetwork, get_optimizer,
    sample_weights,
    weighted_regression_loss, validate_single, AverageMeter, EMA,
    sample_merge_model
)

## Functions

### Find max dimension of the model

In [4]:
def find_max_dim(model_cls):
    """Return the largest extent found among the model's learnable tensors.

    The result is the maximum of:
      * the number of entries in ``model_cls.learnable_parameter``,
      * the first two dimensions of every 4-D tensor in it, and
      * the length of every 1-D tensor in it.

    Tensors of any other rank are ignored.
    """
    learnable = model_cls.learnable_parameter

    # The entry count is the lower bound of the result.
    largest = len(learnable)

    for weight in learnable.values():
        shape = weight.shape
        rank = len(shape)
        if rank == 4:
            # 4-D tensors: only the two leading dimensions are considered
            largest = max(largest, shape[0], shape[1])
        elif rank == 1:
            # 1-D tensors: the single dimension is considered
            largest = max(largest, shape[0])

    return largest

### Initialize wandb

In [5]:
def initialize_wandb(config):
    """
    Initialize a Weights & Biases (wandb) run for the given configuration.

    The run is created in the "dense-inr-trial" project and the 'cifar10' group, and is
    named "<YYYYmmddHHMMSS>-<config.experiment.name>" using the local time of the call.

    Args:
        config: Configuration for the run. It must expose ``config.experiment.name`` and
            be convertible with ``dict(config)``; that dict is passed as the run config.
    """
    # Local import: `time` is only needed here and is not imported at file level.
    import time

    # Name the run using the current time and the experiment name
    timestamp = time.strftime('%Y%m%d%H%M%S')
    run_name = f"{timestamp}-{config.experiment.name}"

    wandb.init(project="dense-inr-trial", name=run_name, config=dict(config), group='cifar10')

### Init model dictionary

In [6]:
def init_model_dict(args, device):
    """
    Build one target network per hidden dimension, plus the reference network for the starting dimension.

    Args:
        args: Configuration object. Reads ``args.model.{type, layers, growth, compression,
            bottleneck, drop_rate, pretrained_path}`` and ``args.dimensions.{range, start}``.
        device: Device every created model is moved to via ``.to(device)``.

    Returns:
        dim_dict: Maps ``f"{dim}"`` to the tuple
            ``(model, coords_tensor, keys_list, indices_list, size_list, None)``. The four
            middle items are the result of ``sample_coordinates(model)``; the trailing
            ``None`` fills the key-mask slot.
        gt_model_dict: Maps ``f"{args.dimensions.start}"`` to a second model created with
            ``smooth=True`` and switched to eval mode. It stays empty when the starting
            dimension does not occur in ``args.dimensions.range``.
    """
    model_cfg = args.model

    # Keyword arguments shared by every model created in this function
    common_kwargs = dict(
        layers=model_cfg.layers,
        growth=model_cfg.growth,
        compression=model_cfg.compression,
        bottleneck=model_cfg.bottleneck,
        drop_rate=model_cfg.drop_rate,
        path=model_cfg.pretrained_path,
    )

    dim_dict, gt_model_dict = {}, {}

    for dim in args.dimensions.range:
        key = f"{dim}"

        # Target network for this hidden dimension and its sampled coordinates
        net = create_model(model_cfg.type, hidden_dim=dim, **common_kwargs).to(device)
        coords, keys, indices, sizes = sample_coordinates(net)
        dim_dict[key] = (net, coords, keys, indices, sizes, None)

        # Blank separator between the per-dimension log lines
        print('\n')

        # Only the starting dimension (the one of the pretrained model) gets a reference model
        if dim != args.dimensions.start:
            continue

        print(f"Loading model for dim {dim}")
        reference = create_model(model_cfg.type, hidden_dim=dim, smooth=True, **common_kwargs).to(device)
        reference.eval()
        gt_model_dict[key] = reference

    return dim_dict, gt_model_dict

### Training function

In [7]:
# Function to train the hypernetwork for one epoch
def train_one_epoch(model, train_loader, optimizer, criterion, dim_dict, gt_model_dict, epoch_idx, ema=None, args=None, device='cpu'):
    """
    Train the hypernetwork ``model`` for one pass over ``train_loader``.

    For every batch a hidden dimension is drawn at random from ``args.dimensions.range``
    and the matching entry of ``dim_dict`` supplies the target network and its coordinates.
    ``sample_weights`` uses ``model`` to produce weights for that target network, the batch
    is classified with it, and a weighted sum of three terms is back-propagated before
    ``optimizer.step()``: the classification loss, the sum of the L2 norms of the generated
    weights, and (only when ``gt_model_dict`` has an entry for the drawn dimension) a
    reconstruction loss against the reference model's weights.

    Args:
        model: Hypernetwork being trained. It is put in train mode, and its parameters are
            the ones subject to gradient clipping.
        train_loader: Iterable yielding ``(x, target)`` batches; ``len(train_loader)`` is
            used to compute the wandb step.
        optimizer: Optimizer whose gradients are zeroed and which is stepped once per batch.
        criterion: Classification loss, called as ``criterion(predict, target)``.
        dim_dict: Maps ``f"{hidden_dim}"`` to
            ``(model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask)``.
        gt_model_dict: Maps ``f"{hidden_dim}"`` to a reference model exposing
            ``learnable_parameter``. Dimensions without an entry get a zero reconstruction loss.
        epoch_idx: Epoch index; only used to compute the wandb logging step.
        ema: Optional object whose ``update()`` is called after every optimizer step.
        args: Configuration object. Required in practice despite the ``None`` default, since
            ``args.dimensions``, ``args.ratio``, ``args.training``,
            ``args.hyper_model.loss_weight`` and ``args.experiment.log_interval`` are read
            unconditionally.
        device: Device the batch is moved to; also forwarded to ``sample_weights``.

    Returns:
        Tuple ``(losses.avg, dim_dict, gt_model_dict, train_acc)``: the ``avg`` of the
        AverageMeter fed with the per-batch total loss, the same two dictionary objects that
        were passed in, and the accuracy over all batches seen in this epoch.
    """
    # Set the model to training mode
    model.train()
    # NOTE: accumulated at the end of each iteration but never read or returned by this function
    total_loss = 0.0

    # Initialize AverageMeter objects to track the losses
    losses = AverageMeter()
    cls_losses = AverageMeter()
    reg_losses = AverageMeter()
    reconstruct_losses = AverageMeter()

    # Per-batch predictions and targets, concatenated after the loop for the training accuracy
    preds = []
    gt = []

    # Iterate over the training data
    for batch_idx, (x, target) in enumerate(train_loader):
        # Zero the gradients
        optimizer.zero_grad()

        # Preprocess input
        # ------------------------------------------------------------------------------------------------------
        # Move the data to the device
        x, target = x.to(device), target.to(device)
        # Choose a random hidden dimension
        hidden_dim = random.choice(args.dimensions.range)
        # Get the model class, coordinates, keys, indices, size, and key mask for the chosen dimension
        model_cls, coords_tensor, keys_list, indices_list, size_list, key_mask = dim_dict[f"{hidden_dim}"]
        # Sample a subset of the coordinates, keys, indices and sizes; also returns the selected keys.
        # The local names are rebound here; the tuple stored in dim_dict is not reassigned.
        coords_tensor, keys_list, indices_list, size_list, selected_keys = sample_subset(coords_tensor,
                                                                                         keys_list,
                                                                                         indices_list,
                                                                                         size_list,
                                                                                         key_mask,
                                                                                         ratio=args.ratio)
        # Add noise to the coordinates if specified: uniform in [-0.5, 0.5), scaled by coordinate_noise
        if args.training.coordinate_noise > 0.0:
            coords_tensor = coords_tensor + (torch.rand_like(coords_tensor) - 0.5) * args.training.coordinate_noise


        # Main task of hypernetwork and target network
        # ------------------------------------------------------------------------------------------------------
        # Sample the weights for the target model using hypernetwork
        model_cls, reconstructed_weights = sample_weights(model, model_cls,
                                                          coords_tensor, keys_list, indices_list, size_list, key_mask, selected_keys,
                                                          device=device, NORM=args.dimensions.norm)
        # Forward pass
        predict = model_cls(x)

        # Predicted class per sample, collected for the epoch-level training accuracy

        pred = torch.argmax(predict, dim=-1)

        preds.append(pred)
        gt.append(target)

        # Compute losses
        # ------------------------------------------------------------------------------------------------------
        # Compute classification loss
        cls_loss = criterion(predict, target)
        # Compute regularization loss: sum of the L2 norms of the generated weights
        reg_loss = sum([torch.norm(w, p=2) for w in reconstructed_weights])
        # Compute reconstruction loss if ground truth model is available
        if f"{hidden_dim}" in gt_model_dict:
            gt_model = gt_model_dict[f"{hidden_dim}"]
            # Reference weights are taken in learnable_parameter order, filtered by selected_keys.
            # NOTE(review): assumes this order lines up with reconstructed_weights - confirm.
            gt_selected_weights = [
                w for k, w in gt_model.learnable_parameter.items() if k in selected_keys]

            reconstruct_loss = weighted_regression_loss(
                reconstructed_weights, gt_selected_weights)
        else:
            # No reference model for this dimension: zero term (created without `device=`)
            reconstruct_loss = torch.tensor(0.0)
        # Compute the total loss as the weighted sum of the three terms
        loss = args.hyper_model.loss_weight.ce_weight * cls_loss + args.hyper_model.loss_weight.reg_weight * \
            reg_loss + args.hyper_model.loss_weight.recon_weight * reconstruct_loss


        # Compute gradients and update weights
        # ------------------------------------------------------------------------------------------------------
        # Clear the gradients stored on the target network's parameters
        for updated_weight in model_cls.parameters():
            updated_weight.grad = None

        # First backward pass from the total loss; the graph is retained for the second pass below
        loss.backward(retain_graph=True)
        # Second pass: the gradients now stored on the selected target-network parameters are used
        # as the upstream gradients of the generated weights.
        # NOTE(review): presumably sample_weights places the generated weights into model_cls in a
        # way that cuts the autograd link, so this relays the gradient to the hypernetwork - confirm.
        torch.autograd.backward(reconstructed_weights, [
                                w.grad for k, w in model_cls.named_parameters() if k in selected_keys])

        # Clip each gradient element of the hypernetwork to [-clip_grad, clip_grad] if specified
        if args.training.get('clip_grad', 0.0) > 0:
            torch.nn.utils.clip_grad_value_(
                model.parameters(), args.training.clip_grad)

        # Update the weights
        optimizer.step()

        # Update the EMA if specified
        if ema:
            ema.update()  # Update the EMA after each training step
        total_loss += loss.item()

        # Update the AverageMeter objects
        losses.update(loss.item())
        cls_losses.update(cls_loss.item())
        reg_losses.update(reg_loss.item())
        reconstruct_losses.update(reconstruct_loss.item())

        # Log (or plot) losses
        # ------------------------------------------------------------------------------------------------------
        # Log the averaged losses and learning rate to wandb every log_interval batches,
        # using a global step that grows across epochs
        if batch_idx % args.experiment.log_interval == 0:
            wandb.log({
                "Loss": losses.avg,
                "Cls Loss": cls_losses.avg,
                "Reg Loss": reg_losses.avg,
                "Reconstruct Loss": reconstruct_losses.avg,
                "Learning rate": optimizer.param_groups[0]['lr']
            }, step=batch_idx + epoch_idx * len(train_loader))
            # Print the losses and learning rate
            print(
                f"Iteration {batch_idx}: Loss = {losses.avg:.4f}, Reg Loss = {reg_losses.avg:.4f}, Reconstruct Loss = {reconstruct_losses.avg:.4f}, Cls Loss = {cls_losses.avg:.4f}, Learning rate = {optimizer.param_groups[0]['lr']:.4e}")

    # Accuracy over every batch of the epoch (mixes all randomly drawn hidden dimensions)
    train_acc = accuracy_score(torch.cat(gt).cpu().numpy(), torch.cat(preds).cpu().numpy())

    # Logged without an explicit step, unlike the per-iteration metrics above
    wandb.log({
        "Training accuracy": train_acc
    })

    # Returns the average training loss, the two dictionaries as received, and the training accuracy
    return losses.avg, dim_dict, gt_model_dict, train_acc

## Main

### 0 Select device (GPU if available, otherwise CPU)

In [8]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### 1 Parsing arguments for input

In [9]:
# Experiment configuration file (relative path), passed as --config
CONFIG_PATH = 'neumeta/config/densenet_inr_train/dense_23th_experiment.yaml'
# Kept as a string because it is forwarded as a command-line token (--ratio) in argv_train
RATIO = '1.0'
# Checkpoint to resume from (relative path), passed as --resume_from
CHECKPOINT_PATH = 'toy/experiments_densenet/dense_23th_experiment/cifar10_nerf_best.pth'

In [10]:
# Argument list in command-line form, handed to parse_args instead of typing flags in a shell
argv_train = ['--config', CONFIG_PATH, '--ratio', RATIO, '--resume_from', CHECKPOINT_PATH]

In [11]:
# `args` is the configuration object read by every later cell (args.model, args.training, ...)
args = parse_args(argv_train)  # Parse arguments
print_omegaconf(args)  # Print arguments as a key/value table

+--------------------------------------+------------------------------------------------------------------------------------------------------+
|                 Key                  |                                                Value                                                 |
+--------------------------------------+------------------------------------------------------------------------------------------------------+
|           experiment.name            |                                        dense_23th_experiment                                         |
|        experiment.num_epochs         |                                                 200                                                  |
|       experiment.log_interval        |                                                 100                                                  |
|       experiment.eval_interval       |                                                  1                                             

In [12]:
# Seed with the configured value before any data loading or model creation
set_seed(args.experiment.seed)

Setting seed... 42 for reproducibility


### 2 Get training and validation dataloader

In [13]:
# CIFAR-10 train/validation loaders; strong_transform comes from training.strong_aug,
# or is None when that key is absent from the config
train_loader, val_loader = get_dataset('cifar10', args.training.batch_size, strong_transform=args.training.get('strong_aug', None))

Using dataset: cifar10 with batch size: 128 and strong transform: None


### 3 Create target model

#### 3.0 Create the model

In [14]:
# Target network at the starting hidden dimension, created with the pretrained-weights path
# from the config and moved to `device`. This `model` is later passed to sample_merge_model.
model = create_model(args.model.type,
                     layers=args.model.layers,
                     growth=args.model.growth,
                     compression=args.model.compression,
                     bottleneck=args.model.bottleneck,
                     drop_rate=args.model.drop_rate,
                     hidden_dim=args.dimensions.start,
                     path=args.model.pretrained_path).to(device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


#### 3.1 Print the structure and shape of the model

In [15]:
model

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [16]:
# Print the name and shape of every learnable tensor of the target network
for name, weight in model.learnable_parameter.items():
    print(name, weight.shape)

block3.layer.5.conv1.weight torch.Size([48, 120, 1, 1])
block3.layer.5.conv1.bias torch.Size([48])
block3.layer.5.conv2.weight torch.Size([12, 48, 3, 3])


In [17]:
# Print the maximum dimension of the model
# Largest extent among the learnable tensors, as computed by find_max_dim
print(f'Maximum DIM: {find_max_dim(model)}')

Maximum DIM: 120


#### 3.2 Validate the accuracy of pretrained model

In [18]:
# Validate the model for the starting dimension (its pretrained form)
# Baseline: validation loss and accuracy of the target network before any hypernetwork training.
# `acc` is a fraction (it is multiplied by 100 for display).
val_loss, acc = validate_single(model, val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:04<00:00, 19.26it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.92%





In [19]:
# Despite its name, `checkpoint` is the mapping of learnable parameters of `model`,
# not a checkpoint loaded from disk
checkpoint = model.learnable_parameter
# Number of learnable tensors; passed to get_hypernetwork below
number_param = len(checkpoint)

In [20]:
# Print the keys of the parameters and the number of parameters
# Print the keys of the learnable tensors and how many of them the hypernetwork has to generate
print(f"Parameters keys: {model.keys}")
print(f"Number of parameters to be learned: {number_param}")

Parameters keys: ['block3.layer.5.conv1.weight', 'block3.layer.5.conv1.bias', 'block3.layer.5.conv2.weight']
Number of parameters to be learned: 3


### 4 Create hypernetwork

#### 4.0 Create the model

In [21]:
# Get the hypermodel
# Build the hypernetwork from the config and the number of learnable tensors of the target network
hyper_model = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


#### 4.1 Print model structure

In [22]:
hyper_model

NeRF_ResMLP_Compose(
  (positional_encoding): PositionalEncoding()
  (model): ModuleList(
    (0-2): 3 x NeRF_MLP_Residual_Scaled(
      (initial_layer): Linear(in_features=198, out_features=256, bias=True)
      (residual_blocks): ModuleList(
        (0-3): 4 x Linear(in_features=256, out_features=256, bias=True)
      )
      (scalars): ParameterList(
          (0): Parameter containing: [torch.float32 of size  (cuda:0)]
          (1): Parameter containing: [torch.float32 of size  (cuda:0)]
          (2): Parameter containing: [torch.float32 of size  (cuda:0)]
          (3): Parameter containing: [torch.float32 of size  (cuda:0)]
      )
      (act): ReLU(inplace=True)
      (output_layer): Linear(in_features=256, out_features=9, bias=True)
    )
  )
)

#### 4.2 Initialize EMA to track a smoothed copy of the hypernetwork weights

In [23]:
# Initialize the EMA
# Exponential moving average over the hypernetwork, with the decay taken from the config.
# Created before the optimizer and before any checkpoint is loaded.
ema = EMA(hyper_model, decay=args.hyper_model.ema_decay)

### 5 Get loss function, optimizer and scheduler

In [24]:
# Training loss, validation loss, optimizer and LR scheduler, all built for the hypernetwork
criterion, val_criterion, optimizer, scheduler = get_optimizer(args, hyper_model)

In [25]:
# Show the objects returned by get_optimizer (the optimizer repr lists its hyper-parameters)
print(f'Criterion: {criterion}\nVal_criterion: {val_criterion}\nOptimizer: {optimizer}\nScheduler: {scheduler}')

Criterion: CrossEntropyLoss()
Val_criterion: CrossEntropyLoss()
Optimizer: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0.01
)
Scheduler: <torch.optim.lr_scheduler.MultiStepLR object at 0x0000025A8E8E3C10>


### 6 Training loop

#### 6.1 Initialize training parameters

In [25]:
# Initialize the starting epoch and best accuracy
# Defaults for a fresh run; both are overwritten below when resuming from a checkpoint
start_epoch = 0
# Best validation accuracy so far, as a fraction; gates checkpoint saving in the training loop
best_acc = 0.0

#### 6.2 Directory to save the model

In [26]:
# Create the directory to save the model
# Create the directory the best checkpoint is written to (no error if it already exists)
os.makedirs(args.training.save_model_path, exist_ok=True)

#### 6.3 Resume training loop

In [27]:
args.resume_from

'toy/experiments_densenet/dense_23th_experiment/cifar10_nerf_best.pth'

In [28]:
# Uncomment the next line to start a fresh run instead of resuming from the checkpoint:
# args.resume_from = False

In [29]:
# Resume only when a checkpoint path is configured (a falsy value skips this block).
# load_checkpoint receives the hypernetwork, optimizer and EMA; the scheduler is not passed,
# so its state is not restored here.
# NOTE(review): confirm the LR schedule behaves as intended after resuming.
if args.resume_from:
        print(f"Resuming from checkpoint: {args.resume_from}")
        checkpoint_info = load_checkpoint(args.resume_from, hyper_model, optimizer, ema)
        # NOTE(review): the training loop passes its 0-based `epoch` to save_checkpoint; if that
        # value is stored unchanged, starting from it repeats the checkpointed epoch - confirm.
        start_epoch = checkpoint_info['epoch']
        best_acc = checkpoint_info['best_acc']
        print(f"Resuming from epoch: {start_epoch}, best accuracy: {best_acc*100:.2f}%")
        # Note: If there are more elements to retrieve, do so here.

Resuming from checkpoint: toy/experiments_densenet/dense_23th_experiment/cifar10_nerf_best.pth
Resuming from epoch: 159, best accuracy: 75.89%


#### 6.4 Initialize model dictionary for each dimension and shuffle it

In [30]:
# Initialize model dictionary
# One target network per configured hidden dimension (dim_dict), plus the reference network
# for the starting dimension (gt_model_dict). Each create_model call prints a "Loading model" line.
dim_dict, gt_model_dict = init_model_dict(args, device)

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth


Loading model from toy/exper

In [31]:
# Show the reference model. init_model_dict registers it under the starting dimension only,
# so the key is derived from the config instead of being hard-coded.
gt_model_dict[f"{args.dimensions.start}"]

DenseNet3(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): DenseBlock(
    (layer): Sequential(
      (0): BottleneckBlock(
        (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (1): BottleneckBlock(
        (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
        (bn2): Identity()
        (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (2): BottleneckBlock(
        (bn1): Bat

In [32]:
# Validate the model for the starting dimension (its pretrained form)
# Validate the reference model for the starting dimension (its pretrained form).
# init_model_dict registers it under the starting dimension only, so the key is derived
# from the config instead of being hard-coded.
val_loss, acc = validate_single(gt_model_dict[f"{args.dimensions.start}"], val_loader, nn.CrossEntropyLoss(), args=args)
print(f'Initial Permutated model Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc * 100:.2f}%')

100%|██████████| 79/79 [00:03<00:00, 22.50it/s]

Initial Permutated model Validation Loss: 0.3248, Validation Accuracy: 91.92%





In [33]:
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


In [34]:
# Shuffle the coordinates stored for every dimension.
# NOTE: `dim_dict` is rebound to the result, so re-running this cell shuffles the already
# shuffled dictionary again; re-run the init_model_dict cell first to start from a clean state.
dim_dict = shuffle_coordinates_all(dim_dict)
dim_dict

{'24': (DenseNet3(
    (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): DenseBlock(
      (layer): Sequential(
        (0): BottleneckBlock(
          (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
        (1): BottleneckBlock(
          (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1))
          (bn2): Identity()
          (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )


#### 6.5 Initialize wandb for plotting

In [36]:
# Start the wandb run; train_one_epoch and the training loop log to it via wandb.log
initialize_wandb(args)

[34m[1mwandb[0m: Currently logged in as: [33mefradosuryadi[0m ([33mefradosuryadi-universitas-indonesia[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


#### 6.6 Hypernetwork training loop

In [37]:
args.experiment.num_epochs

200

In [38]:
# Iterate over the epochs
for epoch in range(start_epoch, args.experiment.num_epochs):
    # Train the hypernetwork to generate a model with random dimension for one epoch
    train_loss, dim_dict, gt_model_dict, train_acc = train_one_epoch(hyper_model, train_loader, optimizer, criterion, 
                                                                     dim_dict, gt_model_dict, epoch_idx=epoch, ema=ema, 
                                                                     args=args, device=device)
    # Step the scheduler
    scheduler.step()

    # Print the training loss and learning rate
    print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc*100:.2f}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")

    # If it's time to evaluate the model
    if (epoch + 1) % args.experiment.eval_interval == 0:
        # Apply EMA if it is specified
        if ema:
            ema.apply()  # Save the weights of original model created before training_loop
        
        # Sample the merged model (create model of same structure before training loop by using the hypernetwork)
        # And then test the performance of the hypernetwork by seeing how good it is in generating the weights
        model = sample_merge_model(hyper_model, model, args) 
        # Validate the merged model
        val_loss, acc = validate_single(model, val_loader, val_criterion, args=args)

        # If EMA is specified, restore the original weights
        if ema:
            ema.restore()  # Restore the original weights to the weights of the pretrained networks

        # Log the validation loss and accuracy to wandb
        wandb.log({
            "Validation Loss": val_loss,
            "Validation Accuracy": acc
        })
        # Print the validation loss and accuracy
        print(f"Epoch [{epoch+1}/{args.experiment.num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
        print('\n\n')

        # Save the checkpoint if the accuracy is better than the previous best
        if acc > best_acc:
            best_acc = acc
            save_checkpoint(f"{args.training.save_model_path}/cifar10_nerf_best.pth",hyper_model,optimizer,ema,epoch,best_acc)
            print(f"Checkpoint saved at epoch {epoch} with accuracy: {best_acc*100:.2f}%")


Iteration 0: Loss = 0.0666, Reg Loss = 5.6065, Reconstruct Loss = 0.0000, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0989, Reg Loss = 5.8615, Reconstruct Loss = 0.0009, Cls Loss = 0.0974, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.1006, Reg Loss = 5.8746, Reconstruct Loss = 0.0016, Cls Loss = 0.0983, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.1011, Reg Loss = 5.8429, Reconstruct Loss = 0.0014, Cls Loss = 0.0992, Learning rate = 1.0000e-03
Epoch [10/200], Training Loss: 0.1012, Training Accuracy: 96.90, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.73it/s]


Epoch [10/200], Validation Loss: 1.9788, Validation Accuracy: 64.73%



Iteration 0: Loss = 0.0786, Reg Loss = 6.1486, Reconstruct Loss = 0.0000, Cls Loss = 0.0780, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0930, Reg Loss = 5.7236, Reconstruct Loss = 0.0004, Cls Loss = 0.0920, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0945, Reg Loss = 5.7162, Reconstruct Loss = 0.0005, Cls Loss = 0.0934, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0953, Reg Loss = 5.7386, Reconstruct Loss = 0.0009, Cls Loss = 0.0938, Learning rate = 1.0000e-03
Epoch [11/200], Training Loss: 0.0948, Training Accuracy: 97.23, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.84it/s]


Epoch [11/200], Validation Loss: 2.3605, Validation Accuracy: 64.93%



Iteration 0: Loss = 0.0734, Reg Loss = 5.9345, Reconstruct Loss = 0.0000, Cls Loss = 0.0728, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0883, Reg Loss = 5.8930, Reconstruct Loss = 0.0017, Cls Loss = 0.0860, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0870, Reg Loss = 5.8966, Reconstruct Loss = 0.0013, Cls Loss = 0.0851, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0863, Reg Loss = 5.9118, Reconstruct Loss = 0.0013, Cls Loss = 0.0845, Learning rate = 1.0000e-03
Epoch [12/200], Training Loss: 0.0847, Training Accuracy: 97.52, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.85it/s]


Epoch [12/200], Validation Loss: 2.5854, Validation Accuracy: 64.00%



Iteration 0: Loss = 0.0838, Reg Loss = 6.2055, Reconstruct Loss = 0.0000, Cls Loss = 0.0832, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0801, Reg Loss = 6.2149, Reconstruct Loss = 0.0014, Cls Loss = 0.0781, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0774, Reg Loss = 6.2729, Reconstruct Loss = 0.0011, Cls Loss = 0.0757, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0789, Reg Loss = 6.2621, Reconstruct Loss = 0.0011, Cls Loss = 0.0771, Learning rate = 1.0000e-03
Epoch [13/200], Training Loss: 0.0782, Training Accuracy: 97.77, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.56it/s]


Epoch [13/200], Validation Loss: 3.1127, Validation Accuracy: 61.65%



Iteration 0: Loss = 0.0666, Reg Loss = 6.3521, Reconstruct Loss = 0.0000, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0799, Reg Loss = 6.3892, Reconstruct Loss = 0.0005, Cls Loss = 0.0788, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0759, Reg Loss = 6.4466, Reconstruct Loss = 0.0008, Cls Loss = 0.0745, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0761, Reg Loss = 6.4748, Reconstruct Loss = 0.0009, Cls Loss = 0.0746, Learning rate = 1.0000e-03
Epoch [14/200], Training Loss: 0.0752, Training Accuracy: 97.75, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.75it/s]


Epoch [14/200], Validation Loss: 3.0774, Validation Accuracy: 60.99%



Iteration 0: Loss = 0.0355, Reg Loss = 6.6617, Reconstruct Loss = 0.0000, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0716, Reg Loss = 6.5717, Reconstruct Loss = 0.0002, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0723, Reg Loss = 6.7137, Reconstruct Loss = 0.0009, Cls Loss = 0.0707, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0716, Reg Loss = 6.7192, Reconstruct Loss = 0.0007, Cls Loss = 0.0702, Learning rate = 1.0000e-03
Epoch [15/200], Training Loss: 0.0724, Training Accuracy: 97.88, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.66it/s]


Epoch [15/200], Validation Loss: 3.9343, Validation Accuracy: 58.13%



Iteration 0: Loss = 0.0495, Reg Loss = 6.7315, Reconstruct Loss = 0.0000, Cls Loss = 0.0488, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0696, Reg Loss = 6.9693, Reconstruct Loss = 0.0005, Cls Loss = 0.0684, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0687, Reg Loss = 7.0139, Reconstruct Loss = 0.0007, Cls Loss = 0.0673, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0676, Reg Loss = 7.0199, Reconstruct Loss = 0.0006, Cls Loss = 0.0663, Learning rate = 1.0000e-03
Epoch [16/200], Training Loss: 0.0685, Training Accuracy: 97.99, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [16/200], Validation Loss: 4.4019, Validation Accuracy: 55.64%



Iteration 0: Loss = 0.0482, Reg Loss = 7.1786, Reconstruct Loss = 0.0000, Cls Loss = 0.0475, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0640, Reg Loss = 7.0019, Reconstruct Loss = 0.0007, Cls Loss = 0.0626, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0653, Reg Loss = 7.0850, Reconstruct Loss = 0.0007, Cls Loss = 0.0639, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0664, Reg Loss = 7.1520, Reconstruct Loss = 0.0007, Cls Loss = 0.0649, Learning rate = 1.0000e-03
Epoch [17/200], Training Loss: 0.0666, Training Accuracy: 98.01, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.75it/s]


Epoch [17/200], Validation Loss: 4.3436, Validation Accuracy: 55.22%



Iteration 0: Loss = 0.0381, Reg Loss = 6.7889, Reconstruct Loss = 0.0000, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0675, Reg Loss = 7.3625, Reconstruct Loss = 0.0008, Cls Loss = 0.0660, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0663, Reg Loss = 7.4549, Reconstruct Loss = 0.0009, Cls Loss = 0.0647, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0649, Reg Loss = 7.4223, Reconstruct Loss = 0.0007, Cls Loss = 0.0635, Learning rate = 1.0000e-03
Epoch [18/200], Training Loss: 0.0656, Training Accuracy: 98.06, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [18/200], Validation Loss: 4.5102, Validation Accuracy: 54.49%



Iteration 0: Loss = 0.0362, Reg Loss = 7.3802, Reconstruct Loss = 0.0000, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0618, Reg Loss = 7.2483, Reconstruct Loss = 0.0004, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0644, Reg Loss = 7.2856, Reconstruct Loss = 0.0004, Cls Loss = 0.0633, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0632, Reg Loss = 7.2626, Reconstruct Loss = 0.0003, Cls Loss = 0.0622, Learning rate = 1.0000e-03
Epoch [19/200], Training Loss: 0.0636, Training Accuracy: 98.08, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.76it/s]


Epoch [19/200], Validation Loss: 5.0259, Validation Accuracy: 50.02%



Iteration 0: Loss = 0.0502, Reg Loss = 7.5349, Reconstruct Loss = 0.0000, Cls Loss = 0.0494, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0574, Reg Loss = 7.4104, Reconstruct Loss = 0.0008, Cls Loss = 0.0559, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0604, Reg Loss = 7.5093, Reconstruct Loss = 0.0008, Cls Loss = 0.0589, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0614, Reg Loss = 7.6537, Reconstruct Loss = 0.0008, Cls Loss = 0.0598, Learning rate = 1.0000e-03
Epoch [20/200], Training Loss: 0.0613, Training Accuracy: 98.21, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.92it/s]


Epoch [20/200], Validation Loss: 6.6594, Validation Accuracy: 41.69%



Iteration 0: Loss = 0.1072, Reg Loss = 7.3630, Reconstruct Loss = 0.0000, Cls Loss = 0.1064, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0567, Reg Loss = 7.7685, Reconstruct Loss = 0.0012, Cls Loss = 0.0547, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0578, Reg Loss = 7.8594, Reconstruct Loss = 0.0008, Cls Loss = 0.0562, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0575, Reg Loss = 7.9899, Reconstruct Loss = 0.0007, Cls Loss = 0.0560, Learning rate = 1.0000e-03
Epoch [21/200], Training Loss: 0.0578, Training Accuracy: 98.34, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.63it/s]


Epoch [21/200], Validation Loss: 6.1153, Validation Accuracy: 48.02%



Iteration 0: Loss = 0.1216, Reg Loss = 8.1381, Reconstruct Loss = 0.0000, Cls Loss = 0.1208, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0543, Reg Loss = 8.2363, Reconstruct Loss = 0.0002, Cls Loss = 0.0533, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0540, Reg Loss = 8.3216, Reconstruct Loss = 0.0005, Cls Loss = 0.0526, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0554, Reg Loss = 8.3147, Reconstruct Loss = 0.0005, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Epoch [22/200], Training Loss: 0.0560, Training Accuracy: 98.41, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.01it/s]


Epoch [22/200], Validation Loss: 5.3872, Validation Accuracy: 49.02%



Iteration 0: Loss = 0.0386, Reg Loss = 7.9221, Reconstruct Loss = 0.0000, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0582, Reg Loss = 8.4719, Reconstruct Loss = 0.0007, Cls Loss = 0.0567, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0585, Reg Loss = 8.5371, Reconstruct Loss = 0.0011, Cls Loss = 0.0566, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0568, Reg Loss = 8.5074, Reconstruct Loss = 0.0009, Cls Loss = 0.0551, Learning rate = 1.0000e-03
Epoch [23/200], Training Loss: 0.0556, Training Accuracy: 98.35, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [23/200], Validation Loss: 7.5538, Validation Accuracy: 39.51%



Iteration 0: Loss = 0.0232, Reg Loss = 8.4888, Reconstruct Loss = 0.0000, Cls Loss = 0.0224, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0545, Reg Loss = 8.4659, Reconstruct Loss = 0.0007, Cls Loss = 0.0529, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0532, Reg Loss = 8.4091, Reconstruct Loss = 0.0008, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0546, Reg Loss = 8.4031, Reconstruct Loss = 0.0009, Cls Loss = 0.0528, Learning rate = 1.0000e-03
Epoch [24/200], Training Loss: 0.0547, Training Accuracy: 98.44, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.69it/s]


Epoch [24/200], Validation Loss: 8.1242, Validation Accuracy: 38.93%



Iteration 0: Loss = 0.0455, Reg Loss = 8.0082, Reconstruct Loss = 0.0000, Cls Loss = 0.0447, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0549, Reg Loss = 8.1945, Reconstruct Loss = 0.0007, Cls Loss = 0.0534, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0551, Reg Loss = 8.2377, Reconstruct Loss = 0.0005, Cls Loss = 0.0537, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0555, Reg Loss = 8.3129, Reconstruct Loss = 0.0006, Cls Loss = 0.0540, Learning rate = 1.0000e-03
Epoch [25/200], Training Loss: 0.0542, Training Accuracy: 98.42, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [25/200], Validation Loss: 5.3549, Validation Accuracy: 49.98%



Iteration 0: Loss = 0.0524, Reg Loss = 8.2635, Reconstruct Loss = 0.0000, Cls Loss = 0.0516, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0529, Reg Loss = 8.4348, Reconstruct Loss = 0.0009, Cls Loss = 0.0511, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0529, Reg Loss = 8.4974, Reconstruct Loss = 0.0007, Cls Loss = 0.0514, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0525, Reg Loss = 8.4756, Reconstruct Loss = 0.0007, Cls Loss = 0.0510, Learning rate = 1.0000e-03
Epoch [26/200], Training Loss: 0.0525, Training Accuracy: 98.45, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.81it/s]


Epoch [26/200], Validation Loss: 4.0328, Validation Accuracy: 58.00%



Iteration 0: Loss = 0.0353, Reg Loss = 9.0604, Reconstruct Loss = 0.0000, Cls Loss = 0.0344, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0518, Reg Loss = 8.7620, Reconstruct Loss = 0.0005, Cls Loss = 0.0504, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0528, Reg Loss = 8.7683, Reconstruct Loss = 0.0006, Cls Loss = 0.0513, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0518, Reg Loss = 8.7845, Reconstruct Loss = 0.0006, Cls Loss = 0.0502, Learning rate = 1.0000e-03
Epoch [27/200], Training Loss: 0.0510, Training Accuracy: 98.53, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.61it/s]


Epoch [27/200], Validation Loss: 7.1447, Validation Accuracy: 44.43%



Iteration 0: Loss = 0.0729, Reg Loss = 9.5498, Reconstruct Loss = 0.0294, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0544, Reg Loss = 8.5731, Reconstruct Loss = 0.0007, Cls Loss = 0.0528, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0523, Reg Loss = 8.6101, Reconstruct Loss = 0.0006, Cls Loss = 0.0509, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0522, Reg Loss = 8.6717, Reconstruct Loss = 0.0007, Cls Loss = 0.0506, Learning rate = 1.0000e-03
Epoch [28/200], Training Loss: 0.0518, Training Accuracy: 98.47, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.75it/s]


Epoch [28/200], Validation Loss: 8.7406, Validation Accuracy: 37.34%



Iteration 0: Loss = 0.0247, Reg Loss = 8.6973, Reconstruct Loss = 0.0000, Cls Loss = 0.0238, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0484, Reg Loss = 8.7178, Reconstruct Loss = 0.0002, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0494, Reg Loss = 8.6462, Reconstruct Loss = 0.0006, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0488, Reg Loss = 8.5230, Reconstruct Loss = 0.0007, Cls Loss = 0.0472, Learning rate = 1.0000e-03
Epoch [29/200], Training Loss: 0.0498, Training Accuracy: 98.52, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [29/200], Validation Loss: 6.3883, Validation Accuracy: 45.32%



Iteration 0: Loss = 0.0343, Reg Loss = 8.3998, Reconstruct Loss = 0.0000, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0477, Reg Loss = 8.5761, Reconstruct Loss = 0.0003, Cls Loss = 0.0465, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0470, Reg Loss = 8.5703, Reconstruct Loss = 0.0004, Cls Loss = 0.0458, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0483, Reg Loss = 8.6366, Reconstruct Loss = 0.0005, Cls Loss = 0.0469, Learning rate = 1.0000e-03
Epoch [30/200], Training Loss: 0.0482, Training Accuracy: 98.61, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.74it/s]


Epoch [30/200], Validation Loss: 5.3828, Validation Accuracy: 51.20%



Iteration 0: Loss = 0.0676, Reg Loss = 9.2047, Reconstruct Loss = 0.0000, Cls Loss = 0.0667, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0461, Reg Loss = 8.8471, Reconstruct Loss = 0.0004, Cls Loss = 0.0448, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0475, Reg Loss = 8.9188, Reconstruct Loss = 0.0004, Cls Loss = 0.0462, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0475, Reg Loss = 8.9086, Reconstruct Loss = 0.0005, Cls Loss = 0.0462, Learning rate = 1.0000e-03
Epoch [31/200], Training Loss: 0.0469, Training Accuracy: 98.63, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.83it/s]


Epoch [31/200], Validation Loss: 7.6050, Validation Accuracy: 42.04%



Iteration 0: Loss = 0.0260, Reg Loss = 8.7632, Reconstruct Loss = 0.0000, Cls Loss = 0.0252, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0458, Reg Loss = 8.9645, Reconstruct Loss = 0.0014, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0478, Reg Loss = 9.2293, Reconstruct Loss = 0.0008, Cls Loss = 0.0461, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0479, Reg Loss = 9.3850, Reconstruct Loss = 0.0008, Cls Loss = 0.0461, Learning rate = 1.0000e-03
Epoch [32/200], Training Loss: 0.0479, Training Accuracy: 98.57, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [32/200], Validation Loss: 7.9596, Validation Accuracy: 42.51%



Iteration 0: Loss = 0.0274, Reg Loss = 9.6165, Reconstruct Loss = 0.0000, Cls Loss = 0.0265, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0439, Reg Loss = 9.2594, Reconstruct Loss = 0.0005, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0452, Reg Loss = 9.5401, Reconstruct Loss = 0.0006, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0475, Reg Loss = 9.6011, Reconstruct Loss = 0.0006, Cls Loss = 0.0460, Learning rate = 1.0000e-03
Epoch [33/200], Training Loss: 0.0474, Training Accuracy: 98.60, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.73it/s]


Epoch [33/200], Validation Loss: 8.2489, Validation Accuracy: 40.73%



Iteration 0: Loss = 0.0724, Reg Loss = 8.9136, Reconstruct Loss = 0.0000, Cls Loss = 0.0715, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0456, Reg Loss = 9.3467, Reconstruct Loss = 0.0005, Cls Loss = 0.0442, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0455, Reg Loss = 9.2984, Reconstruct Loss = 0.0008, Cls Loss = 0.0438, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0454, Reg Loss = 9.2684, Reconstruct Loss = 0.0007, Cls Loss = 0.0437, Learning rate = 1.0000e-03
Epoch [34/200], Training Loss: 0.0447, Training Accuracy: 98.72, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.80it/s]


Epoch [34/200], Validation Loss: 6.8036, Validation Accuracy: 44.50%



Iteration 0: Loss = 0.0515, Reg Loss = 8.8565, Reconstruct Loss = 0.0000, Cls Loss = 0.0507, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0467, Reg Loss = 9.4555, Reconstruct Loss = 0.0008, Cls Loss = 0.0450, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0439, Reg Loss = 9.2717, Reconstruct Loss = 0.0005, Cls Loss = 0.0425, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0446, Reg Loss = 9.2975, Reconstruct Loss = 0.0005, Cls Loss = 0.0431, Learning rate = 1.0000e-03
Epoch [35/200], Training Loss: 0.0446, Training Accuracy: 98.73, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.56it/s]


Epoch [35/200], Validation Loss: 4.8926, Validation Accuracy: 52.76%



Iteration 0: Loss = 0.0338, Reg Loss = 9.6038, Reconstruct Loss = 0.0000, Cls Loss = 0.0328, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0450, Reg Loss = 9.2992, Reconstruct Loss = 0.0005, Cls Loss = 0.0436, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0430, Reg Loss = 9.3318, Reconstruct Loss = 0.0003, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0429, Reg Loss = 9.3679, Reconstruct Loss = 0.0004, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Epoch [36/200], Training Loss: 0.0426, Training Accuracy: 98.76, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Epoch [36/200], Validation Loss: 5.3413, Validation Accuracy: 49.96%



Iteration 0: Loss = 0.0453, Reg Loss = 9.9477, Reconstruct Loss = 0.0000, Cls Loss = 0.0443, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0446, Reg Loss = 9.6788, Reconstruct Loss = 0.0003, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0445, Reg Loss = 9.6835, Reconstruct Loss = 0.0002, Cls Loss = 0.0433, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0435, Reg Loss = 9.6043, Reconstruct Loss = 0.0004, Cls Loss = 0.0421, Learning rate = 1.0000e-03
Epoch [37/200], Training Loss: 0.0434, Training Accuracy: 98.77, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.64it/s]


Epoch [37/200], Validation Loss: 5.9646, Validation Accuracy: 46.91%



Iteration 0: Loss = 0.0257, Reg Loss = 9.5018, Reconstruct Loss = 0.0000, Cls Loss = 0.0248, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0431, Reg Loss = 9.3437, Reconstruct Loss = 0.0005, Cls Loss = 0.0417, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0430, Reg Loss = 9.4056, Reconstruct Loss = 0.0005, Cls Loss = 0.0416, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0428, Reg Loss = 9.5120, Reconstruct Loss = 0.0006, Cls Loss = 0.0413, Learning rate = 1.0000e-03
Epoch [38/200], Training Loss: 0.0428, Training Accuracy: 98.71, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Epoch [38/200], Validation Loss: 8.4219, Validation Accuracy: 39.24%



Iteration 0: Loss = 0.0169, Reg Loss = 10.0861, Reconstruct Loss = 0.0000, Cls Loss = 0.0159, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0422, Reg Loss = 9.9713, Reconstruct Loss = 0.0005, Cls Loss = 0.0407, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0421, Reg Loss = 9.8579, Reconstruct Loss = 0.0003, Cls Loss = 0.0408, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0430, Reg Loss = 9.7554, Reconstruct Loss = 0.0006, Cls Loss = 0.0415, Learning rate = 1.0000e-03
Epoch [39/200], Training Loss: 0.0421, Training Accuracy: 98.75, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.89it/s]


Epoch [39/200], Validation Loss: 9.9262, Validation Accuracy: 34.06%



Iteration 0: Loss = 0.0148, Reg Loss = 10.0912, Reconstruct Loss = 0.0000, Cls Loss = 0.0138, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0414, Reg Loss = 9.8330, Reconstruct Loss = 0.0004, Cls Loss = 0.0401, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0409, Reg Loss = 9.7457, Reconstruct Loss = 0.0006, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0409, Reg Loss = 9.7169, Reconstruct Loss = 0.0006, Cls Loss = 0.0393, Learning rate = 1.0000e-03
Epoch [40/200], Training Loss: 0.0409, Training Accuracy: 98.78, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.72it/s]


Epoch [40/200], Validation Loss: 9.1926, Validation Accuracy: 33.97%



Iteration 0: Loss = 0.0305, Reg Loss = 9.2563, Reconstruct Loss = 0.0000, Cls Loss = 0.0295, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0405, Reg Loss = 9.2626, Reconstruct Loss = 0.0008, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0412, Reg Loss = 9.3084, Reconstruct Loss = 0.0006, Cls Loss = 0.0397, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0401, Reg Loss = 9.2579, Reconstruct Loss = 0.0005, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Epoch [41/200], Training Loss: 0.0403, Training Accuracy: 98.79, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Epoch [41/200], Validation Loss: 4.7214, Validation Accuracy: 52.62%



Iteration 0: Loss = 0.0866, Reg Loss = 8.9682, Reconstruct Loss = 0.0000, Cls Loss = 0.0857, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0396, Reg Loss = 9.1531, Reconstruct Loss = 0.0005, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0401, Reg Loss = 9.3158, Reconstruct Loss = 0.0006, Cls Loss = 0.0385, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0397, Reg Loss = 9.2384, Reconstruct Loss = 0.0005, Cls Loss = 0.0383, Learning rate = 1.0000e-03
Epoch [42/200], Training Loss: 0.0407, Training Accuracy: 98.81, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Epoch [42/200], Validation Loss: 6.5017, Validation Accuracy: 42.19%



Iteration 0: Loss = 0.0249, Reg Loss = 9.4945, Reconstruct Loss = 0.0000, Cls Loss = 0.0239, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0385, Reg Loss = 9.5076, Reconstruct Loss = 0.0006, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0389, Reg Loss = 9.4378, Reconstruct Loss = 0.0005, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0386, Reg Loss = 9.5055, Reconstruct Loss = 0.0004, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Epoch [43/200], Training Loss: 0.0393, Training Accuracy: 98.85, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.70it/s]


Epoch [43/200], Validation Loss: 5.4819, Validation Accuracy: 50.25%



Iteration 0: Loss = 0.0439, Reg Loss = 9.2166, Reconstruct Loss = 0.0000, Cls Loss = 0.0430, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0374, Reg Loss = 9.2369, Reconstruct Loss = 0.0004, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0389, Reg Loss = 9.2558, Reconstruct Loss = 0.0006, Cls Loss = 0.0374, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0391, Reg Loss = 9.2324, Reconstruct Loss = 0.0004, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Epoch [44/200], Training Loss: 0.0387, Training Accuracy: 98.85, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.89it/s]


Epoch [44/200], Validation Loss: 5.7201, Validation Accuracy: 46.29%



Iteration 0: Loss = 0.0302, Reg Loss = 9.2214, Reconstruct Loss = 0.0000, Cls Loss = 0.0293, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0401, Reg Loss = 9.0383, Reconstruct Loss = 0.0005, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0399, Reg Loss = 9.0444, Reconstruct Loss = 0.0005, Cls Loss = 0.0386, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0402, Reg Loss = 9.0675, Reconstruct Loss = 0.0005, Cls Loss = 0.0388, Learning rate = 1.0000e-03
Epoch [45/200], Training Loss: 0.0397, Training Accuracy: 98.84, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [45/200], Validation Loss: 3.4277, Validation Accuracy: 57.91%



Iteration 0: Loss = 0.0149, Reg Loss = 9.0827, Reconstruct Loss = 0.0000, Cls Loss = 0.0140, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0381, Reg Loss = 9.3191, Reconstruct Loss = 0.0008, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0388, Reg Loss = 9.3906, Reconstruct Loss = 0.0007, Cls Loss = 0.0371, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0398, Reg Loss = 9.4230, Reconstruct Loss = 0.0007, Cls Loss = 0.0382, Learning rate = 1.0000e-03
Epoch [46/200], Training Loss: 0.0404, Training Accuracy: 98.80, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.68it/s]


Epoch [46/200], Validation Loss: 5.3140, Validation Accuracy: 49.41%



Iteration 0: Loss = 0.0460, Reg Loss = 9.0390, Reconstruct Loss = 0.0000, Cls Loss = 0.0451, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0380, Reg Loss = 9.1736, Reconstruct Loss = 0.0002, Cls Loss = 0.0369, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0379, Reg Loss = 9.2138, Reconstruct Loss = 0.0004, Cls Loss = 0.0365, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0379, Reg Loss = 9.2394, Reconstruct Loss = 0.0004, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Epoch [47/200], Training Loss: 0.0384, Training Accuracy: 98.86, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Epoch [47/200], Validation Loss: 4.8207, Validation Accuracy: 48.84%



Iteration 0: Loss = 0.0433, Reg Loss = 9.1126, Reconstruct Loss = 0.0000, Cls Loss = 0.0424, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0386, Reg Loss = 9.3273, Reconstruct Loss = 0.0007, Cls Loss = 0.0369, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0396, Reg Loss = 9.3007, Reconstruct Loss = 0.0006, Cls Loss = 0.0381, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0391, Reg Loss = 9.2921, Reconstruct Loss = 0.0005, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Epoch [48/200], Training Loss: 0.0390, Training Accuracy: 98.81, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.76it/s]


Epoch [48/200], Validation Loss: 4.7288, Validation Accuracy: 53.00%



Iteration 0: Loss = 0.0176, Reg Loss = 9.1576, Reconstruct Loss = 0.0000, Cls Loss = 0.0167, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0379, Reg Loss = 9.4217, Reconstruct Loss = 0.0003, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0370, Reg Loss = 9.3629, Reconstruct Loss = 0.0004, Cls Loss = 0.0356, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0377, Reg Loss = 9.3494, Reconstruct Loss = 0.0004, Cls Loss = 0.0364, Learning rate = 1.0000e-03
Epoch [49/200], Training Loss: 0.0375, Training Accuracy: 98.95, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.71it/s]


Epoch [49/200], Validation Loss: 4.1146, Validation Accuracy: 53.87%



Iteration 0: Loss = 0.0404, Reg Loss = 9.0747, Reconstruct Loss = 0.0000, Cls Loss = 0.0395, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0381, Reg Loss = 9.2823, Reconstruct Loss = 0.0004, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0393, Reg Loss = 9.2697, Reconstruct Loss = 0.0004, Cls Loss = 0.0379, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0387, Reg Loss = 9.2965, Reconstruct Loss = 0.0005, Cls Loss = 0.0373, Learning rate = 1.0000e-03
Epoch [50/200], Training Loss: 0.0377, Training Accuracy: 98.91, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.88it/s]


Epoch [50/200], Validation Loss: 5.9436, Validation Accuracy: 47.65%



Iteration 0: Loss = 0.0336, Reg Loss = 8.8915, Reconstruct Loss = 0.0000, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0377, Reg Loss = 9.2895, Reconstruct Loss = 0.0002, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0360, Reg Loss = 9.3423, Reconstruct Loss = 0.0003, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0365, Reg Loss = 9.2947, Reconstruct Loss = 0.0005, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Epoch [51/200], Training Loss: 0.0368, Training Accuracy: 98.89, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.74it/s]


Epoch [51/200], Validation Loss: 5.2051, Validation Accuracy: 49.78%



Iteration 0: Loss = 0.0578, Reg Loss = 8.8200, Reconstruct Loss = 0.0000, Cls Loss = 0.0569, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0360, Reg Loss = 8.8647, Reconstruct Loss = 0.0005, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0368, Reg Loss = 8.9609, Reconstruct Loss = 0.0006, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0369, Reg Loss = 9.0587, Reconstruct Loss = 0.0006, Cls Loss = 0.0354, Learning rate = 1.0000e-03
Epoch [52/200], Training Loss: 0.0368, Training Accuracy: 98.93, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [52/200], Validation Loss: 5.0725, Validation Accuracy: 46.67%



Iteration 0: Loss = 0.0186, Reg Loss = 9.0785, Reconstruct Loss = 0.0000, Cls Loss = 0.0177, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0364, Reg Loss = 9.4761, Reconstruct Loss = 0.0007, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0375, Reg Loss = 9.3438, Reconstruct Loss = 0.0005, Cls Loss = 0.0361, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0382, Reg Loss = 9.2818, Reconstruct Loss = 0.0005, Cls Loss = 0.0368, Learning rate = 1.0000e-03
Epoch [53/200], Training Loss: 0.0377, Training Accuracy: 98.88, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.75it/s]


Epoch [53/200], Validation Loss: 4.1037, Validation Accuracy: 53.32%



Iteration 0: Loss = 0.0130, Reg Loss = 9.0719, Reconstruct Loss = 0.0000, Cls Loss = 0.0121, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0359, Reg Loss = 9.6394, Reconstruct Loss = 0.0010, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0371, Reg Loss = 9.5505, Reconstruct Loss = 0.0008, Cls Loss = 0.0354, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0367, Reg Loss = 9.4070, Reconstruct Loss = 0.0006, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Epoch [54/200], Training Loss: 0.0371, Training Accuracy: 98.89, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.64it/s]


Epoch [54/200], Validation Loss: 3.7304, Validation Accuracy: 56.27%



Iteration 0: Loss = 0.0228, Reg Loss = 9.0433, Reconstruct Loss = 0.0000, Cls Loss = 0.0219, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0360, Reg Loss = 9.2887, Reconstruct Loss = 0.0011, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0359, Reg Loss = 9.0781, Reconstruct Loss = 0.0007, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0358, Reg Loss = 9.0174, Reconstruct Loss = 0.0006, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Epoch [55/200], Training Loss: 0.0356, Training Accuracy: 99.00, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [55/200], Validation Loss: 2.7118, Validation Accuracy: 62.84%



Iteration 0: Loss = 0.0362, Reg Loss = 9.2076, Reconstruct Loss = 0.0000, Cls Loss = 0.0353, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0357, Reg Loss = 9.1185, Reconstruct Loss = 0.0010, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0363, Reg Loss = 9.0489, Reconstruct Loss = 0.0008, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0365, Reg Loss = 9.0020, Reconstruct Loss = 0.0008, Cls Loss = 0.0349, Learning rate = 1.0000e-03
Epoch [56/200], Training Loss: 0.0368, Training Accuracy: 98.88, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Epoch [56/200], Validation Loss: 5.8303, Validation Accuracy: 49.05%



Iteration 0: Loss = 0.0250, Reg Loss = 8.8429, Reconstruct Loss = 0.0000, Cls Loss = 0.0241, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0347, Reg Loss = 9.1448, Reconstruct Loss = 0.0004, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0360, Reg Loss = 9.0049, Reconstruct Loss = 0.0003, Cls Loss = 0.0348, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0355, Reg Loss = 9.0874, Reconstruct Loss = 0.0003, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Epoch [57/200], Training Loss: 0.0359, Training Accuracy: 98.99, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.69it/s]


Epoch [57/200], Validation Loss: 4.7531, Validation Accuracy: 51.99%



Iteration 0: Loss = 0.0477, Reg Loss = 8.6853, Reconstruct Loss = 0.0000, Cls Loss = 0.0469, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0360, Reg Loss = 9.1262, Reconstruct Loss = 0.0007, Cls Loss = 0.0344, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0375, Reg Loss = 9.1438, Reconstruct Loss = 0.0008, Cls Loss = 0.0358, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0375, Reg Loss = 9.0371, Reconstruct Loss = 0.0006, Cls Loss = 0.0360, Learning rate = 1.0000e-03
Epoch [58/200], Training Loss: 0.0370, Training Accuracy: 98.86, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.70it/s]


Epoch [58/200], Validation Loss: 4.9219, Validation Accuracy: 53.34%



Iteration 0: Loss = 0.0158, Reg Loss = 8.4370, Reconstruct Loss = 0.0000, Cls Loss = 0.0149, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0317, Reg Loss = 8.7945, Reconstruct Loss = 0.0004, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0350, Reg Loss = 8.8841, Reconstruct Loss = 0.0005, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0341, Reg Loss = 9.0199, Reconstruct Loss = 0.0004, Cls Loss = 0.0328, Learning rate = 1.0000e-03
Epoch [59/200], Training Loss: 0.0348, Training Accuracy: 98.95, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [59/200], Validation Loss: 4.0863, Validation Accuracy: 56.66%



Iteration 0: Loss = 0.0574, Reg Loss = 8.8880, Reconstruct Loss = 0.0000, Cls Loss = 0.0565, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0348, Reg Loss = 9.0800, Reconstruct Loss = 0.0003, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0359, Reg Loss = 9.1075, Reconstruct Loss = 0.0004, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0355, Reg Loss = 9.2222, Reconstruct Loss = 0.0003, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Epoch [60/200], Training Loss: 0.0357, Training Accuracy: 98.94, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.73it/s]


Epoch [60/200], Validation Loss: 4.6524, Validation Accuracy: 54.00%



Iteration 0: Loss = 0.0489, Reg Loss = 9.9023, Reconstruct Loss = 0.0000, Cls Loss = 0.0479, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0369, Reg Loss = 9.5822, Reconstruct Loss = 0.0004, Cls Loss = 0.0356, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0367, Reg Loss = 9.5207, Reconstruct Loss = 0.0005, Cls Loss = 0.0352, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0369, Reg Loss = 9.5650, Reconstruct Loss = 0.0005, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Epoch [61/200], Training Loss: 0.0368, Training Accuracy: 98.92, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [61/200], Validation Loss: 7.2194, Validation Accuracy: 41.10%



Iteration 0: Loss = 0.0616, Reg Loss = 9.3544, Reconstruct Loss = 0.0000, Cls Loss = 0.0607, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0369, Reg Loss = 9.2619, Reconstruct Loss = 0.0009, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0366, Reg Loss = 9.2407, Reconstruct Loss = 0.0007, Cls Loss = 0.0350, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0356, Reg Loss = 9.2529, Reconstruct Loss = 0.0005, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Epoch [62/200], Training Loss: 0.0364, Training Accuracy: 98.93, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.70it/s]


Epoch [62/200], Validation Loss: 5.1829, Validation Accuracy: 51.57%



Iteration 0: Loss = 0.0207, Reg Loss = 8.6769, Reconstruct Loss = 0.0000, Cls Loss = 0.0198, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0335, Reg Loss = 9.0472, Reconstruct Loss = 0.0006, Cls Loss = 0.0319, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0357, Reg Loss = 9.2806, Reconstruct Loss = 0.0005, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0354, Reg Loss = 9.2700, Reconstruct Loss = 0.0004, Cls Loss = 0.0341, Learning rate = 1.0000e-03
Epoch [63/200], Training Loss: 0.0360, Training Accuracy: 98.93, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.71it/s]


Epoch [63/200], Validation Loss: 5.3132, Validation Accuracy: 49.99%



Iteration 0: Loss = 0.0525, Reg Loss = 9.1706, Reconstruct Loss = 0.0000, Cls Loss = 0.0515, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0352, Reg Loss = 9.1947, Reconstruct Loss = 0.0003, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0367, Reg Loss = 9.0441, Reconstruct Loss = 0.0003, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0363, Reg Loss = 9.0658, Reconstruct Loss = 0.0003, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Epoch [64/200], Training Loss: 0.0373, Training Accuracy: 98.84, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.04it/s]


Epoch [64/200], Validation Loss: 6.0081, Validation Accuracy: 45.75%



Iteration 0: Loss = 0.0445, Reg Loss = 9.1280, Reconstruct Loss = 0.0000, Cls Loss = 0.0435, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0375, Reg Loss = 9.2871, Reconstruct Loss = 0.0007, Cls Loss = 0.0359, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0353, Reg Loss = 9.1976, Reconstruct Loss = 0.0006, Cls Loss = 0.0338, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0357, Reg Loss = 9.2123, Reconstruct Loss = 0.0005, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Epoch [65/200], Training Loss: 0.0357, Training Accuracy: 99.00, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Epoch [65/200], Validation Loss: 5.8733, Validation Accuracy: 47.51%



Iteration 0: Loss = 0.0782, Reg Loss = 8.5420, Reconstruct Loss = 0.0000, Cls Loss = 0.0774, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0381, Reg Loss = 8.9225, Reconstruct Loss = 0.0002, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0378, Reg Loss = 8.9872, Reconstruct Loss = 0.0003, Cls Loss = 0.0366, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0390, Reg Loss = 9.0640, Reconstruct Loss = 0.0006, Cls Loss = 0.0375, Learning rate = 1.0000e-03
Epoch [66/200], Training Loss: 0.0387, Training Accuracy: 98.75, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Epoch [66/200], Validation Loss: 6.3624, Validation Accuracy: 45.15%



Iteration 0: Loss = 0.0200, Reg Loss = 9.3670, Reconstruct Loss = 0.0000, Cls Loss = 0.0191, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0350, Reg Loss = 9.5367, Reconstruct Loss = 0.0004, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0330, Reg Loss = 9.3670, Reconstruct Loss = 0.0003, Cls Loss = 0.0318, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0338, Reg Loss = 9.2375, Reconstruct Loss = 0.0003, Cls Loss = 0.0326, Learning rate = 1.0000e-03
Epoch [67/200], Training Loss: 0.0346, Training Accuracy: 98.97, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [67/200], Validation Loss: 5.9234, Validation Accuracy: 45.88%



Iteration 0: Loss = 0.0243, Reg Loss = 9.1890, Reconstruct Loss = 0.0000, Cls Loss = 0.0234, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0338, Reg Loss = 8.9526, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0361, Reg Loss = 9.0808, Reconstruct Loss = 0.0005, Cls Loss = 0.0347, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0358, Reg Loss = 9.1492, Reconstruct Loss = 0.0006, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Epoch [68/200], Training Loss: 0.0360, Training Accuracy: 98.94, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [68/200], Validation Loss: 7.3936, Validation Accuracy: 43.32%



Iteration 0: Loss = 0.0748, Reg Loss = 9.1770, Reconstruct Loss = 0.0000, Cls Loss = 0.0739, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0350, Reg Loss = 9.0795, Reconstruct Loss = 0.0006, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0351, Reg Loss = 8.9740, Reconstruct Loss = 0.0006, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0348, Reg Loss = 8.9890, Reconstruct Loss = 0.0006, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Epoch [69/200], Training Loss: 0.0351, Training Accuracy: 99.02, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [69/200], Validation Loss: 4.8348, Validation Accuracy: 51.78%



Iteration 0: Loss = 0.0274, Reg Loss = 8.9720, Reconstruct Loss = 0.0000, Cls Loss = 0.0265, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0342, Reg Loss = 9.2721, Reconstruct Loss = 0.0012, Cls Loss = 0.0320, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0347, Reg Loss = 9.4131, Reconstruct Loss = 0.0010, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0353, Reg Loss = 9.4231, Reconstruct Loss = 0.0009, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Epoch [70/200], Training Loss: 0.0352, Training Accuracy: 99.00, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.74it/s]


Epoch [70/200], Validation Loss: 3.7148, Validation Accuracy: 58.20%



Iteration 0: Loss = 0.0224, Reg Loss = 8.5107, Reconstruct Loss = 0.0000, Cls Loss = 0.0216, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0331, Reg Loss = 8.9181, Reconstruct Loss = 0.0006, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0330, Reg Loss = 8.9708, Reconstruct Loss = 0.0006, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0342, Reg Loss = 8.9691, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Epoch [71/200], Training Loss: 0.0348, Training Accuracy: 99.02, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [71/200], Validation Loss: 3.7709, Validation Accuracy: 55.49%



Iteration 0: Loss = 0.0272, Reg Loss = 9.3324, Reconstruct Loss = 0.0000, Cls Loss = 0.0262, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0334, Reg Loss = 9.3392, Reconstruct Loss = 0.0007, Cls Loss = 0.0318, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0333, Reg Loss = 9.1981, Reconstruct Loss = 0.0003, Cls Loss = 0.0321, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0334, Reg Loss = 9.1867, Reconstruct Loss = 0.0003, Cls Loss = 0.0321, Learning rate = 1.0000e-03
Epoch [72/200], Training Loss: 0.0340, Training Accuracy: 99.01, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [72/200], Validation Loss: 3.9078, Validation Accuracy: 55.98%



Iteration 0: Loss = 0.0075, Reg Loss = 8.7688, Reconstruct Loss = 0.0000, Cls Loss = 0.0066, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0369, Reg Loss = 9.0774, Reconstruct Loss = 0.0005, Cls Loss = 0.0355, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0363, Reg Loss = 9.1961, Reconstruct Loss = 0.0005, Cls Loss = 0.0349, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0363, Reg Loss = 9.1533, Reconstruct Loss = 0.0005, Cls Loss = 0.0349, Learning rate = 1.0000e-03
Epoch [73/200], Training Loss: 0.0361, Training Accuracy: 98.89, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [73/200], Validation Loss: 4.4345, Validation Accuracy: 54.84%



Iteration 0: Loss = 0.0482, Reg Loss = 8.9562, Reconstruct Loss = 0.0000, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0333, Reg Loss = 9.1714, Reconstruct Loss = 0.0004, Cls Loss = 0.0320, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0340, Reg Loss = 9.1938, Reconstruct Loss = 0.0002, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0345, Reg Loss = 9.0958, Reconstruct Loss = 0.0003, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Epoch [74/200], Training Loss: 0.0342, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Epoch [74/200], Validation Loss: 3.5182, Validation Accuracy: 58.69%



Iteration 0: Loss = 0.0329, Reg Loss = 8.7868, Reconstruct Loss = 0.0000, Cls Loss = 0.0320, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0332, Reg Loss = 9.0294, Reconstruct Loss = 0.0009, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0328, Reg Loss = 9.1345, Reconstruct Loss = 0.0006, Cls Loss = 0.0313, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0332, Reg Loss = 9.1999, Reconstruct Loss = 0.0005, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Epoch [75/200], Training Loss: 0.0349, Training Accuracy: 98.98, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [75/200], Validation Loss: 2.9964, Validation Accuracy: 61.17%



Iteration 0: Loss = 0.0483, Reg Loss = 9.2046, Reconstruct Loss = 0.0000, Cls Loss = 0.0473, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0368, Reg Loss = 9.4066, Reconstruct Loss = 0.0007, Cls Loss = 0.0351, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0351, Reg Loss = 9.3570, Reconstruct Loss = 0.0005, Cls Loss = 0.0337, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0355, Reg Loss = 9.3288, Reconstruct Loss = 0.0005, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Epoch [76/200], Training Loss: 0.0361, Training Accuracy: 98.94, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [76/200], Validation Loss: 3.7785, Validation Accuracy: 55.44%



Iteration 0: Loss = 0.0231, Reg Loss = 9.0154, Reconstruct Loss = 0.0000, Cls Loss = 0.0222, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0334, Reg Loss = 9.2474, Reconstruct Loss = 0.0002, Cls Loss = 0.0322, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0352, Reg Loss = 9.2790, Reconstruct Loss = 0.0003, Cls Loss = 0.0339, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0346, Reg Loss = 9.2122, Reconstruct Loss = 0.0004, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Epoch [77/200], Training Loss: 0.0346, Training Accuracy: 98.96, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Epoch [77/200], Validation Loss: 6.3099, Validation Accuracy: 44.76%



Iteration 0: Loss = 0.0467, Reg Loss = 8.9731, Reconstruct Loss = 0.0000, Cls Loss = 0.0458, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0350, Reg Loss = 8.9755, Reconstruct Loss = 0.0004, Cls Loss = 0.0337, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0341, Reg Loss = 8.9481, Reconstruct Loss = 0.0005, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0340, Reg Loss = 9.0387, Reconstruct Loss = 0.0004, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Epoch [78/200], Training Loss: 0.0339, Training Accuracy: 99.00, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.13it/s]


Epoch [78/200], Validation Loss: 3.7665, Validation Accuracy: 58.13%



Iteration 0: Loss = 0.0919, Reg Loss = 9.3624, Reconstruct Loss = 0.0000, Cls Loss = 0.0910, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0380, Reg Loss = 9.2323, Reconstruct Loss = 0.0003, Cls Loss = 0.0367, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0358, Reg Loss = 9.1983, Reconstruct Loss = 0.0005, Cls Loss = 0.0345, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0349, Reg Loss = 9.2339, Reconstruct Loss = 0.0003, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Epoch [79/200], Training Loss: 0.0354, Training Accuracy: 98.93, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [79/200], Validation Loss: 5.9393, Validation Accuracy: 47.83%



Iteration 0: Loss = 0.0339, Reg Loss = 9.5668, Reconstruct Loss = 0.0000, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0325, Reg Loss = 9.3040, Reconstruct Loss = 0.0002, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0341, Reg Loss = 9.2978, Reconstruct Loss = 0.0003, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0339, Reg Loss = 9.3385, Reconstruct Loss = 0.0003, Cls Loss = 0.0326, Learning rate = 1.0000e-03
Epoch [80/200], Training Loss: 0.0337, Training Accuracy: 99.02, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [80/200], Validation Loss: 3.3006, Validation Accuracy: 60.28%



Iteration 0: Loss = 0.0238, Reg Loss = 9.5364, Reconstruct Loss = 0.0000, Cls Loss = 0.0228, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0317, Reg Loss = 9.4098, Reconstruct Loss = 0.0004, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0335, Reg Loss = 9.5100, Reconstruct Loss = 0.0003, Cls Loss = 0.0322, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0343, Reg Loss = 9.5486, Reconstruct Loss = 0.0004, Cls Loss = 0.0330, Learning rate = 1.0000e-03
Epoch [81/200], Training Loss: 0.0340, Training Accuracy: 98.98, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.92it/s]


Epoch [81/200], Validation Loss: 4.4927, Validation Accuracy: 55.94%



Iteration 0: Loss = 0.0287, Reg Loss = 9.3401, Reconstruct Loss = 0.0000, Cls Loss = 0.0278, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0356, Reg Loss = 9.4343, Reconstruct Loss = 0.0003, Cls Loss = 0.0343, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0346, Reg Loss = 9.4405, Reconstruct Loss = 0.0004, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0348, Reg Loss = 9.4238, Reconstruct Loss = 0.0004, Cls Loss = 0.0335, Learning rate = 1.0000e-03
Epoch [82/200], Training Loss: 0.0345, Training Accuracy: 98.95, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.03it/s]


Epoch [82/200], Validation Loss: 3.8386, Validation Accuracy: 56.93%



Iteration 0: Loss = 0.0339, Reg Loss = 9.3455, Reconstruct Loss = 0.0000, Cls Loss = 0.0330, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0358, Reg Loss = 9.1994, Reconstruct Loss = 0.0004, Cls Loss = 0.0346, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0346, Reg Loss = 9.2283, Reconstruct Loss = 0.0004, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0343, Reg Loss = 9.1909, Reconstruct Loss = 0.0005, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Epoch [83/200], Training Loss: 0.0338, Training Accuracy: 98.96, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [83/200], Validation Loss: 5.5226, Validation Accuracy: 47.78%



Iteration 0: Loss = 0.0221, Reg Loss = 9.7194, Reconstruct Loss = 0.0000, Cls Loss = 0.0212, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0307, Reg Loss = 9.5073, Reconstruct Loss = 0.0004, Cls Loss = 0.0294, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0326, Reg Loss = 9.4214, Reconstruct Loss = 0.0004, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0329, Reg Loss = 9.3741, Reconstruct Loss = 0.0004, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Epoch [84/200], Training Loss: 0.0335, Training Accuracy: 98.99, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.17it/s]


Epoch [84/200], Validation Loss: 3.9889, Validation Accuracy: 56.06%



Iteration 0: Loss = 0.0396, Reg Loss = 8.6286, Reconstruct Loss = 0.0000, Cls Loss = 0.0387, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0293, Reg Loss = 9.2685, Reconstruct Loss = 0.0002, Cls Loss = 0.0282, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0319, Reg Loss = 9.3604, Reconstruct Loss = 0.0002, Cls Loss = 0.0307, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0331, Reg Loss = 9.3804, Reconstruct Loss = 0.0003, Cls Loss = 0.0319, Learning rate = 1.0000e-03
Epoch [85/200], Training Loss: 0.0328, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [85/200], Validation Loss: 4.8053, Validation Accuracy: 51.07%



Iteration 0: Loss = 0.0194, Reg Loss = 9.2245, Reconstruct Loss = 0.0000, Cls Loss = 0.0185, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0327, Reg Loss = 9.1152, Reconstruct Loss = 0.0002, Cls Loss = 0.0316, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0344, Reg Loss = 9.2234, Reconstruct Loss = 0.0006, Cls Loss = 0.0329, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0347, Reg Loss = 9.1193, Reconstruct Loss = 0.0005, Cls Loss = 0.0333, Learning rate = 1.0000e-03
Epoch [86/200], Training Loss: 0.0340, Training Accuracy: 98.99, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [86/200], Validation Loss: 4.5410, Validation Accuracy: 51.10%



Iteration 0: Loss = 0.0379, Reg Loss = 8.7174, Reconstruct Loss = 0.0000, Cls Loss = 0.0370, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0349, Reg Loss = 9.1136, Reconstruct Loss = 0.0003, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0346, Reg Loss = 9.1325, Reconstruct Loss = 0.0003, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0348, Reg Loss = 9.0907, Reconstruct Loss = 0.0004, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Epoch [87/200], Training Loss: 0.0345, Training Accuracy: 98.98, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [87/200], Validation Loss: 6.1568, Validation Accuracy: 43.18%



Iteration 0: Loss = 0.0229, Reg Loss = 9.1000, Reconstruct Loss = 0.0000, Cls Loss = 0.0220, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0328, Reg Loss = 9.2677, Reconstruct Loss = 0.0002, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0326, Reg Loss = 9.3501, Reconstruct Loss = 0.0003, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0328, Reg Loss = 9.3656, Reconstruct Loss = 0.0004, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Epoch [88/200], Training Loss: 0.0331, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.09it/s]


Epoch [88/200], Validation Loss: 5.8399, Validation Accuracy: 44.48%



Iteration 0: Loss = 0.0560, Reg Loss = 9.1369, Reconstruct Loss = 0.0000, Cls Loss = 0.0550, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0305, Reg Loss = 9.3463, Reconstruct Loss = 0.0006, Cls Loss = 0.0289, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0312, Reg Loss = 9.3850, Reconstruct Loss = 0.0005, Cls Loss = 0.0298, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0312, Reg Loss = 9.3802, Reconstruct Loss = 0.0006, Cls Loss = 0.0296, Learning rate = 1.0000e-03
Epoch [89/200], Training Loss: 0.0324, Training Accuracy: 99.09, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Epoch [89/200], Validation Loss: 5.2921, Validation Accuracy: 48.00%



Iteration 0: Loss = 0.0237, Reg Loss = 9.2494, Reconstruct Loss = 0.0000, Cls Loss = 0.0228, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0344, Reg Loss = 9.4287, Reconstruct Loss = 0.0008, Cls Loss = 0.0327, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0351, Reg Loss = 9.3569, Reconstruct Loss = 0.0005, Cls Loss = 0.0336, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0339, Reg Loss = 9.3729, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-03
Epoch [90/200], Training Loss: 0.0336, Training Accuracy: 98.99, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [90/200], Validation Loss: 4.9545, Validation Accuracy: 51.92%



Iteration 0: Loss = 0.0340, Reg Loss = 9.5017, Reconstruct Loss = 0.0000, Cls Loss = 0.0330, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0345, Reg Loss = 9.4671, Reconstruct Loss = 0.0002, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0354, Reg Loss = 9.6263, Reconstruct Loss = 0.0004, Cls Loss = 0.0340, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0342, Reg Loss = 9.6378, Reconstruct Loss = 0.0004, Cls Loss = 0.0328, Learning rate = 1.0000e-03
Epoch [91/200], Training Loss: 0.0335, Training Accuracy: 98.98, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.89it/s]


Epoch [91/200], Validation Loss: 4.7600, Validation Accuracy: 47.47%



Iteration 0: Loss = 0.0167, Reg Loss = 9.2171, Reconstruct Loss = 0.0000, Cls Loss = 0.0158, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0351, Reg Loss = 9.6037, Reconstruct Loss = 0.0008, Cls Loss = 0.0334, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0331, Reg Loss = 9.5811, Reconstruct Loss = 0.0006, Cls Loss = 0.0316, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0326, Reg Loss = 9.5790, Reconstruct Loss = 0.0006, Cls Loss = 0.0310, Learning rate = 1.0000e-03
Epoch [92/200], Training Loss: 0.0323, Training Accuracy: 99.06, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Epoch [92/200], Validation Loss: 4.3749, Validation Accuracy: 54.30%



Iteration 0: Loss = 0.0465, Reg Loss = 9.0554, Reconstruct Loss = 0.0000, Cls Loss = 0.0456, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0325, Reg Loss = 9.2447, Reconstruct Loss = 0.0006, Cls Loss = 0.0310, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0326, Reg Loss = 9.2923, Reconstruct Loss = 0.0005, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0332, Reg Loss = 9.2720, Reconstruct Loss = 0.0004, Cls Loss = 0.0318, Learning rate = 1.0000e-03
Epoch [93/200], Training Loss: 0.0333, Training Accuracy: 99.00, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [93/200], Validation Loss: 3.3277, Validation Accuracy: 60.70%



Iteration 0: Loss = 0.0713, Reg Loss = 9.8040, Reconstruct Loss = 0.0000, Cls Loss = 0.0703, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0322, Reg Loss = 9.3896, Reconstruct Loss = 0.0004, Cls Loss = 0.0309, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0326, Reg Loss = 9.6792, Reconstruct Loss = 0.0005, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0325, Reg Loss = 9.5636, Reconstruct Loss = 0.0004, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Epoch [94/200], Training Loss: 0.0323, Training Accuracy: 99.05, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [94/200], Validation Loss: 6.0033, Validation Accuracy: 43.07%



Iteration 0: Loss = 0.0323, Reg Loss = 9.5185, Reconstruct Loss = 0.0000, Cls Loss = 0.0313, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0336, Reg Loss = 9.4094, Reconstruct Loss = 0.0002, Cls Loss = 0.0325, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0333, Reg Loss = 9.3149, Reconstruct Loss = 0.0002, Cls Loss = 0.0321, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0330, Reg Loss = 9.2759, Reconstruct Loss = 0.0002, Cls Loss = 0.0319, Learning rate = 1.0000e-03
Epoch [95/200], Training Loss: 0.0327, Training Accuracy: 99.06, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [95/200], Validation Loss: 4.0065, Validation Accuracy: 50.90%



Iteration 0: Loss = 0.0120, Reg Loss = 9.4860, Reconstruct Loss = 0.0000, Cls Loss = 0.0111, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0325, Reg Loss = 9.5727, Reconstruct Loss = 0.0004, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0316, Reg Loss = 9.4109, Reconstruct Loss = 0.0003, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0321, Reg Loss = 9.3621, Reconstruct Loss = 0.0003, Cls Loss = 0.0309, Learning rate = 1.0000e-03
Epoch [96/200], Training Loss: 0.0329, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [96/200], Validation Loss: 3.0265, Validation Accuracy: 61.79%



Iteration 0: Loss = 0.0492, Reg Loss = 8.7267, Reconstruct Loss = 0.0000, Cls Loss = 0.0483, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0358, Reg Loss = 9.2926, Reconstruct Loss = 0.0006, Cls Loss = 0.0342, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0339, Reg Loss = 9.2078, Reconstruct Loss = 0.0006, Cls Loss = 0.0323, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0337, Reg Loss = 9.2516, Reconstruct Loss = 0.0005, Cls Loss = 0.0323, Learning rate = 1.0000e-03
Epoch [97/200], Training Loss: 0.0337, Training Accuracy: 98.94, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [97/200], Validation Loss: 3.6039, Validation Accuracy: 57.22%



Iteration 0: Loss = 0.0126, Reg Loss = 9.3737, Reconstruct Loss = 0.0000, Cls Loss = 0.0117, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0281, Reg Loss = 9.2777, Reconstruct Loss = 0.0004, Cls Loss = 0.0268, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0310, Reg Loss = 9.3080, Reconstruct Loss = 0.0005, Cls Loss = 0.0296, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0317, Reg Loss = 9.2700, Reconstruct Loss = 0.0004, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Epoch [98/200], Training Loss: 0.0325, Training Accuracy: 98.98, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [98/200], Validation Loss: 2.6770, Validation Accuracy: 63.35%



Iteration 0: Loss = 0.0264, Reg Loss = 8.5813, Reconstruct Loss = 0.0000, Cls Loss = 0.0255, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0307, Reg Loss = 8.9550, Reconstruct Loss = 0.0005, Cls Loss = 0.0293, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0316, Reg Loss = 9.1072, Reconstruct Loss = 0.0003, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0323, Reg Loss = 9.2824, Reconstruct Loss = 0.0003, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Epoch [99/200], Training Loss: 0.0319, Training Accuracy: 99.10, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [99/200], Validation Loss: 4.5899, Validation Accuracy: 53.74%



Iteration 0: Loss = 0.0141, Reg Loss = 9.4855, Reconstruct Loss = 0.0000, Cls Loss = 0.0132, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0347, Reg Loss = 9.6541, Reconstruct Loss = 0.0006, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0326, Reg Loss = 9.8124, Reconstruct Loss = 0.0005, Cls Loss = 0.0312, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0326, Reg Loss = 9.8677, Reconstruct Loss = 0.0005, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Epoch [100/200], Training Loss: 0.0320, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.85it/s]


Epoch [100/200], Validation Loss: 5.1760, Validation Accuracy: 51.89%



Iteration 0: Loss = 0.0706, Reg Loss = 9.1231, Reconstruct Loss = 0.0000, Cls Loss = 0.0697, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0335, Reg Loss = 9.6107, Reconstruct Loss = 0.0004, Cls Loss = 0.0322, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0346, Reg Loss = 9.7159, Reconstruct Loss = 0.0005, Cls Loss = 0.0332, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0339, Reg Loss = 9.6968, Reconstruct Loss = 0.0005, Cls Loss = 0.0324, Learning rate = 1.0000e-03
Epoch [101/200], Training Loss: 0.0330, Training Accuracy: 99.03, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [101/200], Validation Loss: 5.3213, Validation Accuracy: 51.83%



Iteration 0: Loss = 0.0406, Reg Loss = 9.1021, Reconstruct Loss = 0.0000, Cls Loss = 0.0396, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0307, Reg Loss = 9.4514, Reconstruct Loss = 0.0003, Cls Loss = 0.0295, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0312, Reg Loss = 9.5166, Reconstruct Loss = 0.0003, Cls Loss = 0.0300, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0318, Reg Loss = 9.5501, Reconstruct Loss = 0.0003, Cls Loss = 0.0305, Learning rate = 1.0000e-03
Epoch [102/200], Training Loss: 0.0319, Training Accuracy: 99.06, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [102/200], Validation Loss: 5.7246, Validation Accuracy: 47.46%



Iteration 0: Loss = 0.0127, Reg Loss = 9.0798, Reconstruct Loss = 0.0000, Cls Loss = 0.0118, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0329, Reg Loss = 9.7749, Reconstruct Loss = 0.0004, Cls Loss = 0.0315, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0330, Reg Loss = 9.6129, Reconstruct Loss = 0.0004, Cls Loss = 0.0317, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0324, Reg Loss = 9.5290, Reconstruct Loss = 0.0003, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Epoch [103/200], Training Loss: 0.0327, Training Accuracy: 99.09, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [103/200], Validation Loss: 3.6233, Validation Accuracy: 58.69%



Iteration 0: Loss = 0.0177, Reg Loss = 9.1974, Reconstruct Loss = 0.0000, Cls Loss = 0.0168, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0338, Reg Loss = 9.4638, Reconstruct Loss = 0.0007, Cls Loss = 0.0321, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0321, Reg Loss = 9.5621, Reconstruct Loss = 0.0006, Cls Loss = 0.0305, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0321, Reg Loss = 9.5143, Reconstruct Loss = 0.0005, Cls Loss = 0.0306, Learning rate = 1.0000e-03
Epoch [104/200], Training Loss: 0.0324, Training Accuracy: 99.04, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Epoch [104/200], Validation Loss: 4.6012, Validation Accuracy: 51.77%



Iteration 0: Loss = 0.0386, Reg Loss = 8.9366, Reconstruct Loss = 0.0000, Cls Loss = 0.0377, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0322, Reg Loss = 9.2863, Reconstruct Loss = 0.0005, Cls Loss = 0.0307, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0320, Reg Loss = 9.4129, Reconstruct Loss = 0.0004, Cls Loss = 0.0306, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0317, Reg Loss = 9.5126, Reconstruct Loss = 0.0004, Cls Loss = 0.0304, Learning rate = 1.0000e-03
Epoch [105/200], Training Loss: 0.0318, Training Accuracy: 99.10, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [105/200], Validation Loss: 5.7958, Validation Accuracy: 46.14%



Iteration 0: Loss = 0.0270, Reg Loss = 9.0953, Reconstruct Loss = 0.0000, Cls Loss = 0.0261, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0317, Reg Loss = 9.3797, Reconstruct Loss = 0.0005, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0325, Reg Loss = 9.3678, Reconstruct Loss = 0.0005, Cls Loss = 0.0311, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0328, Reg Loss = 9.3459, Reconstruct Loss = 0.0005, Cls Loss = 0.0314, Learning rate = 1.0000e-03
Epoch [106/200], Training Loss: 0.0334, Training Accuracy: 99.01, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [106/200], Validation Loss: 4.4193, Validation Accuracy: 51.89%



Iteration 0: Loss = 0.0149, Reg Loss = 9.1901, Reconstruct Loss = 0.0000, Cls Loss = 0.0140, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0313, Reg Loss = 9.5693, Reconstruct Loss = 0.0006, Cls Loss = 0.0298, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0314, Reg Loss = 9.4974, Reconstruct Loss = 0.0005, Cls Loss = 0.0299, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0308, Reg Loss = 9.6043, Reconstruct Loss = 0.0006, Cls Loss = 0.0292, Learning rate = 1.0000e-03
Epoch [107/200], Training Loss: 0.0312, Training Accuracy: 99.09, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [107/200], Validation Loss: 4.3219, Validation Accuracy: 54.43%



Iteration 0: Loss = 0.0650, Reg Loss = 9.8160, Reconstruct Loss = 0.0000, Cls Loss = 0.0640, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0295, Reg Loss = 9.2838, Reconstruct Loss = 0.0002, Cls Loss = 0.0284, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0318, Reg Loss = 9.2535, Reconstruct Loss = 0.0003, Cls Loss = 0.0306, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0316, Reg Loss = 9.2501, Reconstruct Loss = 0.0004, Cls Loss = 0.0303, Learning rate = 1.0000e-03
Epoch [108/200], Training Loss: 0.0319, Training Accuracy: 99.06, Learning Rate: 0.001000


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [108/200], Validation Loss: 3.3205, Validation Accuracy: 58.28%



Iteration 0: Loss = 0.0300, Reg Loss = 9.3198, Reconstruct Loss = 0.0000, Cls Loss = 0.0290, Learning rate = 1.0000e-03
Iteration 100: Loss = 0.0306, Reg Loss = 9.7489, Reconstruct Loss = 0.0002, Cls Loss = 0.0294, Learning rate = 1.0000e-03
Iteration 200: Loss = 0.0325, Reg Loss = 9.6548, Reconstruct Loss = 0.0003, Cls Loss = 0.0313, Learning rate = 1.0000e-03
Iteration 300: Loss = 0.0321, Reg Loss = 9.6015, Reconstruct Loss = 0.0002, Cls Loss = 0.0309, Learning rate = 1.0000e-03
Epoch [109/200], Training Loss: 0.0326, Training Accuracy: 99.10, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.80it/s]


Epoch [109/200], Validation Loss: 3.9984, Validation Accuracy: 56.87%



Iteration 0: Loss = 0.0209, Reg Loss = 9.3501, Reconstruct Loss = 0.0000, Cls Loss = 0.0199, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0281, Reg Loss = 9.1990, Reconstruct Loss = 0.0004, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0283, Reg Loss = 9.2013, Reconstruct Loss = 0.0004, Cls Loss = 0.0270, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0279, Reg Loss = 9.1282, Reconstruct Loss = 0.0003, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Epoch [110/200], Training Loss: 0.0277, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [110/200], Validation Loss: 4.9305, Validation Accuracy: 47.58%



Iteration 0: Loss = 0.0237, Reg Loss = 9.1201, Reconstruct Loss = 0.0000, Cls Loss = 0.0228, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0289, Reg Loss = 9.0358, Reconstruct Loss = 0.0003, Cls Loss = 0.0277, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0291, Reg Loss = 8.9955, Reconstruct Loss = 0.0003, Cls Loss = 0.0279, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0294, Reg Loss = 8.9574, Reconstruct Loss = 0.0002, Cls Loss = 0.0283, Learning rate = 1.0000e-04
Epoch [111/200], Training Loss: 0.0293, Training Accuracy: 99.08, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [111/200], Validation Loss: 2.7134, Validation Accuracy: 62.16%



Iteration 0: Loss = 0.0191, Reg Loss = 8.8621, Reconstruct Loss = 0.0000, Cls Loss = 0.0182, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0319, Reg Loss = 9.0401, Reconstruct Loss = 0.0003, Cls Loss = 0.0307, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0302, Reg Loss = 9.0810, Reconstruct Loss = 0.0004, Cls Loss = 0.0288, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0283, Reg Loss = 9.0151, Reconstruct Loss = 0.0003, Cls Loss = 0.0271, Learning rate = 1.0000e-04
Epoch [112/200], Training Loss: 0.0285, Training Accuracy: 99.19, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Epoch [112/200], Validation Loss: 3.4154, Validation Accuracy: 54.66%



Iteration 0: Loss = 0.0380, Reg Loss = 10.7513, Reconstruct Loss = 0.0082, Cls Loss = 0.0287, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0290, Reg Loss = 8.8994, Reconstruct Loss = 0.0003, Cls Loss = 0.0278, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0273, Reg Loss = 8.9022, Reconstruct Loss = 0.0003, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0280, Reg Loss = 8.9009, Reconstruct Loss = 0.0004, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Epoch [113/200], Training Loss: 0.0282, Training Accuracy: 99.17, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.01it/s]


Epoch [113/200], Validation Loss: 1.4490, Validation Accuracy: 72.34%



Checkpoint saved at epoch 112 with accuracy: 72.34%
Iteration 0: Loss = 0.0320, Reg Loss = 8.8375, Reconstruct Loss = 0.0000, Cls Loss = 0.0311, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0299, Reg Loss = 8.8271, Reconstruct Loss = 0.0003, Cls Loss = 0.0287, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0287, Reg Loss = 8.8449, Reconstruct Loss = 0.0004, Cls Loss = 0.0274, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0286, Reg Loss = 8.8446, Reconstruct Loss = 0.0004, Cls Loss = 0.0273, Learning rate = 1.0000e-04
Epoch [114/200], Training Loss: 0.0289, Training Accuracy: 99.13, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [114/200], Validation Loss: 1.8487, Validation Accuracy: 69.25%



Iteration 0: Loss = 0.0406, Reg Loss = 8.8316, Reconstruct Loss = 0.0000, Cls Loss = 0.0397, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0269, Reg Loss = 8.7108, Reconstruct Loss = 0.0004, Cls Loss = 0.0256, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0274, Reg Loss = 8.7360, Reconstruct Loss = 0.0004, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0276, Reg Loss = 8.7151, Reconstruct Loss = 0.0004, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Epoch [115/200], Training Loss: 0.0281, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.03it/s]


Epoch [115/200], Validation Loss: 1.9041, Validation Accuracy: 68.33%



Iteration 0: Loss = 0.0173, Reg Loss = 8.3245, Reconstruct Loss = 0.0000, Cls Loss = 0.0164, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0291, Reg Loss = 8.5992, Reconstruct Loss = 0.0002, Cls Loss = 0.0280, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0282, Reg Loss = 8.6055, Reconstruct Loss = 0.0003, Cls Loss = 0.0270, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0278, Reg Loss = 8.5963, Reconstruct Loss = 0.0003, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Epoch [116/200], Training Loss: 0.0275, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [116/200], Validation Loss: 2.4442, Validation Accuracy: 64.84%



Iteration 0: Loss = 0.0364, Reg Loss = 8.3317, Reconstruct Loss = 0.0000, Cls Loss = 0.0355, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0266, Reg Loss = 8.5467, Reconstruct Loss = 0.0003, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0275, Reg Loss = 8.5814, Reconstruct Loss = 0.0003, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0276, Reg Loss = 8.5694, Reconstruct Loss = 0.0004, Cls Loss = 0.0264, Learning rate = 1.0000e-04
Epoch [117/200], Training Loss: 0.0272, Training Accuracy: 99.23, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [117/200], Validation Loss: 2.0820, Validation Accuracy: 67.45%



Iteration 0: Loss = 0.0133, Reg Loss = 8.7524, Reconstruct Loss = 0.0000, Cls Loss = 0.0124, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0253, Reg Loss = 8.5623, Reconstruct Loss = 0.0003, Cls Loss = 0.0242, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0260, Reg Loss = 8.5374, Reconstruct Loss = 0.0002, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0269, Reg Loss = 8.5010, Reconstruct Loss = 0.0002, Cls Loss = 0.0258, Learning rate = 1.0000e-04
Epoch [118/200], Training Loss: 0.0268, Training Accuracy: 99.26, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Epoch [118/200], Validation Loss: 2.3996, Validation Accuracy: 64.47%



Iteration 0: Loss = 0.0195, Reg Loss = 8.4149, Reconstruct Loss = 0.0000, Cls Loss = 0.0187, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0258, Reg Loss = 8.3868, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0256, Reg Loss = 8.3681, Reconstruct Loss = 0.0002, Cls Loss = 0.0246, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0259, Reg Loss = 8.4001, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-04
Epoch [119/200], Training Loss: 0.0271, Training Accuracy: 99.21, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.84it/s]


Epoch [119/200], Validation Loss: 2.4816, Validation Accuracy: 64.42%



Iteration 0: Loss = 0.0333, Reg Loss = 8.0990, Reconstruct Loss = 0.0000, Cls Loss = 0.0324, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0281, Reg Loss = 8.2684, Reconstruct Loss = 0.0003, Cls Loss = 0.0270, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0272, Reg Loss = 8.2513, Reconstruct Loss = 0.0002, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0281, Reg Loss = 8.2319, Reconstruct Loss = 0.0002, Cls Loss = 0.0271, Learning rate = 1.0000e-04
Epoch [120/200], Training Loss: 0.0284, Training Accuracy: 99.17, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [120/200], Validation Loss: 2.7126, Validation Accuracy: 60.95%



Iteration 0: Loss = 0.0249, Reg Loss = 10.1056, Reconstruct Loss = 0.0074, Cls Loss = 0.0164, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0261, Reg Loss = 8.2018, Reconstruct Loss = 0.0004, Cls Loss = 0.0248, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0270, Reg Loss = 8.2071, Reconstruct Loss = 0.0005, Cls Loss = 0.0257, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0279, Reg Loss = 8.1703, Reconstruct Loss = 0.0004, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Epoch [121/200], Training Loss: 0.0279, Training Accuracy: 99.17, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [121/200], Validation Loss: 2.2986, Validation Accuracy: 64.83%



Iteration 0: Loss = 0.0228, Reg Loss = 7.7663, Reconstruct Loss = 0.0000, Cls Loss = 0.0220, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0272, Reg Loss = 8.0644, Reconstruct Loss = 0.0002, Cls Loss = 0.0262, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0271, Reg Loss = 8.0360, Reconstruct Loss = 0.0002, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0265, Reg Loss = 8.0133, Reconstruct Loss = 0.0002, Cls Loss = 0.0255, Learning rate = 1.0000e-04
Epoch [122/200], Training Loss: 0.0268, Training Accuracy: 99.24, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.83it/s]


Epoch [122/200], Validation Loss: 2.3156, Validation Accuracy: 64.81%



Iteration 0: Loss = 0.0359, Reg Loss = 7.9799, Reconstruct Loss = 0.0000, Cls Loss = 0.0351, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0254, Reg Loss = 7.9270, Reconstruct Loss = 0.0002, Cls Loss = 0.0245, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0260, Reg Loss = 7.9927, Reconstruct Loss = 0.0004, Cls Loss = 0.0248, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0258, Reg Loss = 7.9621, Reconstruct Loss = 0.0003, Cls Loss = 0.0246, Learning rate = 1.0000e-04
Epoch [123/200], Training Loss: 0.0259, Training Accuracy: 99.27, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [123/200], Validation Loss: 2.1995, Validation Accuracy: 64.28%



Iteration 0: Loss = 0.0149, Reg Loss = 7.5712, Reconstruct Loss = 0.0000, Cls Loss = 0.0142, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0253, Reg Loss = 7.8466, Reconstruct Loss = 0.0001, Cls Loss = 0.0244, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0261, Reg Loss = 7.8677, Reconstruct Loss = 0.0002, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0272, Reg Loss = 7.8716, Reconstruct Loss = 0.0002, Cls Loss = 0.0262, Learning rate = 1.0000e-04
Epoch [124/200], Training Loss: 0.0273, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.19it/s]


Epoch [124/200], Validation Loss: 1.6430, Validation Accuracy: 70.01%



Iteration 0: Loss = 0.0571, Reg Loss = 7.9548, Reconstruct Loss = 0.0000, Cls Loss = 0.0563, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0284, Reg Loss = 7.8567, Reconstruct Loss = 0.0004, Cls Loss = 0.0273, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0273, Reg Loss = 7.8182, Reconstruct Loss = 0.0003, Cls Loss = 0.0262, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0269, Reg Loss = 7.8205, Reconstruct Loss = 0.0003, Cls Loss = 0.0258, Learning rate = 1.0000e-04
Epoch [125/200], Training Loss: 0.0274, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [125/200], Validation Loss: 2.3042, Validation Accuracy: 63.74%



Iteration 0: Loss = 0.0610, Reg Loss = 7.7778, Reconstruct Loss = 0.0000, Cls Loss = 0.0602, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0296, Reg Loss = 7.7191, Reconstruct Loss = 0.0003, Cls Loss = 0.0285, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0293, Reg Loss = 7.7087, Reconstruct Loss = 0.0003, Cls Loss = 0.0282, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0284, Reg Loss = 7.7249, Reconstruct Loss = 0.0003, Cls Loss = 0.0273, Learning rate = 1.0000e-04
Epoch [126/200], Training Loss: 0.0278, Training Accuracy: 99.15, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [126/200], Validation Loss: 2.2986, Validation Accuracy: 64.54%



Iteration 0: Loss = 0.0324, Reg Loss = 7.8525, Reconstruct Loss = 0.0000, Cls Loss = 0.0316, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0268, Reg Loss = 7.6974, Reconstruct Loss = 0.0001, Cls Loss = 0.0259, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0275, Reg Loss = 7.7413, Reconstruct Loss = 0.0003, Cls Loss = 0.0264, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0277, Reg Loss = 7.7594, Reconstruct Loss = 0.0004, Cls Loss = 0.0265, Learning rate = 1.0000e-04
Epoch [127/200], Training Loss: 0.0278, Training Accuracy: 99.21, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [127/200], Validation Loss: 2.0259, Validation Accuracy: 65.93%



Iteration 0: Loss = 0.0160, Reg Loss = 7.3465, Reconstruct Loss = 0.0000, Cls Loss = 0.0153, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0306, Reg Loss = 7.7083, Reconstruct Loss = 0.0005, Cls Loss = 0.0294, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0280, Reg Loss = 7.6648, Reconstruct Loss = 0.0004, Cls Loss = 0.0269, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0281, Reg Loss = 7.6324, Reconstruct Loss = 0.0003, Cls Loss = 0.0270, Learning rate = 1.0000e-04
Epoch [128/200], Training Loss: 0.0272, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [128/200], Validation Loss: 1.6693, Validation Accuracy: 70.12%



Iteration 0: Loss = 0.0221, Reg Loss = 7.3408, Reconstruct Loss = 0.0000, Cls Loss = 0.0214, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0291, Reg Loss = 7.5424, Reconstruct Loss = 0.0003, Cls Loss = 0.0281, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0276, Reg Loss = 7.5006, Reconstruct Loss = 0.0002, Cls Loss = 0.0266, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0272, Reg Loss = 7.4965, Reconstruct Loss = 0.0002, Cls Loss = 0.0262, Learning rate = 1.0000e-04
Epoch [129/200], Training Loss: 0.0273, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Epoch [129/200], Validation Loss: 1.4784, Validation Accuracy: 71.45%



Iteration 0: Loss = 0.0447, Reg Loss = 7.4543, Reconstruct Loss = 0.0000, Cls Loss = 0.0440, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0285, Reg Loss = 7.4523, Reconstruct Loss = 0.0004, Cls Loss = 0.0274, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0277, Reg Loss = 7.4462, Reconstruct Loss = 0.0003, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0273, Reg Loss = 7.4608, Reconstruct Loss = 0.0003, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Epoch [130/200], Training Loss: 0.0268, Training Accuracy: 99.21, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [130/200], Validation Loss: 1.2553, Validation Accuracy: 72.81%



Checkpoint saved at epoch 129 with accuracy: 72.81%
Iteration 0: Loss = 0.0443, Reg Loss = 7.3770, Reconstruct Loss = 0.0000, Cls Loss = 0.0435, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0295, Reg Loss = 7.3859, Reconstruct Loss = 0.0002, Cls Loss = 0.0285, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0283, Reg Loss = 7.3734, Reconstruct Loss = 0.0002, Cls Loss = 0.0274, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0283, Reg Loss = 7.3854, Reconstruct Loss = 0.0002, Cls Loss = 0.0273, Learning rate = 1.0000e-04
Epoch [131/200], Training Loss: 0.0275, Training Accuracy: 99.16, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.81it/s]


Epoch [131/200], Validation Loss: 1.7425, Validation Accuracy: 68.60%



Iteration 0: Loss = 0.0184, Reg Loss = 7.4320, Reconstruct Loss = 0.0000, Cls Loss = 0.0177, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0260, Reg Loss = 7.3433, Reconstruct Loss = 0.0003, Cls Loss = 0.0250, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0263, Reg Loss = 7.3306, Reconstruct Loss = 0.0003, Cls Loss = 0.0253, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0263, Reg Loss = 7.3358, Reconstruct Loss = 0.0003, Cls Loss = 0.0253, Learning rate = 1.0000e-04
Epoch [132/200], Training Loss: 0.0267, Training Accuracy: 99.18, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [132/200], Validation Loss: 1.3963, Validation Accuracy: 71.14%



Iteration 0: Loss = 0.0269, Reg Loss = 7.3898, Reconstruct Loss = 0.0000, Cls Loss = 0.0262, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0266, Reg Loss = 7.3714, Reconstruct Loss = 0.0004, Cls Loss = 0.0255, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0256, Reg Loss = 7.3279, Reconstruct Loss = 0.0003, Cls Loss = 0.0246, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0258, Reg Loss = 7.3125, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-04
Epoch [133/200], Training Loss: 0.0266, Training Accuracy: 99.26, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [133/200], Validation Loss: 2.1022, Validation Accuracy: 65.38%



Iteration 0: Loss = 0.0459, Reg Loss = 7.2555, Reconstruct Loss = 0.0000, Cls Loss = 0.0452, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0269, Reg Loss = 7.3002, Reconstruct Loss = 0.0004, Cls Loss = 0.0258, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0271, Reg Loss = 7.3092, Reconstruct Loss = 0.0004, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0269, Reg Loss = 7.2893, Reconstruct Loss = 0.0004, Cls Loss = 0.0257, Learning rate = 1.0000e-04
Epoch [134/200], Training Loss: 0.0265, Training Accuracy: 99.27, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [134/200], Validation Loss: 1.8920, Validation Accuracy: 66.85%



Iteration 0: Loss = 0.0270, Reg Loss = 6.9779, Reconstruct Loss = 0.0000, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0272, Reg Loss = 7.1708, Reconstruct Loss = 0.0001, Cls Loss = 0.0264, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0268, Reg Loss = 7.2120, Reconstruct Loss = 0.0002, Cls Loss = 0.0259, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0273, Reg Loss = 7.2154, Reconstruct Loss = 0.0002, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Epoch [135/200], Training Loss: 0.0271, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Epoch [135/200], Validation Loss: 2.2589, Validation Accuracy: 61.64%



Iteration 0: Loss = 0.0333, Reg Loss = 7.0042, Reconstruct Loss = 0.0000, Cls Loss = 0.0326, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0290, Reg Loss = 7.2056, Reconstruct Loss = 0.0004, Cls Loss = 0.0279, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0271, Reg Loss = 7.1743, Reconstruct Loss = 0.0002, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0263, Reg Loss = 7.1596, Reconstruct Loss = 0.0002, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Epoch [136/200], Training Loss: 0.0258, Training Accuracy: 99.27, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Epoch [136/200], Validation Loss: 1.8226, Validation Accuracy: 65.98%



Iteration 0: Loss = 0.0163, Reg Loss = 7.0305, Reconstruct Loss = 0.0000, Cls Loss = 0.0156, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0266, Reg Loss = 7.1603, Reconstruct Loss = 0.0003, Cls Loss = 0.0256, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0269, Reg Loss = 7.1250, Reconstruct Loss = 0.0003, Cls Loss = 0.0259, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0269, Reg Loss = 7.0883, Reconstruct Loss = 0.0002, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Epoch [137/200], Training Loss: 0.0269, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [137/200], Validation Loss: 1.2317, Validation Accuracy: 72.45%



Iteration 0: Loss = 0.0528, Reg Loss = 6.9072, Reconstruct Loss = 0.0000, Cls Loss = 0.0521, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0275, Reg Loss = 7.0491, Reconstruct Loss = 0.0004, Cls Loss = 0.0264, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0281, Reg Loss = 7.0230, Reconstruct Loss = 0.0004, Cls Loss = 0.0270, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0277, Reg Loss = 7.0027, Reconstruct Loss = 0.0003, Cls Loss = 0.0267, Learning rate = 1.0000e-04
Epoch [138/200], Training Loss: 0.0276, Training Accuracy: 99.17, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Epoch [138/200], Validation Loss: 1.4019, Validation Accuracy: 71.70%



Iteration 0: Loss = 0.0212, Reg Loss = 6.8453, Reconstruct Loss = 0.0000, Cls Loss = 0.0205, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0262, Reg Loss = 6.9742, Reconstruct Loss = 0.0004, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0276, Reg Loss = 6.9964, Reconstruct Loss = 0.0004, Cls Loss = 0.0265, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0280, Reg Loss = 7.0261, Reconstruct Loss = 0.0005, Cls Loss = 0.0268, Learning rate = 1.0000e-04
Epoch [139/200], Training Loss: 0.0277, Training Accuracy: 99.16, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.84it/s]


Epoch [139/200], Validation Loss: 1.5575, Validation Accuracy: 68.16%



Iteration 0: Loss = 0.0207, Reg Loss = 6.8807, Reconstruct Loss = 0.0000, Cls Loss = 0.0200, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0260, Reg Loss = 6.9993, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0264, Reg Loss = 6.9281, Reconstruct Loss = 0.0002, Cls Loss = 0.0255, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0260, Reg Loss = 6.9127, Reconstruct Loss = 0.0002, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Epoch [140/200], Training Loss: 0.0262, Training Accuracy: 99.25, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [140/200], Validation Loss: 1.5365, Validation Accuracy: 69.17%



Iteration 0: Loss = 0.0392, Reg Loss = 6.6873, Reconstruct Loss = 0.0000, Cls Loss = 0.0386, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0287, Reg Loss = 6.8791, Reconstruct Loss = 0.0003, Cls Loss = 0.0277, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0276, Reg Loss = 6.9027, Reconstruct Loss = 0.0003, Cls Loss = 0.0266, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0270, Reg Loss = 6.9088, Reconstruct Loss = 0.0003, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Epoch [141/200], Training Loss: 0.0273, Training Accuracy: 99.18, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Epoch [141/200], Validation Loss: 1.3507, Validation Accuracy: 70.98%



Iteration 0: Loss = 0.0178, Reg Loss = 6.5797, Reconstruct Loss = 0.0000, Cls Loss = 0.0172, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0255, Reg Loss = 6.7910, Reconstruct Loss = 0.0001, Cls Loss = 0.0247, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0261, Reg Loss = 6.7993, Reconstruct Loss = 0.0001, Cls Loss = 0.0252, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0261, Reg Loss = 6.7725, Reconstruct Loss = 0.0001, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Epoch [142/200], Training Loss: 0.0261, Training Accuracy: 99.26, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [142/200], Validation Loss: 1.2946, Validation Accuracy: 71.44%



Iteration 0: Loss = 0.0234, Reg Loss = 6.7484, Reconstruct Loss = 0.0000, Cls Loss = 0.0228, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0266, Reg Loss = 6.7488, Reconstruct Loss = 0.0001, Cls Loss = 0.0258, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0269, Reg Loss = 6.7614, Reconstruct Loss = 0.0002, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0271, Reg Loss = 6.7508, Reconstruct Loss = 0.0002, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Epoch [143/200], Training Loss: 0.0270, Training Accuracy: 99.18, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.85it/s]


Epoch [143/200], Validation Loss: 1.8736, Validation Accuracy: 65.08%



Iteration 0: Loss = 0.0548, Reg Loss = 6.7056, Reconstruct Loss = 0.0000, Cls Loss = 0.0542, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0258, Reg Loss = 6.8235, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0259, Reg Loss = 6.7913, Reconstruct Loss = 0.0002, Cls Loss = 0.0250, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0260, Reg Loss = 6.7941, Reconstruct Loss = 0.0003, Cls Loss = 0.0250, Learning rate = 1.0000e-04
Epoch [144/200], Training Loss: 0.0258, Training Accuracy: 99.27, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [144/200], Validation Loss: 1.9743, Validation Accuracy: 64.69%



Iteration 0: Loss = 0.0125, Reg Loss = 6.6065, Reconstruct Loss = 0.0000, Cls Loss = 0.0118, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0264, Reg Loss = 6.6706, Reconstruct Loss = 0.0003, Cls Loss = 0.0255, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0247, Reg Loss = 6.6837, Reconstruct Loss = 0.0003, Cls Loss = 0.0238, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0250, Reg Loss = 6.6638, Reconstruct Loss = 0.0002, Cls Loss = 0.0241, Learning rate = 1.0000e-04
Epoch [145/200], Training Loss: 0.0250, Training Accuracy: 99.26, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Epoch [145/200], Validation Loss: 1.5282, Validation Accuracy: 69.94%



Iteration 0: Loss = 0.0155, Reg Loss = 6.5664, Reconstruct Loss = 0.0000, Cls Loss = 0.0148, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0255, Reg Loss = 6.6974, Reconstruct Loss = 0.0001, Cls Loss = 0.0247, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0273, Reg Loss = 6.7201, Reconstruct Loss = 0.0002, Cls Loss = 0.0264, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0271, Reg Loss = 6.7102, Reconstruct Loss = 0.0002, Cls Loss = 0.0263, Learning rate = 1.0000e-04
Epoch [146/200], Training Loss: 0.0266, Training Accuracy: 99.23, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.01it/s]


Epoch [146/200], Validation Loss: 1.9092, Validation Accuracy: 64.83%



Iteration 0: Loss = 0.0280, Reg Loss = 6.5290, Reconstruct Loss = 0.0000, Cls Loss = 0.0273, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0234, Reg Loss = 6.7211, Reconstruct Loss = 0.0003, Cls Loss = 0.0225, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0245, Reg Loss = 6.7302, Reconstruct Loss = 0.0004, Cls Loss = 0.0234, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0249, Reg Loss = 6.6801, Reconstruct Loss = 0.0003, Cls Loss = 0.0239, Learning rate = 1.0000e-04
Epoch [147/200], Training Loss: 0.0246, Training Accuracy: 99.31, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [147/200], Validation Loss: 0.9816, Validation Accuracy: 75.69%



Checkpoint saved at epoch 146 with accuracy: 75.69%
Iteration 0: Loss = 0.0411, Reg Loss = 6.4940, Reconstruct Loss = 0.0000, Cls Loss = 0.0404, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0262, Reg Loss = 6.5297, Reconstruct Loss = 0.0002, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0260, Reg Loss = 6.5548, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0255, Reg Loss = 6.5340, Reconstruct Loss = 0.0002, Cls Loss = 0.0246, Learning rate = 1.0000e-04
Epoch [148/200], Training Loss: 0.0260, Training Accuracy: 99.25, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Epoch [148/200], Validation Loss: 1.3255, Validation Accuracy: 71.73%



Iteration 0: Loss = 0.0238, Reg Loss = 6.2984, Reconstruct Loss = 0.0000, Cls Loss = 0.0231, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0257, Reg Loss = 6.5465, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0263, Reg Loss = 6.4986, Reconstruct Loss = 0.0002, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0267, Reg Loss = 6.5184, Reconstruct Loss = 0.0003, Cls Loss = 0.0258, Learning rate = 1.0000e-04
Epoch [149/200], Training Loss: 0.0265, Training Accuracy: 99.23, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.88it/s]


Epoch [149/200], Validation Loss: 1.2549, Validation Accuracy: 70.83%



Iteration 0: Loss = 0.0337, Reg Loss = 6.2038, Reconstruct Loss = 0.0000, Cls Loss = 0.0331, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0271, Reg Loss = 6.4611, Reconstruct Loss = 0.0004, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0262, Reg Loss = 6.4425, Reconstruct Loss = 0.0004, Cls Loss = 0.0252, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0267, Reg Loss = 6.4551, Reconstruct Loss = 0.0004, Cls Loss = 0.0257, Learning rate = 1.0000e-04
Epoch [150/200], Training Loss: 0.0268, Training Accuracy: 99.24, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.99it/s]


Epoch [150/200], Validation Loss: 1.6084, Validation Accuracy: 66.54%



Iteration 0: Loss = 0.0518, Reg Loss = 6.3294, Reconstruct Loss = 0.0000, Cls Loss = 0.0511, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0263, Reg Loss = 6.5354, Reconstruct Loss = 0.0004, Cls Loss = 0.0252, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0261, Reg Loss = 6.5163, Reconstruct Loss = 0.0004, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0266, Reg Loss = 6.4867, Reconstruct Loss = 0.0004, Cls Loss = 0.0255, Learning rate = 1.0000e-04
Epoch [151/200], Training Loss: 0.0267, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Epoch [151/200], Validation Loss: 1.5762, Validation Accuracy: 67.64%



Iteration 0: Loss = 0.0207, Reg Loss = 6.3152, Reconstruct Loss = 0.0000, Cls Loss = 0.0201, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0266, Reg Loss = 6.5030, Reconstruct Loss = 0.0006, Cls Loss = 0.0254, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0249, Reg Loss = 6.4459, Reconstruct Loss = 0.0004, Cls Loss = 0.0239, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0253, Reg Loss = 6.4341, Reconstruct Loss = 0.0004, Cls Loss = 0.0243, Learning rate = 1.0000e-04
Epoch [152/200], Training Loss: 0.0254, Training Accuracy: 99.27, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Epoch [152/200], Validation Loss: 1.4817, Validation Accuracy: 70.22%



Iteration 0: Loss = 0.0589, Reg Loss = 6.2523, Reconstruct Loss = 0.0000, Cls Loss = 0.0583, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0270, Reg Loss = 6.3900, Reconstruct Loss = 0.0003, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0277, Reg Loss = 6.3992, Reconstruct Loss = 0.0003, Cls Loss = 0.0268, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0266, Reg Loss = 6.3962, Reconstruct Loss = 0.0003, Cls Loss = 0.0257, Learning rate = 1.0000e-04
Epoch [153/200], Training Loss: 0.0263, Training Accuracy: 99.21, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.15it/s]


Epoch [153/200], Validation Loss: 1.7360, Validation Accuracy: 66.90%



Iteration 0: Loss = 0.0060, Reg Loss = 6.3127, Reconstruct Loss = 0.0000, Cls Loss = 0.0054, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0255, Reg Loss = 6.4353, Reconstruct Loss = 0.0003, Cls Loss = 0.0245, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0260, Reg Loss = 6.4934, Reconstruct Loss = 0.0005, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0256, Reg Loss = 6.4757, Reconstruct Loss = 0.0004, Cls Loss = 0.0245, Learning rate = 1.0000e-04
Epoch [154/200], Training Loss: 0.0260, Training Accuracy: 99.22, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.81it/s]


Epoch [154/200], Validation Loss: 1.1299, Validation Accuracy: 72.33%



Iteration 0: Loss = 0.0302, Reg Loss = 6.0953, Reconstruct Loss = 0.0000, Cls Loss = 0.0296, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0238, Reg Loss = 6.3642, Reconstruct Loss = 0.0003, Cls Loss = 0.0229, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0238, Reg Loss = 6.3649, Reconstruct Loss = 0.0003, Cls Loss = 0.0229, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0252, Reg Loss = 6.3714, Reconstruct Loss = 0.0003, Cls Loss = 0.0243, Learning rate = 1.0000e-04
Epoch [155/200], Training Loss: 0.0251, Training Accuracy: 99.28, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [155/200], Validation Loss: 1.1730, Validation Accuracy: 72.39%



Iteration 0: Loss = 0.0109, Reg Loss = 6.1995, Reconstruct Loss = 0.0000, Cls Loss = 0.0103, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0260, Reg Loss = 6.2768, Reconstruct Loss = 0.0002, Cls Loss = 0.0252, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0271, Reg Loss = 6.3109, Reconstruct Loss = 0.0003, Cls Loss = 0.0261, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0270, Reg Loss = 6.3246, Reconstruct Loss = 0.0003, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Epoch [156/200], Training Loss: 0.0275, Training Accuracy: 99.20, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.90it/s]


Epoch [156/200], Validation Loss: 1.1278, Validation Accuracy: 72.71%



Iteration 0: Loss = 0.0220, Reg Loss = 6.1100, Reconstruct Loss = 0.0000, Cls Loss = 0.0214, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0277, Reg Loss = 6.2555, Reconstruct Loss = 0.0003, Cls Loss = 0.0268, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0269, Reg Loss = 6.2593, Reconstruct Loss = 0.0002, Cls Loss = 0.0260, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0257, Reg Loss = 6.2727, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-04
Epoch [157/200], Training Loss: 0.0257, Training Accuracy: 99.26, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.82it/s]


Epoch [157/200], Validation Loss: 1.3853, Validation Accuracy: 69.64%



Iteration 0: Loss = 0.0191, Reg Loss = 6.2567, Reconstruct Loss = 0.0000, Cls Loss = 0.0185, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0248, Reg Loss = 6.3533, Reconstruct Loss = 0.0003, Cls Loss = 0.0239, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0257, Reg Loss = 6.3614, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0260, Reg Loss = 6.3358, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-04
Epoch [158/200], Training Loss: 0.0258, Training Accuracy: 99.31, Learning Rate: 0.000100


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [158/200], Validation Loss: 1.0260, Validation Accuracy: 74.81%



Iteration 0: Loss = 0.0359, Reg Loss = 6.2757, Reconstruct Loss = 0.0000, Cls Loss = 0.0353, Learning rate = 1.0000e-04
Iteration 100: Loss = 0.0254, Reg Loss = 6.3446, Reconstruct Loss = 0.0004, Cls Loss = 0.0244, Learning rate = 1.0000e-04
Iteration 200: Loss = 0.0258, Reg Loss = 6.2654, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Iteration 300: Loss = 0.0257, Reg Loss = 6.2330, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-04
Epoch [159/200], Training Loss: 0.0253, Training Accuracy: 99.28, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Epoch [159/200], Validation Loss: 1.5630, Validation Accuracy: 67.02%



Iteration 0: Loss = 0.0231, Reg Loss = 6.0265, Reconstruct Loss = 0.0000, Cls Loss = 0.0225, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0242, Reg Loss = 6.1900, Reconstruct Loss = 0.0002, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0249, Reg Loss = 6.1823, Reconstruct Loss = 0.0002, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0265, Reg Loss = 6.1878, Reconstruct Loss = 0.0002, Cls Loss = 0.0257, Learning rate = 1.0000e-05
Epoch [160/200], Training Loss: 0.0268, Training Accuracy: 99.20, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.98it/s]


Epoch [160/200], Validation Loss: 0.8953, Validation Accuracy: 75.89%



Checkpoint saved at epoch 159 with accuracy: 75.89%
Iteration 0: Loss = 0.0128, Reg Loss = 6.0743, Reconstruct Loss = 0.0000, Cls Loss = 0.0122, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0243, Reg Loss = 6.1287, Reconstruct Loss = 0.0000, Cls Loss = 0.0237, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0246, Reg Loss = 6.2032, Reconstruct Loss = 0.0002, Cls Loss = 0.0237, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0258, Reg Loss = 6.2121, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-05
Epoch [161/200], Training Loss: 0.0257, Training Accuracy: 99.22, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.12it/s]


Epoch [161/200], Validation Loss: 1.3740, Validation Accuracy: 70.45%



Iteration 0: Loss = 0.0286, Reg Loss = 6.1671, Reconstruct Loss = 0.0000, Cls Loss = 0.0279, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0272, Reg Loss = 6.1442, Reconstruct Loss = 0.0001, Cls Loss = 0.0265, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0261, Reg Loss = 6.1953, Reconstruct Loss = 0.0003, Cls Loss = 0.0252, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0256, Reg Loss = 6.1940, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Epoch [162/200], Training Loss: 0.0260, Training Accuracy: 99.24, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.05it/s]


Epoch [162/200], Validation Loss: 1.1610, Validation Accuracy: 71.72%



Iteration 0: Loss = 0.0113, Reg Loss = 6.0694, Reconstruct Loss = 0.0000, Cls Loss = 0.0106, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0261, Reg Loss = 6.1878, Reconstruct Loss = 0.0002, Cls Loss = 0.0253, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.1499, Reconstruct Loss = 0.0001, Cls Loss = 0.0249, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0261, Reg Loss = 6.2055, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-05
Epoch [163/200], Training Loss: 0.0257, Training Accuracy: 99.23, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [163/200], Validation Loss: 1.2735, Validation Accuracy: 71.19%



Iteration 0: Loss = 0.0134, Reg Loss = 6.1582, Reconstruct Loss = 0.0000, Cls Loss = 0.0128, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0245, Reg Loss = 6.1467, Reconstruct Loss = 0.0001, Cls Loss = 0.0237, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0243, Reg Loss = 6.1655, Reconstruct Loss = 0.0002, Cls Loss = 0.0235, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0247, Reg Loss = 6.1798, Reconstruct Loss = 0.0002, Cls Loss = 0.0239, Learning rate = 1.0000e-05
Epoch [164/200], Training Loss: 0.0248, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.14it/s]


Epoch [164/200], Validation Loss: 1.2535, Validation Accuracy: 71.56%



Iteration 0: Loss = 0.0526, Reg Loss = 6.1016, Reconstruct Loss = 0.0000, Cls Loss = 0.0520, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0247, Reg Loss = 6.1287, Reconstruct Loss = 0.0001, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0255, Reg Loss = 6.1429, Reconstruct Loss = 0.0001, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0254, Reg Loss = 6.1879, Reconstruct Loss = 0.0002, Cls Loss = 0.0245, Learning rate = 1.0000e-05
Epoch [165/200], Training Loss: 0.0252, Training Accuracy: 99.27, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Epoch [165/200], Validation Loss: 1.2676, Validation Accuracy: 70.55%



Iteration 0: Loss = 0.0404, Reg Loss = 6.1344, Reconstruct Loss = 0.0000, Cls Loss = 0.0398, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0254, Reg Loss = 6.2240, Reconstruct Loss = 0.0004, Cls Loss = 0.0244, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.2194, Reconstruct Loss = 0.0004, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0264, Reg Loss = 6.2114, Reconstruct Loss = 0.0003, Cls Loss = 0.0254, Learning rate = 1.0000e-05
Epoch [166/200], Training Loss: 0.0266, Training Accuracy: 99.18, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [166/200], Validation Loss: 1.3753, Validation Accuracy: 69.61%



Iteration 0: Loss = 0.0100, Reg Loss = 6.1382, Reconstruct Loss = 0.0000, Cls Loss = 0.0094, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0256, Reg Loss = 6.2746, Reconstruct Loss = 0.0005, Cls Loss = 0.0244, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.2260, Reconstruct Loss = 0.0004, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0257, Reg Loss = 6.1915, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Epoch [167/200], Training Loss: 0.0251, Training Accuracy: 99.27, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Epoch [167/200], Validation Loss: 1.3559, Validation Accuracy: 70.06%



Iteration 0: Loss = 0.0411, Reg Loss = 6.0327, Reconstruct Loss = 0.0000, Cls Loss = 0.0405, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0246, Reg Loss = 6.1661, Reconstruct Loss = 0.0002, Cls Loss = 0.0238, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0243, Reg Loss = 6.1851, Reconstruct Loss = 0.0002, Cls Loss = 0.0235, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0239, Reg Loss = 6.1853, Reconstruct Loss = 0.0002, Cls Loss = 0.0231, Learning rate = 1.0000e-05
Epoch [168/200], Training Loss: 0.0244, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [168/200], Validation Loss: 1.2279, Validation Accuracy: 71.42%



Iteration 0: Loss = 0.0731, Reg Loss = 5.9842, Reconstruct Loss = 0.0000, Cls Loss = 0.0725, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0260, Reg Loss = 6.1878, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0254, Reg Loss = 6.1663, Reconstruct Loss = 0.0002, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0254, Reg Loss = 6.1829, Reconstruct Loss = 0.0003, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Epoch [169/200], Training Loss: 0.0261, Training Accuracy: 99.30, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.88it/s]


Epoch [169/200], Validation Loss: 1.3376, Validation Accuracy: 70.11%



Iteration 0: Loss = 0.0117, Reg Loss = 6.2912, Reconstruct Loss = 0.0000, Cls Loss = 0.0111, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0256, Reg Loss = 6.1969, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0263, Reg Loss = 6.1961, Reconstruct Loss = 0.0003, Cls Loss = 0.0253, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0260, Reg Loss = 6.1963, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-05
Epoch [170/200], Training Loss: 0.0261, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Epoch [170/200], Validation Loss: 1.4653, Validation Accuracy: 68.96%



Iteration 0: Loss = 0.0190, Reg Loss = 5.9552, Reconstruct Loss = 0.0000, Cls Loss = 0.0184, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0256, Reg Loss = 6.1762, Reconstruct Loss = 0.0004, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0249, Reg Loss = 6.1838, Reconstruct Loss = 0.0004, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0252, Reg Loss = 6.2031, Reconstruct Loss = 0.0004, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Epoch [171/200], Training Loss: 0.0250, Training Accuracy: 99.28, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.79it/s]


Epoch [171/200], Validation Loss: 1.6021, Validation Accuracy: 67.17%



Iteration 0: Loss = 0.0106, Reg Loss = 6.1159, Reconstruct Loss = 0.0000, Cls Loss = 0.0100, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0248, Reg Loss = 6.2465, Reconstruct Loss = 0.0005, Cls Loss = 0.0237, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0236, Reg Loss = 6.2046, Reconstruct Loss = 0.0003, Cls Loss = 0.0226, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0234, Reg Loss = 6.1593, Reconstruct Loss = 0.0002, Cls Loss = 0.0225, Learning rate = 1.0000e-05
Epoch [172/200], Training Loss: 0.0239, Training Accuracy: 99.32, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.99it/s]


Epoch [172/200], Validation Loss: 1.7214, Validation Accuracy: 66.02%



Iteration 0: Loss = 0.0479, Reg Loss = 6.1318, Reconstruct Loss = 0.0000, Cls Loss = 0.0473, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0262, Reg Loss = 6.1181, Reconstruct Loss = 0.0001, Cls Loss = 0.0255, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0259, Reg Loss = 6.1452, Reconstruct Loss = 0.0002, Cls Loss = 0.0251, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0250, Reg Loss = 6.1669, Reconstruct Loss = 0.0003, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Epoch [173/200], Training Loss: 0.0255, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.81it/s]


Epoch [173/200], Validation Loss: 1.0495, Validation Accuracy: 74.31%



Iteration 0: Loss = 0.0101, Reg Loss = 6.1758, Reconstruct Loss = 0.0000, Cls Loss = 0.0095, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0252, Reg Loss = 6.1327, Reconstruct Loss = 0.0001, Cls Loss = 0.0244, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.1480, Reconstruct Loss = 0.0002, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0250, Reg Loss = 6.1495, Reconstruct Loss = 0.0002, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Epoch [174/200], Training Loss: 0.0249, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Epoch [174/200], Validation Loss: 1.4564, Validation Accuracy: 68.73%



Iteration 0: Loss = 0.0172, Reg Loss = 6.1392, Reconstruct Loss = 0.0000, Cls Loss = 0.0166, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0256, Reg Loss = 6.1400, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0259, Reg Loss = 6.1427, Reconstruct Loss = 0.0003, Cls Loss = 0.0250, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0259, Reg Loss = 6.1469, Reconstruct Loss = 0.0003, Cls Loss = 0.0250, Learning rate = 1.0000e-05
Epoch [175/200], Training Loss: 0.0258, Training Accuracy: 99.25, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [175/200], Validation Loss: 1.1802, Validation Accuracy: 72.84%



Iteration 0: Loss = 0.0478, Reg Loss = 6.1453, Reconstruct Loss = 0.0000, Cls Loss = 0.0472, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0252, Reg Loss = 6.1230, Reconstruct Loss = 0.0002, Cls Loss = 0.0244, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0251, Reg Loss = 6.1542, Reconstruct Loss = 0.0003, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0253, Reg Loss = 6.1250, Reconstruct Loss = 0.0002, Cls Loss = 0.0245, Learning rate = 1.0000e-05
Epoch [176/200], Training Loss: 0.0256, Training Accuracy: 99.23, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.95it/s]


Epoch [176/200], Validation Loss: 1.0517, Validation Accuracy: 74.04%



Iteration 0: Loss = 0.0384, Reg Loss = 5.9063, Reconstruct Loss = 0.0000, Cls Loss = 0.0378, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0283, Reg Loss = 6.1795, Reconstruct Loss = 0.0004, Cls Loss = 0.0273, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0270, Reg Loss = 6.2169, Reconstruct Loss = 0.0005, Cls Loss = 0.0258, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0265, Reg Loss = 6.1944, Reconstruct Loss = 0.0004, Cls Loss = 0.0254, Learning rate = 1.0000e-05
Epoch [177/200], Training Loss: 0.0262, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.94it/s]


Epoch [177/200], Validation Loss: 1.1212, Validation Accuracy: 73.05%



Iteration 0: Loss = 0.0134, Reg Loss = 6.0829, Reconstruct Loss = 0.0000, Cls Loss = 0.0128, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0247, Reg Loss = 6.1546, Reconstruct Loss = 0.0003, Cls Loss = 0.0238, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0250, Reg Loss = 6.1663, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0249, Reg Loss = 6.1483, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Epoch [178/200], Training Loss: 0.0253, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.99it/s]


Epoch [178/200], Validation Loss: 1.4195, Validation Accuracy: 70.52%



Iteration 0: Loss = 0.0309, Reg Loss = 6.1584, Reconstruct Loss = 0.0000, Cls Loss = 0.0302, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0238, Reg Loss = 6.0793, Reconstruct Loss = 0.0001, Cls Loss = 0.0231, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0240, Reg Loss = 6.1008, Reconstruct Loss = 0.0002, Cls Loss = 0.0231, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0238, Reg Loss = 6.1125, Reconstruct Loss = 0.0002, Cls Loss = 0.0230, Learning rate = 1.0000e-05
Epoch [179/200], Training Loss: 0.0239, Training Accuracy: 99.34, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.00it/s]


Epoch [179/200], Validation Loss: 1.4547, Validation Accuracy: 68.57%



Iteration 0: Loss = 0.0199, Reg Loss = 6.2288, Reconstruct Loss = 0.0000, Cls Loss = 0.0193, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0258, Reg Loss = 6.1830, Reconstruct Loss = 0.0004, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0257, Reg Loss = 6.1604, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0260, Reg Loss = 6.1548, Reconstruct Loss = 0.0003, Cls Loss = 0.0251, Learning rate = 1.0000e-05
Epoch [180/200], Training Loss: 0.0257, Training Accuracy: 99.25, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.97it/s]


Epoch [180/200], Validation Loss: 1.2339, Validation Accuracy: 70.95%



Iteration 0: Loss = 0.0131, Reg Loss = 5.8758, Reconstruct Loss = 0.0000, Cls Loss = 0.0125, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0233, Reg Loss = 6.0884, Reconstruct Loss = 0.0001, Cls Loss = 0.0226, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0242, Reg Loss = 6.0894, Reconstruct Loss = 0.0002, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0240, Reg Loss = 6.0932, Reconstruct Loss = 0.0002, Cls Loss = 0.0232, Learning rate = 1.0000e-05
Epoch [181/200], Training Loss: 0.0245, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.08it/s]


Epoch [181/200], Validation Loss: 0.9929, Validation Accuracy: 74.47%



Iteration 0: Loss = 0.0104, Reg Loss = 6.1342, Reconstruct Loss = 0.0000, Cls Loss = 0.0098, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0265, Reg Loss = 6.1393, Reconstruct Loss = 0.0004, Cls Loss = 0.0256, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.1169, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0249, Reg Loss = 6.0952, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Epoch [182/200], Training Loss: 0.0256, Training Accuracy: 99.24, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Epoch [182/200], Validation Loss: 1.1511, Validation Accuracy: 71.87%



Iteration 0: Loss = 0.0100, Reg Loss = 6.1874, Reconstruct Loss = 0.0000, Cls Loss = 0.0094, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0254, Reg Loss = 6.1472, Reconstruct Loss = 0.0004, Cls Loss = 0.0245, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0256, Reg Loss = 6.1234, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0255, Reg Loss = 6.1115, Reconstruct Loss = 0.0003, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Epoch [183/200], Training Loss: 0.0261, Training Accuracy: 99.19, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.99it/s]


Epoch [183/200], Validation Loss: 1.0722, Validation Accuracy: 73.22%



Iteration 0: Loss = 0.0328, Reg Loss = 6.0165, Reconstruct Loss = 0.0000, Cls Loss = 0.0322, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0248, Reg Loss = 6.0395, Reconstruct Loss = 0.0001, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0268, Reg Loss = 6.0858, Reconstruct Loss = 0.0002, Cls Loss = 0.0259, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0258, Reg Loss = 6.0882, Reconstruct Loss = 0.0003, Cls Loss = 0.0249, Learning rate = 1.0000e-05
Epoch [184/200], Training Loss: 0.0263, Training Accuracy: 99.22, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Epoch [184/200], Validation Loss: 1.2074, Validation Accuracy: 72.10%



Iteration 0: Loss = 0.0248, Reg Loss = 6.0964, Reconstruct Loss = 0.0000, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0250, Reg Loss = 6.0913, Reconstruct Loss = 0.0003, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0244, Reg Loss = 6.0781, Reconstruct Loss = 0.0002, Cls Loss = 0.0235, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0255, Reg Loss = 6.0868, Reconstruct Loss = 0.0003, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Epoch [185/200], Training Loss: 0.0252, Training Accuracy: 99.28, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.87it/s]


Epoch [185/200], Validation Loss: 1.0958, Validation Accuracy: 73.43%



Iteration 0: Loss = 0.0456, Reg Loss = 8.3284, Reconstruct Loss = 0.0066, Cls Loss = 0.0382, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0268, Reg Loss = 6.1574, Reconstruct Loss = 0.0006, Cls Loss = 0.0257, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0268, Reg Loss = 6.1208, Reconstruct Loss = 0.0004, Cls Loss = 0.0258, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0260, Reg Loss = 6.1128, Reconstruct Loss = 0.0004, Cls Loss = 0.0250, Learning rate = 1.0000e-05
Epoch [186/200], Training Loss: 0.0261, Training Accuracy: 99.23, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Epoch [186/200], Validation Loss: 1.1139, Validation Accuracy: 73.09%



Iteration 0: Loss = 0.0128, Reg Loss = 5.8852, Reconstruct Loss = 0.0000, Cls Loss = 0.0122, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0229, Reg Loss = 6.1554, Reconstruct Loss = 0.0005, Cls Loss = 0.0218, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0248, Reg Loss = 6.1097, Reconstruct Loss = 0.0003, Cls Loss = 0.0239, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0249, Reg Loss = 6.1079, Reconstruct Loss = 0.0003, Cls Loss = 0.0239, Learning rate = 1.0000e-05
Epoch [187/200], Training Loss: 0.0252, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.22it/s]


Epoch [187/200], Validation Loss: 1.5488, Validation Accuracy: 67.69%



Iteration 0: Loss = 0.0051, Reg Loss = 6.1103, Reconstruct Loss = 0.0000, Cls Loss = 0.0045, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0249, Reg Loss = 6.1274, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0242, Reg Loss = 6.0812, Reconstruct Loss = 0.0002, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0243, Reg Loss = 6.0783, Reconstruct Loss = 0.0003, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Epoch [188/200], Training Loss: 0.0242, Training Accuracy: 99.33, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.09it/s]


Epoch [188/200], Validation Loss: 1.1907, Validation Accuracy: 70.50%



Iteration 0: Loss = 0.0191, Reg Loss = 5.8784, Reconstruct Loss = 0.0000, Cls Loss = 0.0185, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0229, Reg Loss = 5.9916, Reconstruct Loss = 0.0001, Cls Loss = 0.0222, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0240, Reg Loss = 6.0105, Reconstruct Loss = 0.0001, Cls Loss = 0.0233, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0242, Reg Loss = 6.0084, Reconstruct Loss = 0.0001, Cls Loss = 0.0235, Learning rate = 1.0000e-05
Epoch [189/200], Training Loss: 0.0241, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.27it/s]


Epoch [189/200], Validation Loss: 1.1882, Validation Accuracy: 71.97%



Iteration 0: Loss = 0.0497, Reg Loss = 5.9921, Reconstruct Loss = 0.0000, Cls Loss = 0.0491, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0262, Reg Loss = 6.0246, Reconstruct Loss = 0.0001, Cls Loss = 0.0255, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0258, Reg Loss = 6.1029, Reconstruct Loss = 0.0004, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0249, Reg Loss = 6.0626, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Epoch [190/200], Training Loss: 0.0245, Training Accuracy: 99.31, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.28it/s]


Epoch [190/200], Validation Loss: 1.0725, Validation Accuracy: 74.30%



Iteration 0: Loss = 0.0084, Reg Loss = 5.7307, Reconstruct Loss = 0.0000, Cls Loss = 0.0078, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0251, Reg Loss = 6.0834, Reconstruct Loss = 0.0003, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0250, Reg Loss = 6.0553, Reconstruct Loss = 0.0003, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0255, Reg Loss = 6.0392, Reconstruct Loss = 0.0002, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Epoch [191/200], Training Loss: 0.0252, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.29it/s]


Epoch [191/200], Validation Loss: 1.5233, Validation Accuracy: 67.84%



Iteration 0: Loss = 0.0083, Reg Loss = 6.1477, Reconstruct Loss = 0.0000, Cls Loss = 0.0077, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0243, Reg Loss = 6.0364, Reconstruct Loss = 0.0003, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0250, Reg Loss = 6.0384, Reconstruct Loss = 0.0003, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0250, Reg Loss = 6.0344, Reconstruct Loss = 0.0003, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Epoch [192/200], Training Loss: 0.0249, Training Accuracy: 99.32, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.19it/s]


Epoch [192/200], Validation Loss: 1.1175, Validation Accuracy: 73.54%



Iteration 0: Loss = 0.0373, Reg Loss = 5.7584, Reconstruct Loss = 0.0000, Cls Loss = 0.0367, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0246, Reg Loss = 6.0502, Reconstruct Loss = 0.0003, Cls Loss = 0.0238, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0254, Reg Loss = 6.0303, Reconstruct Loss = 0.0002, Cls Loss = 0.0246, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0249, Reg Loss = 6.0401, Reconstruct Loss = 0.0003, Cls Loss = 0.0240, Learning rate = 1.0000e-05
Epoch [193/200], Training Loss: 0.0246, Training Accuracy: 99.30, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.02it/s]


Epoch [193/200], Validation Loss: 1.2862, Validation Accuracy: 71.75%



Iteration 0: Loss = 0.0239, Reg Loss = 5.8304, Reconstruct Loss = 0.0000, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0257, Reg Loss = 6.0483, Reconstruct Loss = 0.0003, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0259, Reg Loss = 6.0806, Reconstruct Loss = 0.0004, Cls Loss = 0.0249, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0258, Reg Loss = 6.0587, Reconstruct Loss = 0.0003, Cls Loss = 0.0248, Learning rate = 1.0000e-05
Epoch [194/200], Training Loss: 0.0264, Training Accuracy: 99.23, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Epoch [194/200], Validation Loss: 1.3485, Validation Accuracy: 70.23%



Iteration 0: Loss = 0.0106, Reg Loss = 5.7714, Reconstruct Loss = 0.0000, Cls Loss = 0.0100, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0255, Reg Loss = 6.0946, Reconstruct Loss = 0.0005, Cls Loss = 0.0244, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0259, Reg Loss = 6.0555, Reconstruct Loss = 0.0003, Cls Loss = 0.0250, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0262, Reg Loss = 6.0478, Reconstruct Loss = 0.0003, Cls Loss = 0.0253, Learning rate = 1.0000e-05
Epoch [195/200], Training Loss: 0.0259, Training Accuracy: 99.25, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.24it/s]


Epoch [195/200], Validation Loss: 1.2433, Validation Accuracy: 71.10%



Iteration 0: Loss = 0.0497, Reg Loss = 5.8380, Reconstruct Loss = 0.0000, Cls Loss = 0.0492, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0252, Reg Loss = 6.0736, Reconstruct Loss = 0.0004, Cls Loss = 0.0242, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0243, Reg Loss = 6.0656, Reconstruct Loss = 0.0004, Cls Loss = 0.0233, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0251, Reg Loss = 6.0566, Reconstruct Loss = 0.0004, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Epoch [196/200], Training Loss: 0.0249, Training Accuracy: 99.29, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.10it/s]


Epoch [196/200], Validation Loss: 1.3451, Validation Accuracy: 69.66%



Iteration 0: Loss = 0.0093, Reg Loss = 6.0646, Reconstruct Loss = 0.0000, Cls Loss = 0.0087, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0241, Reg Loss = 6.0928, Reconstruct Loss = 0.0005, Cls Loss = 0.0230, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0246, Reg Loss = 6.0703, Reconstruct Loss = 0.0004, Cls Loss = 0.0236, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0247, Reg Loss = 6.0627, Reconstruct Loss = 0.0004, Cls Loss = 0.0237, Learning rate = 1.0000e-05
Epoch [197/200], Training Loss: 0.0254, Training Accuracy: 99.26, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.09it/s]


Epoch [197/200], Validation Loss: 1.5166, Validation Accuracy: 67.25%



Iteration 0: Loss = 0.0309, Reg Loss = 5.9095, Reconstruct Loss = 0.0000, Cls Loss = 0.0303, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0262, Reg Loss = 6.0479, Reconstruct Loss = 0.0003, Cls Loss = 0.0253, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0252, Reg Loss = 6.0864, Reconstruct Loss = 0.0004, Cls Loss = 0.0241, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0248, Reg Loss = 6.0726, Reconstruct Loss = 0.0004, Cls Loss = 0.0238, Learning rate = 1.0000e-05
Epoch [198/200], Training Loss: 0.0243, Training Accuracy: 99.34, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Epoch [198/200], Validation Loss: 1.6786, Validation Accuracy: 66.26%



Iteration 0: Loss = 0.0129, Reg Loss = 5.9768, Reconstruct Loss = 0.0000, Cls Loss = 0.0123, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0253, Reg Loss = 5.9429, Reconstruct Loss = 0.0000, Cls Loss = 0.0247, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0259, Reg Loss = 6.0061, Reconstruct Loss = 0.0002, Cls Loss = 0.0252, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0263, Reg Loss = 6.0112, Reconstruct Loss = 0.0002, Cls Loss = 0.0255, Learning rate = 1.0000e-05
Epoch [199/200], Training Loss: 0.0257, Training Accuracy: 99.28, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.41it/s]


Epoch [199/200], Validation Loss: 1.2056, Validation Accuracy: 71.21%



Iteration 0: Loss = 0.0070, Reg Loss = 6.0550, Reconstruct Loss = 0.0000, Cls Loss = 0.0064, Learning rate = 1.0000e-05
Iteration 100: Loss = 0.0229, Reg Loss = 5.9664, Reconstruct Loss = 0.0001, Cls Loss = 0.0223, Learning rate = 1.0000e-05
Iteration 200: Loss = 0.0243, Reg Loss = 6.0275, Reconstruct Loss = 0.0003, Cls Loss = 0.0234, Learning rate = 1.0000e-05
Iteration 300: Loss = 0.0248, Reg Loss = 6.0430, Reconstruct Loss = 0.0003, Cls Loss = 0.0239, Learning rate = 1.0000e-05
Epoch [200/200], Training Loss: 0.0249, Training Accuracy: 99.33, Learning Rate: 0.000010


100%|██████████| 79/79 [00:03<00:00, 23.37it/s]

Epoch [200/200], Validation Loss: 1.3752, Validation Accuracy: 69.37%








In [None]:
# End the active Weights & Biases run; the run history and run summary
# tables shown in this cell's output are printed by this call.
wandb.finish()

0,1
Cls Loss,█████▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▃▃▃▃▃▃▃▃▃▃▂▃▂▂▂▂▁▁
Learning rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss,█▇▇▇▆▇█▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▃▂▂▂▂▁▁▂▂▁
Reconstruct Loss,█▇▇▅▅▅▇▅▅▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁
Reg Loss,▁▁▁▇▇█████▇▆▆▅▄▆▆▆▅▅▄▄▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇
Training accuracy,▁▁▂▂▃▄▄▄▅▆▆▇▇▇██
Validation Accuracy,▂▁▂▁▁▁▂▆▆▇██▇▇▆█
Validation Loss,▃▃▂▃▃▃▂▂▁▂▃▄▂██▄

0,1
Cls Loss,0.08349
Learning rate,0.001
Loss,0.0917
Reconstruct Loss,0.00755
Reg Loss,6.62567
Training accuracy,0.97296
Validation Accuracy,0.6941
Validation Loss,1.52875


### 7 Testing loop

In [35]:
# Location of the best-hypernetwork checkpoint inside the experiment's save directory
# (presumably written by the training loop — confirm the filename against the save call).
# Built with os.path.join, like the other paths derived from save_model_path in this notebook,
# so a trailing separator in the config does not produce a doubled "/".
saved_hypernet_path = os.path.join(args.training.save_model_path, 'cifar10_nerf_best.pth')

In [36]:
# Bare expression: shows the resolved checkpoint path as the cell output for a quick sanity check.
saved_hypernet_path

'toy/experiments_densenet/dense_23th_experiment/cifar10_nerf_best.pth'

In [37]:
# Build a separate hypernetwork instance for testing; its weights are filled in from the
# saved checkpoint in the next cell. `number_param` is defined earlier in the notebook.
# NOTE(review): device placement is left to get_hypernetwork — confirm it moves the model to `device`.
hyper_model_test = get_hypernetwork(args, number_param)

Hyper model type: resmlp
Using scalar 0.1
num_freqs:  16 <class 'int'>


In [38]:
# Read the checkpoint file with its tensors mapped to the CPU.
# NOTE(review): depending on the torch version / `weights_only` setting, torch.load may unpickle
# arbitrary objects — only load checkpoint files from a trusted source.
checkpoint = torch.load(saved_hypernet_path, map_location="cpu")  # or "cuda" if using GPU
# Copy the stored hypernetwork weights into the test instance. As the last expression of the
# cell, the return value of load_state_dict is displayed (it reports whether all keys matched).
hyper_model_test.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [39]:
# Evaluate the checkpoint-restored hypernetwork on every target width.
# For each hidden_dim: build a DenseNet of that width, merge it with the hypernetwork via
# sample_merge_model, validate the merged model, save its state dict, and append the metrics
# to a per-experiment results file.

# Inference only: make sure the hypernetwork is in eval mode (a no-op if it has no
# mode-dependent layers such as dropout or batch norm).
hyper_model_test.eval()

# The results file is the same for every width, so its path is built once up front.
filename = f"cifar10_results_{args.experiment.name}.txt"
filepath = os.path.join(args.training.save_model_path, filename)

for hidden_dim in range(12, 49):  # hidden dimensions 12..48 inclusive
    # Create a model for this given dimension
    model_trained = create_model(args.model.type,
                                 layers=args.model.layers,
                                 growth=args.model.growth,
                                 compression=args.model.compression,
                                 bottleneck=args.model.bottleneck,
                                 drop_rate=args.model.drop_rate,
                                 path=args.model.pretrained_path,
                                 hidden_dim=hidden_dim).to(device)

    # If EMA is specified, apply it
    # NOTE(review): `ema` is not created in this section, so it presumably wraps the hypernetwork
    # used during training rather than `hyper_model_test` — confirm that applying it here
    # actually affects the weights being evaluated.
    if ema:
        print('Applying EMA')
        ema.apply()

    try:
        # Sample the merged model
        accumulated_model = sample_merge_model(hyper_model_test, model_trained, args, K=100)

        # Validate the merged model
        val_loss, acc = validate_single(accumulated_model, val_loader, val_criterion, args=args)
    finally:
        # Always undo the EMA swap, even if sampling or validation raises; otherwise the
        # EMA-applied weights would be left in place for the next iteration or cell.
        if ema:
            ema.restore()

    # Save the model
    save_name = os.path.join(args.training.save_model_path, f"cifar10_{accumulated_model.__class__.__name__}_dim{hidden_dim}_single.pth")
    torch.save(accumulated_model.state_dict(), save_name)

    # Print the results
    print(f"Test using model {args.model}: hidden_dim {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%")
    print('\n')

    # Write the results. 'a' appends, creating the file if it doesn't exist, so results of
    # completed widths survive a crash — but re-running this cell adds duplicate lines.
    with open(filepath, "a") as file:
        file.write(f"Hidden_dim: {hidden_dim}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {acc*100:.2f}%\n")

Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.04it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 12, Validation Loss: 1.2346, Validation Accuracy: 72.07%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.96it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 13, Validation Loss: 2.0399, Validation Accuracy: 62.52%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.05it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 14, Validation Loss: 1.6664, Validation Accuracy: 67.04%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.15it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 15, Validation Loss: 1.8352, Validation Accuracy: 63.33%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.36it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 16, Validation Loss: 1.2926, Validation Accuracy: 69.24%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 20.40it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 17, Validation Loss: 1.6851, Validation Accuracy: 64.91%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:04<00:00, 17.06it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 18, Validation Loss: 1.8128, Validation Accuracy: 64.36%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.60it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 19, Validation Loss: 2.1827, Validation Accuracy: 60.87%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.29it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 20, Validation Loss: 1.5972, Validation Accuracy: 66.97%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.63it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 21, Validation Loss: 1.3696, Validation Accuracy: 70.02%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.88it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 22, Validation Loss: 1.2358, Validation Accuracy: 71.69%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.90it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 23, Validation Loss: 0.9830, Validation Accuracy: 75.92%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.28it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 24, Validation Loss: 1.4240, Validation Accuracy: 69.54%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.51it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 25, Validation Loss: 1.5009, Validation Accuracy: 69.28%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 26, Validation Loss: 1.3085, Validation Accuracy: 71.54%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.22it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 27, Validation Loss: 1.4000, Validation Accuracy: 70.39%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.24it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 28, Validation Loss: 1.7895, Validation Accuracy: 66.46%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 29, Validation Loss: 1.5590, Validation Accuracy: 68.27%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.86it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 30, Validation Loss: 1.4931, Validation Accuracy: 69.17%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.12it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 31, Validation Loss: 1.3219, Validation Accuracy: 70.64%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.73it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 32, Validation Loss: 1.1366, Validation Accuracy: 73.95%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.75it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 33, Validation Loss: 1.2054, Validation Accuracy: 72.70%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.62it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 34, Validation Loss: 1.3230, Validation Accuracy: 71.19%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.78it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 35, Validation Loss: 1.3111, Validation Accuracy: 71.49%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.91it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 36, Validation Loss: 1.4216, Validation Accuracy: 69.89%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.14it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 37, Validation Loss: 1.1957, Validation Accuracy: 72.80%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.24it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 38, Validation Loss: 1.2055, Validation Accuracy: 72.84%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.62it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 39, Validation Loss: 1.0883, Validation Accuracy: 74.60%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.20it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 40, Validation Loss: 1.1837, Validation Accuracy: 72.75%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.06it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 41, Validation Loss: 1.2959, Validation Accuracy: 72.07%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.93it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 42, Validation Loss: 1.4700, Validation Accuracy: 69.86%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.37it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 43, Validation Loss: 1.5823, Validation Accuracy: 68.75%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.43it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 44, Validation Loss: 1.2544, Validation Accuracy: 72.46%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.42it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 45, Validation Loss: 1.3352, Validation Accuracy: 71.39%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.11it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 46, Validation Loss: 1.3677, Validation Accuracy: 71.22%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 23.16it/s]


Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 47, Validation Loss: 1.5490, Validation Accuracy: 69.60%


Loading model from toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth
Applying EMA


100%|██████████| 79/79 [00:03<00:00, 22.88it/s]

Test using model {'type': 'DenseNet', 'pretrained_path': 'toy/experiments/densenet_bc_40_12_baseline/densenet_bc_40_12_cifar10_baseline_best.pth', 'layers': 40, 'growth': 12, 'compression': 0.5, 'bottleneck': True, 'drop_rate': 0.0}: hidden_dim 48, Validation Loss: 1.3041, Validation Accuracy: 70.65%





