In [2]:
pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m68.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstal

In [3]:
pip install optuna

Note: you may need to restart the kernel to use updated packages.


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import glob
import pickle
import os
from PIL import Image
import numpy as np
import onnxruntime as ort
import requests
from torch import optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import optuna
from copy import deepcopy
from datetime import datetime

# ===== Configuration =====
# Credentials for the evaluation server. Both values can be overridden through
# environment variables so the notebook does not need to be edited to change
# them; the literal fallbacks keep the previous behaviour when the variables
# are not set.
# NOTE(review): remove the literal fallbacks before sharing this notebook.
TOKEN = os.environ.get("MODEL_STEALING_TOKEN", "12910150")
# SEED is a string that submit_model() sends as an HTTP header. It is not used
# to seed any random number generator in this file.
SEED = os.environ.get("MODEL_STEALING_SEED", "69713536")
# Prefer the GPU when one is visible, otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Per-channel statistics passed to transforms.Normalize in StealingDataset.
MEAN = [0.2980, 0.2962, 0.2987]
STD = [0.2886, 0.2875, 0.2889]

# Start-up banner describing the run.
print(f"\n{'='*50}")
print(f"Initializing Model Stealing Attack")
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Device: {DEVICE}")
print(f"Normalization - Mean: {MEAN}, Std: {STD}")
print(f"{'='*50}\n")

# ===== Dataset =====
# Dataset used for loading queried images and their corresponding embeddings
class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index):
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)

torch.serialization.add_safe_globals({'TaskDataset': TaskDataset})

# Dataset class to load embeddings and match them with input images
class StealingDataset(Dataset):
    """Pairs queried images with the embeddings stored for them on disk.

    Every ``out*.pickle`` file found in ``pickle_dir`` is read as a mapping
    with ``"indices"`` and ``"embeddings"`` entries. Each index that is smaller
    than ``len(images)`` contributes one ``(image, embedding)`` pair, with the
    embedding converted to a float32 tensor.

    Args:
        pickle_dir: Directory searched for ``out*.pickle`` files.
        images: Indexable collection of images; entries must provide
            ``.convert("RGB")`` (used in ``__getitem__``).
    """

    def __init__(self, pickle_dir, images):
        print(f"\n[Data] Initializing dataset from {pickle_dir}")
        print("[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize")
        steps = [
            transforms.Resize(32),
            transforms.CenterCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=MEAN, std=STD),
        ]
        self.transform = transforms.Compose(steps)
        self.data = []

        # Collect the embedding files in lexicographic order.
        pattern = os.path.join(pickle_dir, "out*.pickle")
        pickle_files = sorted(glob.glob(pattern))
        n_files = len(pickle_files)
        print(f"[Data] Found {n_files} pickle files")

        n_images = len(images)
        for position, path in enumerate(pickle_files, start=1):
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only point this at trusted data.
            with open(path, "rb") as handle:
                payload = pickle.load(handle)
            size_before = len(self.data)
            # Keep only entries whose index refers to an available image.
            self.data.extend(
                (images[img_idx], torch.tensor(vector, dtype=torch.float32))
                for img_idx, vector in zip(payload["indices"], payload["embeddings"])
                if img_idx < n_images
            )
            loaded = len(self.data) - size_before
            print(f"[Data] Loaded {loaded} samples from {os.path.basename(path)} ({position}/{n_files})")

        print(f"[Data] Total samples: {len(self.data)}")

    def __len__(self):
        """Number of (image, embedding) pairs collected."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the transformed RGB image and its stored embedding."""
        image, embedding = self.data[idx]
        return self.transform(image.convert("RGB")), embedding


# ===== Early Stopping =====
# Class for early stopping during training based on validation loss
class EarlyStopping:
    """Tracks a loss across calls and flags when it has stopped improving.

    A call counts as an improvement unless ``-loss`` is strictly smaller than
    the best recorded ``-loss`` plus ``delta``. On improvement the model's
    ``state_dict()`` is deep-copied into ``best_model``; otherwise a counter
    is incremented and ``early_stop`` becomes True once it reaches
    ``patience``.

    Args:
        patience: Number of consecutive non-improving calls that triggers the stop flag.
        delta: Margin added to the best score in the comparison.
        verbose: When True, progress messages are printed.
    """

    def __init__(self, patience=7, delta=0, verbose=True):
        self.patience, self.delta, self.verbose = patience, delta, verbose
        self.counter = 0
        self.early_stop = False
        self.best_score = None
        self.best_model = None
        self.best_loss = np.inf

    def __call__(self, loss, model):
        candidate = -loss
        reference = self.best_score

        # Non-improving step: count it and possibly raise the stop flag.
        if reference is not None and candidate < reference + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'[EarlyStopping] Counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First call or an improvement: record it and snapshot the weights.
        self.best_score = candidate
        self.save_checkpoint(loss, model)
        if reference is not None:
            self.counter = 0

    def save_checkpoint(self, loss, model):
        """Store ``loss`` as the best loss and keep a deep copy of the model's state dict."""
        if self.verbose:
            previous = self.best_loss
            print(f'[EarlyStopping] Loss improved ({previous:.6f} → {loss:.6f}). Saving model...')
        self.best_loss = loss
        self.best_model = deepcopy(model.state_dict())
        

# ===== Model Architecture =====
# Residual block used in the encoder
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages with an identity skip connection.

    The channel count and spatial size are preserved (padding=1, stride 1),
    so the input can be added to the second stage's output before the final
    GELU.

    Args:
        channels: Number of input and output channels.
    """

    def __init__(self, channels):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        hidden = self.conv1(x)
        hidden = F.gelu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        # Skip connection: add the block input back before the activation.
        return F.gelu(hidden + x)

# Main encoder model with residual blocks and bottleneck layers
class EnhancedResNetEncoder(nn.Module):
    """Convolutional encoder producing a 1024-dimensional vector per image.

    Layout: a 3->64 stem convolution, four 64-channel residual blocks with
    three 2x2 max-pools interleaved, dropout, then a two-layer MLP. The first
    linear layer expects 64*4*4 features, i.e. a 4x4 map after the three
    pools (which a 32x32 input produces).

    Args:
        bottleneck_width: Hidden width of the MLP head.
        dropout_rate: Dropout probability applied to the pooled feature map.
    """

    def __init__(self, bottleneck_width=1024, dropout_rate=0.3):
        super().__init__()
        print(f"\n[Model] Initializing encoder with:")
        print(f"  - Bottleneck width: {bottleneck_width}")
        print(f"  - Dropout rate: {dropout_rate}")

        width = 64
        self.conv_in = nn.Conv2d(3, width, 3, padding=1, bias=False)
        self.bn_in = nn.BatchNorm2d(width)
        self.layer0 = ResidualBlock(width)
        self.layer1 = ResidualBlock(width)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.layer2 = ResidualBlock(width)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.layer3 = ResidualBlock(width)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout_rate)
        self.bottleneck = nn.Sequential(
            nn.Linear(width * 4 * 4, bottleneck_width),
            nn.GELU(),
            nn.Linear(bottleneck_width, 1024),
        )

        # Re-initialise conv weights (Kaiming normal, fan-out, 'relu' gain) and
        # reset batch-norm affine parameters to identity.
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

        print("[Model] Architecture initialized successfully")

    def forward(self, x):
        features = F.gelu(self.bn_in(self.conv_in(x)))
        features = self.layer0(features)
        # Each remaining residual block is followed by its own 2x2 max-pool.
        stages = (
            (self.layer1, self.pool1),
            (self.layer2, self.pool2),
            (self.layer3, self.pool3),
        )
        for block, pool in stages:
            features = pool(block(features))
        features = self.dropout(features)
        flat = torch.flatten(features, 1)
        return self.bottleneck(flat)

# ===== Hybrid Loss =====
# Loss function combining MSE, cosine similarity, and contrastive-like term
class HybridLoss(nn.Module):
    """Weighted sum of MSE, cosine distance and a mismatched-pair penalty.

    ``loss = alpha * mse + (1 - alpha) * (1 - cos(pred, target))
             + 0.1 * cos(pred, other_target)``

    where ``other_target`` is the target of a *different* sample in the batch.

    Args:
        alpha: Weight of the MSE term; the cosine term gets ``1 - alpha``.
    """

    def __init__(self, alpha=0.7):
        super().__init__()
        print(f"\n[Loss] Initializing hybrid loss with alpha={alpha}")
        self.alpha = alpha

    def forward(self, pred, target):
        """Compute the loss for ``pred`` and ``target`` (batch along dim 0).

        Returns:
            A scalar tensor.
        """
        mse_loss = F.mse_loss(pred, target)
        cosine_loss = 1 - F.cosine_similarity(pred, target).mean()

        batch_size = target.size(0)
        if batch_size > 1:
            # Pair every prediction with the target of another sample. A plain
            # random permutation can map an index to itself, which would
            # penalise similarity to the sample's *own* target and fight the
            # cosine term. Walking a random cycle (perm[i] -> perm[i-1]) never
            # produces a self-pair. The permutation is created on the target's
            # device to avoid a host-side index tensor.
            perm = torch.randperm(batch_size, device=target.device)
            mismatched = target[torch.roll(perm, 1)]
            contrastive_loss = F.cosine_similarity(pred[perm], mismatched).mean()
        else:
            # A single sample has no other target to be contrasted with.
            contrastive_loss = pred.new_zeros(())

        # NOTE(review): if the target embeddings are all nearly parallel, this
        # last term sits close to 0.1 whatever the model does; the logged
        # losses plateau around 0.100, which would be consistent - confirm.
        return (self.alpha * mse_loss +
                (1 - self.alpha) * cosine_loss +
                0.1 * contrastive_loss)

# ===== Optuna Optimization =====
# Objective function for Optuna to minimize the training loss
# It trains the model with trial hyperparameters and uses early stopping
def objective(trial):
    """Optuna objective: train one encoder with trial-suggested hyperparameters.

    Samples learning rate, batch size, bottleneck width, dropout rate, loss
    alpha and weight decay from ``trial``, trains an ``EnhancedResNetEncoder``
    for at most 100 epochs with AdamW, cosine warm restarts and gradient
    clipping, and stops early after 5 non-improving epochs.

    Args:
        trial: The ``optuna`` trial used for suggestions, reporting and pruning.

    Returns:
        The best per-epoch average *training* loss seen by early stopping.

    Raises:
        optuna.TrialPruned: When the study's pruner asks to stop this trial.

    NOTE(review): there is no validation split - the value being minimised is
    the loss on the (augmented) training data itself.
    NOTE(review): the raw dataset and all pickle files are re-read from disk
    on every trial.
    """
    print(f"\n{'='*50}")
    print(f"Starting Optuna Trial {trial.number}")
    print(f"{'='*50}")
    
    # Search space for this trial
    config = {
        'lr': trial.suggest_float('lr', 1e-5, 1e-3, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128, 256]),
        'bottleneck_width': trial.suggest_categorical('bottleneck_width', [512, 1024, 2048]),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.1, 0.5),
        'alpha': trial.suggest_float('alpha', 0.4, 0.9),
        'weight_decay': trial.suggest_float('weight_decay', 1e-6, 1e-4, log=True)
    }
    
    print("\n[Optuna] Suggested hyperparameters:")
    for k, v in config.items():
        print(f"  {k}: {v}")

    # Load data (weights_only=False: the file holds a pickled dataset object,
    # so it must come from a trusted source)
    print("\n[Data] Loading dataset...")
    dataset_raw = torch.load("/kaggle/input/modelstealingpub2/ModelStealingPub.pt", weights_only=False)
    dataset = StealingDataset("/kaggle/input/embeddings-dataset", dataset_raw.imgs)
    train_loader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
    print(f"[Data] Batch size: {config['batch_size']}, Total batches: {len(train_loader)}")

    # Model
    print("\n[Model] Initializing model...")
    model = EnhancedResNetEncoder(
        bottleneck_width=config['bottleneck_width'],
        dropout_rate=config['dropout_rate']
    ).to(DEVICE)
    print(f"[Model] Total parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=config['lr'],
        weight_decay=config['weight_decay']
    )
    print(f"[Optimizer] Initialized with lr={config['lr']:.2e}, weight_decay={config['weight_decay']:.2e}")
    
    # Scheduler: stepped once per epoch below, restarting every 10 epochs
    scheduler = CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,
        T_mult=1,
        eta_min=1e-6
    )
    print("[Scheduler] CosineAnnealingWarmRestarts (T_0=10, eta_min=1e-6)")
    
    criterion = HybridLoss(alpha=config['alpha'])
    early_stopping = EarlyStopping(patience=5, verbose=True)

    # Training loop
    print("\n[Training] Starting training...")
    for epoch in range(100):
        model.train()
        total_loss = 0.0
        
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            
            # Clip the global gradient norm to 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            total_loss += loss.item()
            
            if batch_idx % 10 == 0:
                print(f"[Training] Epoch {epoch+1:03d} | Batch {batch_idx:03d}/{len(train_loader):03d} | Current Loss: {loss.item():.6f}")
        
        scheduler.step()
        # Mean of the per-batch losses for this epoch
        avg_loss = total_loss / len(train_loader)
        print(f"\n[Training] Epoch {epoch+1:03d} Summary:")
        print(f"  Avg Loss: {avg_loss:.6f}")
        print(f"  Current LR: {optimizer.param_groups[0]['lr']:.2e}")
        
        early_stopping(avg_loss, model)
        if early_stopping.early_stop:
            print("\n[Training] Early stopping triggered")
            break
            
        # Reached only when early stopping did not fire, so the loss of the
        # epoch that triggers the stop is never reported to Optuna.
        trial.report(avg_loss, epoch)
        if trial.should_prune():
            print("\n[Optuna] Trial pruned")
            raise optuna.TrialPruned()

    print(f"\n[Optuna] Trial {trial.number} completed with best loss: {early_stopping.best_loss:.6f}")
    return early_stopping.best_loss

# Runs Optuna study for hyperparameter search
def optimize_hyperparameters(n_trials=20, seed=None):
    """Run an Optuna study over ``objective`` and return the best parameters.

    The study minimises the objective with a TPE sampler and a median pruner.

    Args:
        n_trials: Number of trials to run (previously fixed at 20).
        seed: Optional seed for the TPE sampler so that the sequence of
            suggested hyperparameters can be repeated. ``None`` keeps the
            sampler unseeded, as before.

    Returns:
        dict: The parameter dictionary of the best trial.

    Raises:
        ValueError: Presumably raised by ``study.best_trial`` when no trial
            completed (e.g. all were pruned) - confirm against Optuna docs.
    """
    print(f"\n{'='*50}")
    print("Starting Hyperparameter Optimization with Optuna")
    print(f"{'='*50}")

    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=seed),
        pruner=optuna.pruners.MedianPruner()
    )
    study.optimize(objective, n_trials=n_trials)

    print("\n[Optuna] Optimization completed")
    print("Best trial:")
    trial = study.best_trial
    print(f"  Value (loss): {trial.value:.6f}")
    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return trial.params

# ===== Final Training =====
# Trains the final model using best hyperparameters found by Optuna
# Saves the best model based on early stopping
def train_final_model(params):
    """Train the final encoder with the given hyperparameters and save it.

    Uses the same data, optimizer, scheduler and loss setup as ``objective``
    but with an early-stopping patience of 10 and no Optuna reporting. After
    training, the best weights recorded by early stopping are restored and a
    checkpoint is written to ``stolen_encoder_final.pt``.

    Args:
        params: Mapping with the keys ``batch_size``, ``bottleneck_width``,
            ``dropout_rate``, ``lr``, ``weight_decay`` and ``alpha``.

    Returns:
        The trained model, loaded with the best weights. It is left in
        training mode (``model.train()`` is the last mode switch).

    NOTE(review): "best" is judged on the average training loss; no held-out
    data is used.
    """
    print(f"\n{'='*50}")
    print("Starting Final Model Training")
    print(f"Using parameters:")
    for k, v in params.items():
        print(f"  {k}: {v}")
    print(f"{'='*50}")
    
    # Load data (weights_only=False: the file holds a pickled dataset object,
    # so it must come from a trusted source)
    print("\n[Data] Loading dataset...")
    dataset_raw = torch.load("/kaggle/input/modelstealingpub2/ModelStealingPub.pt", weights_only=False)
    dataset = StealingDataset("/kaggle/input/embeddings-dataset", dataset_raw.imgs)
    train_loader = DataLoader(dataset, batch_size=params['batch_size'], shuffle=True, pin_memory=True)
    print(f"[Data] Batch size: {params['batch_size']}, Total batches: {len(train_loader)}")

    # Model
    print("\n[Model] Initializing final model...")
    model = EnhancedResNetEncoder(
        bottleneck_width=params['bottleneck_width'],
        dropout_rate=params['dropout_rate']
    ).to(DEVICE)
    print(f"[Model] Total parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Optimizer
    optimizer = optim.AdamW(
        model.parameters(),
        lr=params['lr'],
        weight_decay=params['weight_decay']
    )
    print(f"[Optimizer] Initialized with lr={params['lr']:.2e}, weight_decay={params['weight_decay']:.2e}")

    # Scheduler: stepped once per epoch below, restarting every 10 epochs
    scheduler = CosineAnnealingWarmRestarts(
        optimizer,
        T_0=10,
        T_mult=1,
        eta_min=1e-6
    )
    print("[Scheduler] CosineAnnealingWarmRestarts (T_0=10, eta_min=1e-6)")

    criterion = HybridLoss(alpha=params['alpha'])
    early_stopping = EarlyStopping(patience=10, verbose=True)

    # Training loop
    print("\n[Training] Starting final training...")
    for epoch in range(100):
        model.train()
        total_loss = 0.0
        
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            
            # Clip the global gradient norm to 1.0 before the update
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            total_loss += loss.item()
            
            if batch_idx % 10 == 0:
                print(f"[Training] Epoch {epoch+1:03d} | Batch {batch_idx:03d}/{len(train_loader):03d} | Current Loss: {loss.item():.6f}")
        
        scheduler.step()
        # Mean of the per-batch losses for this epoch
        avg_loss = total_loss / len(train_loader)
        print(f"\n[Training] Epoch {epoch+1:03d} Summary:")
        print(f"  Avg Loss: {avg_loss:.6f}")
        print(f"  Current LR: {optimizer.param_groups[0]['lr']:.2e}")
        
        early_stopping(avg_loss, model)
        if early_stopping.early_stop:
            print("\n[Training] Early stopping triggered")
            break

    # Roll back to the weights from the epoch with the lowest average loss
    model.load_state_dict(early_stopping.best_model)
    print(f"\n[Training] Final training completed with best loss: {early_stopping.best_loss:.6f}")

    # Save the final model together with its loss and hyperparameters
    model_path = "stolen_encoder_final.pt"
    torch.save({
        'model_state_dict': model.state_dict(),
        'best_loss': early_stopping.best_loss,
        'params': params
    }, model_path)
    print(f"\n[Training] Model saved to {model_path} with best loss: {early_stopping.best_loss:.6f}")
    
    return model

# ===== Export & Submit =====
# Converts trained model to ONNX format and verifies it
def export_model(model):
    """Export ``model`` to ``stolen_encoder_optimized.onnx`` and smoke-test it.

    The model is switched to evaluation mode for the export so that dropout
    is disabled and batch-norm uses its running statistics regardless of how
    the exporter treats the module's mode; the previous mode is restored
    afterwards. The exported graph takes an input named ``"x"`` with a
    dynamic batch dimension.

    Args:
        model: The trained ``nn.Module``; expected to map ``(N, 3, 32, 32)``
            inputs to ``(N, 1024)`` outputs.

    Raises:
        Exception: Any error from the export, or from loading/running the
            ONNX file (including the output-shape assertion), is re-raised.
    """
    print(f"\n{'='*50}")
    print("Exporting Model to ONNX")
    print(f"{'='*50}")

    # Build the tracing input on the model's own device; fall back to the
    # global DEVICE for a module without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE
    dummy_input = torch.randn(1, 3, 32, 32).to(device)
    onnx_path = "stolen_encoder_optimized.onnx"

    print("\n[Export] Converting model to ONNX format...")
    was_training = model.training
    model.eval()
    try:
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            input_names=["x"],
            output_names=["output"],
            dynamic_axes={
                "x": {0: "batch_size"},
                "output": {0: "batch_size"}
            },
            opset_version=11
        )
    finally:
        # Leave the caller's model in the mode it arrived in.
        model.train(was_training)
    print(f"[Export] Model saved to {onnx_path}")

    # Verify: load the file with ONNX Runtime and check the output shape
    print("\n[Export] Verifying ONNX model...")
    try:
        ort_session = ort.InferenceSession(onnx_path)
        test_input = np.random.randn(1, 3, 32, 32).astype(np.float32)
        ort_out = ort_session.run(None, {"x": test_input})[0]
        assert ort_out.shape == (1, 1024)
        print("[Export] Verification successful!")
        print(f"[Export] Output shape: {ort_out.shape}")
    except Exception as e:
        print(f"[Export] Verification failed: {str(e)}")
        raise

# Submits ONNX model to the remote evaluation server
def submit_model(timeout=(10, 600)):
    """Upload ``stolen_encoder_optimized.onnx`` to the evaluation server.

    This is best-effort: any failure is printed, not raised.

    Args:
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. Without a timeout the call could block
            forever on an unresponsive server.

    NOTE(review): the endpoint is plain HTTP, so the token header travels
    unencrypted.
    """
    print(f"\n{'='*50}")
    print("Submitting Model to Server")
    print(f"{'='*50}")

    try:
        with open("stolen_encoder_optimized.onnx", "rb") as f:
            print("[Submission] Sending model to server...")
            response = requests.post(
                "http://34.122.51.94:9090/stealing",
                files={"file": f},
                headers={"token": TOKEN, "seed": SEED},
                timeout=timeout
            )
        print("[Submission] Server response:")
        try:
            print(response.json())
        except ValueError:
            # The body was not JSON (e.g. an HTML or plain-text error page);
            # show it verbatim instead of hiding it behind a decode error.
            print(f"[Submission] HTTP {response.status_code}: {response.text}")
    except Exception as e:
        print(f"[Submission] Failed: {str(e)}")

# ===== Main Execution =====
if __name__ == "__main__":
    # Runs the whole pipeline: hyperparameter search, final training, then
    # ONNX export. Any error is printed and then re-raised so the traceback
    # is still shown.
    try:
        # Step 1: Hyperparameter optimization
        best_params = optimize_hyperparameters()
        
        # Step 2: Train final model with best params
        final_model = train_final_model(best_params)
        
        # Step 3: Export and submit
        export_model(final_model)
        # Submission is disabled; uncomment the next line to upload the
        # exported ONNX file to the evaluation server.
        # submit_model()
        
        print("\n[Main] Pipeline completed successfully!")
    except Exception as e:
        print(f"\n[Main] Error encountered: {str(e)}")
        raise

[I 2025-06-21 14:41:20,587] A new study created in memory with name: no-name-b7c1733c-1524-4aeb-87c1-bd57f4a07b79



Initializing Model Stealing Attack
Timestamp: 2025-06-21 14:41:20
Device: cuda
Normalization - Mean: [0.298, 0.2962, 0.2987], Std: [0.2886, 0.2875, 0.2889]


Starting Hyperparameter Optimization with Optuna

Starting Optuna Trial 0

[Optuna] Suggested hyperparameters:
  lr: 0.0009467904939391108
  batch_size: 32
  bottleneck_width: 512
  dropout_rate: 0.3979044543867316
  alpha: 0.5586241729095667
  weight_decay: 1.0133275210700108e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples from out3.pickle (7/

[I 2025-06-21 14:47:26,593] Trial 0 finished with value: 0.10012221861749668 and parameters: {'lr': 0.0009467904939391108, 'batch_size': 32, 'bottleneck_width': 512, 'dropout_rate': 0.3979044543867316, 'alpha': 0.5586241729095667, 'weight_decay': 1.0133275210700108e-06}. Best is trial 0 with value: 0.10012221861749668.



Starting Optuna Trial 1

[Optuna] Suggested hyperparameters:
  lr: 5.2595132291187947e-05
  batch_size: 64
  bottleneck_width: 1024
  dropout_rate: 0.1789694089827214
  alpha: 0.5576093850912043
  weight_decay: 4.483978503838243e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples from out3.pickle (7/13)
[Data] Loaded 1000 samples from out4.pickle (8/13)
[Data] Loaded 1000 samples from out5.pickle (9/13)
[Data] Loaded 1000 samples from out6.pickle (10/13)
[Data] Loaded 1000 samples from out7.pickle (11/1

[I 2025-06-21 15:09:44,465] Trial 1 finished with value: 0.10011662966480442 and parameters: {'lr': 5.2595132291187947e-05, 'batch_size': 64, 'bottleneck_width': 1024, 'dropout_rate': 0.1789694089827214, 'alpha': 0.5576093850912043, 'weight_decay': 4.483978503838243e-06}. Best is trial 1 with value: 0.10011662966480442.


[Training] Epoch 095 | Batch 200/204 | Current Loss: 0.100109

[Training] Epoch 095 Summary:
  Avg Loss: 0.100119
  Current LR: 2.68e-05
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 1 completed with best loss: 0.100117

Starting Optuna Trial 2

[Optuna] Suggested hyperparameters:
  lr: 0.0005201857972779173
  batch_size: 256
  bottleneck_width: 2048
  dropout_rate: 0.17017258595011986
  alpha: 0.5489804503264961
  weight_decay: 8.950090467096336e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/1

[I 2025-06-21 15:23:35,025] Trial 2 finished with value: 0.10014381741776186 and parameters: {'lr': 0.0005201857972779173, 'batch_size': 256, 'bottleneck_width': 2048, 'dropout_rate': 0.17017258595011986, 'alpha': 0.5489804503264961, 'weight_decay': 8.950090467096336e-06}. Best is trial 1 with value: 0.10011662966480442.


[Training] Epoch 064 | Batch 050/051 | Current Loss: 0.100160

[Training] Epoch 064 Summary:
  Avg Loss: 0.100160
  Current LR: 3.41e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 2 completed with best loss: 0.100144

Starting Optuna Trial 3

[Optuna] Suggested hyperparameters:
  lr: 0.0001215802890237896
  batch_size: 256
  bottleneck_width: 1024
  dropout_rate: 0.3438623105865585
  alpha: 0.8858620925213587
  weight_decay: 6.498883072157955e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13

[I 2025-06-21 15:45:19,861] Trial 3 finished with value: 0.10038027824724421 and parameters: {'lr': 0.0001215802890237896, 'batch_size': 256, 'bottleneck_width': 1024, 'dropout_rate': 0.3438623105865585, 'alpha': 0.8858620925213587, 'weight_decay': 6.498883072157955e-06}. Best is trial 1 with value: 0.10011662966480442.


[Training] Epoch 100 | Batch 050/051 | Current Loss: 0.100441

[Training] Epoch 100 Summary:
  Avg Loss: 0.100380
  Current LR: 1.22e-04
[EarlyStopping] Loss improved (0.100385 → 0.100380). Saving model...

[Optuna] Trial 3 completed with best loss: 0.100380

Starting Optuna Trial 4

[Optuna] Suggested hyperparameters:
  lr: 9.496433119337225e-05
  batch_size: 128
  bottleneck_width: 512
  dropout_rate: 0.20473365624673523
  alpha: 0.6536122610703325
  weight_decay: 4.61096933635227e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/

[I 2025-06-21 16:07:23,777] Trial 4 finished with value: 0.10019232887847751 and parameters: {'lr': 9.496433119337225e-05, 'batch_size': 128, 'bottleneck_width': 512, 'dropout_rate': 0.20473365624673523, 'alpha': 0.6536122610703325, 'weight_decay': 4.61096933635227e-06}. Best is trial 1 with value: 0.10011662966480442.


[Training] Epoch 100 | Batch 100/102 | Current Loss: 0.100171

[Training] Epoch 100 Summary:
  Avg Loss: 0.100192
  Current LR: 9.50e-05
[EarlyStopping] Loss improved (0.100197 → 0.100192). Saving model...

[Optuna] Trial 4 completed with best loss: 0.100192

Starting Optuna Trial 5

[Optuna] Suggested hyperparameters:
  lr: 0.00012527124122234504
  batch_size: 256
  bottleneck_width: 1024
  dropout_rate: 0.44533386674718345
  alpha: 0.6521192029626868
  weight_decay: 1.1548197110095881e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle

[I 2025-06-21 16:07:39,317] Trial 5 pruned. 


[Training] Epoch 001 | Batch 050/051 | Current Loss: 0.139761

[Training] Epoch 001 Summary:
  Avg Loss: 0.327803
  Current LR: 1.22e-04
[EarlyStopping] Loss improved (inf → 0.327803). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 6

[Optuna] Suggested hyperparameters:
  lr: 0.0006171845799545735
  batch_size: 128
  bottleneck_width: 2048
  dropout_rate: 0.1725223414798908
  alpha: 0.787342942048626
  weight_decay: 6.947186900534145e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples from

[I 2025-06-21 16:19:50,326] Trial 6 finished with value: 0.10011763432446648 and parameters: {'lr': 0.0006171845799545735, 'batch_size': 128, 'bottleneck_width': 2048, 'dropout_rate': 0.1725223414798908, 'alpha': 0.787342942048626, 'weight_decay': 6.947186900534145e-06}. Best is trial 1 with value: 0.10011662966480442.


[Training] Epoch 054 | Batch 100/102 | Current Loss: 0.100098

[Training] Epoch 054 Summary:
  Avg Loss: 0.100139
  Current LR: 4.04e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 6 completed with best loss: 0.100118

Starting Optuna Trial 7

[Optuna] Suggested hyperparameters:
  lr: 2.7020501193981095e-05
  batch_size: 32
  bottleneck_width: 512
  dropout_rate: 0.37013712874403026
  alpha: 0.7584180759951304
  weight_decay: 6.253112533848764e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13

[I 2025-06-21 16:20:07,743] Trial 7 pruned. 



[Optuna] Trial pruned

Starting Optuna Trial 8

[Optuna] Suggested hyperparameters:
  lr: 0.000254511520589056
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.41596538820277085
  alpha: 0.40669237845820655
  weight_decay: 5.3058970800461645e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples from out3.pickle (7/13)
[Data] Loaded 1000 samples from out4.pickle (8/13)
[Data] Loaded 1000 samples from out5.pickle (9/13)
[Data] Loaded 1000 samples from out6.pickle (10/13)
[Data] Loaded 1000 sampl

[I 2025-06-21 16:38:36,349] Trial 8 finished with value: 0.10009658994043574 and parameters: {'lr': 0.000254511520589056, 'batch_size': 128, 'bottleneck_width': 1024, 'dropout_rate': 0.41596538820277085, 'alpha': 0.40669237845820655, 'weight_decay': 5.3058970800461645e-05}. Best is trial 8 with value: 0.10009658994043574.


[Training] Epoch 085 | Batch 100/102 | Current Loss: 0.100093

[Training] Epoch 085 Summary:
  Avg Loss: 0.100105
  Current LR: 1.28e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 8 completed with best loss: 0.100097

Starting Optuna Trial 9

[Optuna] Suggested hyperparameters:
  lr: 5.508761126701689e-05
  batch_size: 256
  bottleneck_width: 1024
  dropout_rate: 0.48859735137649907
  alpha: 0.7150027095890954
  weight_decay: 4.4033611599766264e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/

[I 2025-06-21 16:38:51,424] Trial 9 pruned. 


[Training] Epoch 001 | Batch 050/051 | Current Loss: 0.204330

[Training] Epoch 001 Summary:
  Avg Loss: 0.529125
  Current LR: 5.38e-05
[EarlyStopping] Loss improved (inf → 0.529125). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 10

[Optuna] Suggested hyperparameters:
  lr: 0.00032621654698148115
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.25530728358474364
  alpha: 0.4057686474355913
  weight_decay: 2.834417625395888e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples 

[I 2025-06-21 16:52:52,927] Trial 10 finished with value: 0.10011125056474816 and parameters: {'lr': 0.00032621654698148115, 'batch_size': 128, 'bottleneck_width': 1024, 'dropout_rate': 0.25530728358474364, 'alpha': 0.4057686474355913, 'weight_decay': 2.834417625395888e-05}. Best is trial 8 with value: 0.10009658994043574.


[Training] Epoch 065 | Batch 100/102 | Current Loss: 0.100108

[Training] Epoch 065 Summary:
  Avg Loss: 0.100113
  Current LR: 1.64e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 10 completed with best loss: 0.100111

Starting Optuna Trial 11

[Optuna] Suggested hyperparameters:
  lr: 0.0002509917461542358
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.2711189373865902
  alpha: 0.40628149224477644
  weight_decay: 2.961558479046716e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6

[I 2025-06-21 16:53:21,275] Trial 11 pruned. 


[Training] Epoch 002 | Batch 100/102 | Current Loss: 0.104919

[Training] Epoch 002 Summary:
  Avg Loss: 0.105273
  Current LR: 2.27e-04
[EarlyStopping] Loss improved (0.152522 → 0.105273). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 12

[Optuna] Suggested hyperparameters:
  lr: 0.0002877230690836362
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.2816276958807796
  alpha: 0.40123484953380534
  weight_decay: 2.2385618448297324e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 sam

[I 2025-06-21 17:07:15,485] Trial 12 finished with value: 0.10011996467616044 and parameters: {'lr': 0.0002877230690836362, 'batch_size': 128, 'bottleneck_width': 1024, 'dropout_rate': 0.2816276958807796, 'alpha': 0.40123484953380534, 'weight_decay': 2.2385618448297324e-05}. Best is trial 8 with value: 0.10009658994043574.


[Training] Epoch 065 | Batch 100/102 | Current Loss: 0.100079

[Training] Epoch 065 Summary:
  Avg Loss: 0.100122
  Current LR: 1.44e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 12 completed with best loss: 0.100120

Starting Optuna Trial 13

[Optuna] Suggested hyperparameters:
  lr: 1.067494040100977e-05
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.25168624317286553
  alpha: 0.476890906756914
  weight_decay: 9.757079467979662e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/

[I 2025-06-21 17:07:30,809] Trial 13 pruned. 


[Training] Epoch 001 | Batch 100/102 | Current Loss: 0.311085

[Training] Epoch 001 Summary:
  Avg Loss: 0.690516
  Current LR: 1.04e-05
[EarlyStopping] Loss improved (inf → 0.690516). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 14

[Optuna] Suggested hyperparameters:
  lr: 0.0002700263092093273
  batch_size: 64
  bottleneck_width: 1024
  dropout_rate: 0.32990691085641666
  alpha: 0.4744367150124978
  weight_decay: 1.9592008352031023e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples f

[I 2025-06-21 17:18:02,081] Trial 14 finished with value: 0.10011583698146484 and parameters: {'lr': 0.0002700263092093273, 'batch_size': 64, 'bottleneck_width': 1024, 'dropout_rate': 0.32990691085641666, 'alpha': 0.4744367150124978, 'weight_decay': 1.9592008352031023e-05}. Best is trial 8 with value: 0.10009658994043574.


[Training] Epoch 045 | Batch 200/204 | Current Loss: 0.100155

[Training] Epoch 045 Summary:
  Avg Loss: 0.100119
  Current LR: 1.36e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 14 completed with best loss: 0.100116

Starting Optuna Trial 15

[Optuna] Suggested hyperparameters:
  lr: 0.0004161421207968201
  batch_size: 128
  bottleneck_width: 2048
  dropout_rate: 0.10913518963700314
  alpha: 0.4761145907143287
  weight_decay: 4.534515090977433e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6

[I 2025-06-21 17:27:48,790] Trial 15 finished with value: 0.10012004970043313 and parameters: {'lr': 0.0004161421207968201, 'batch_size': 128, 'bottleneck_width': 2048, 'dropout_rate': 0.10913518963700314, 'alpha': 0.4761145907143287, 'weight_decay': 4.534515090977433e-05}. Best is trial 8 with value: 0.10009658994043574.


[Training] Epoch 045 | Batch 100/102 | Current Loss: 0.100160

[Training] Epoch 045 Summary:
  Avg Loss: 0.100123
  Current LR: 2.09e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 15 completed with best loss: 0.100120

Starting Optuna Trial 16

[Optuna] Suggested hyperparameters:
  lr: 0.00021116852160432329
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.4113217173328182
  alpha: 0.448443357102263
  weight_decay: 1.4953383419721361e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6

[I 2025-06-21 17:28:03,802] Trial 16 pruned. 


[Training] Epoch 001 | Batch 100/102 | Current Loss: 0.109635

[Training] Epoch 001 Summary:
  Avg Loss: 0.177752
  Current LR: 2.06e-04
[EarlyStopping] Loss improved (inf → 0.177752). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 17

[Optuna] Suggested hyperparameters:
  lr: 0.00016646983286589253
  batch_size: 128
  bottleneck_width: 1024
  dropout_rate: 0.4940204733500383
  alpha: 0.5236893016977584
  weight_decay: 7.306004493082655e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples f

[I 2025-06-21 17:28:18,743] Trial 17 pruned. 


[Training] Epoch 001 | Batch 100/102 | Current Loss: 0.117746

[Training] Epoch 001 Summary:
  Avg Loss: 0.205552
  Current LR: 1.62e-04
[EarlyStopping] Loss improved (inf → 0.205552). Saving model...

[Optuna] Trial pruned

Starting Optuna Trial 18

[Optuna] Suggested hyperparameters:
  lr: 0.0008632097996657894
  batch_size: 64
  bottleneck_width: 512
  dropout_rate: 0.3140554789948301
  alpha: 0.5913638011196574
  weight_decay: 3.485896826371103e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/13)
[Data] Loaded 1000 samples from

[I 2025-06-21 17:39:06,770] Trial 18 finished with value: 0.1000964782547717 and parameters: {'lr': 0.0008632097996657894, 'batch_size': 64, 'bottleneck_width': 512, 'dropout_rate': 0.3140554789948301, 'alpha': 0.5913638011196574, 'weight_decay': 3.485896826371103e-05}. Best is trial 18 with value: 0.1000964782547717.


[Training] Epoch 045 | Batch 200/204 | Current Loss: 0.100013

[Training] Epoch 045 Summary:
  Avg Loss: 0.100116
  Current LR: 4.32e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 18 completed with best loss: 0.100096

Starting Optuna Trial 19

[Optuna] Suggested hyperparameters:
  lr: 0.0009587140972521137
  batch_size: 64
  bottleneck_width: 512
  dropout_rate: 0.31858741775550603
  alpha: 0.6130990679467545
  weight_decay: 1.467896189415769e-06

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] Loaded 1000 samples from out10.pickle (2/13)
[Data] Loaded 1000 samples from out11.pickle (3/13)
[Data] Loaded 1000 samples from out12.pickle (4/13)
[Data] Loaded 1000 samples from out13.pickle (5/13)
[Data] Loaded 1000 samples from out2.pickle (6/1

[I 2025-06-21 17:47:23,611] Trial 19 finished with value: 0.10012369365522675 and parameters: {'lr': 0.0009587140972521137, 'batch_size': 64, 'bottleneck_width': 512, 'dropout_rate': 0.31858741775550603, 'alpha': 0.6130990679467545, 'weight_decay': 1.467896189415769e-06}. Best is trial 18 with value: 0.1000964782547717.


[Training] Epoch 035 | Batch 200/204 | Current Loss: 0.100205

[Training] Epoch 035 Summary:
  Avg Loss: 0.100160
  Current LR: 4.80e-04
[EarlyStopping] Counter: 5/5

[Training] Early stopping triggered

[Optuna] Trial 19 completed with best loss: 0.100124

[Optuna] Optimization completed
Best trial:
  Value (loss): 0.100096
  Params: 
    lr: 0.0008632097996657894
    batch_size: 64
    bottleneck_width: 512
    dropout_rate: 0.3140554789948301
    alpha: 0.5913638011196574
    weight_decay: 3.485896826371103e-05

Starting Final Model Training
Using parameters:
  lr: 0.0008632097996657894
  batch_size: 64
  bottleneck_width: 512
  dropout_rate: 0.3140554789948301
  alpha: 0.5913638011196574
  weight_decay: 3.485896826371103e-05

[Data] Loading dataset...

[Data] Initializing dataset from /kaggle/input/embeddings-dataset
[Data] Applying transforms: Resize(32), CenterCrop(32), ColorJitter, Normalize
[Data] Found 13 pickle files
[Data] Loaded 1000 samples from out1.pickle (1/13)
[Data] L

In [5]:
# Send the model to the server. The recorded output of this cell shows the
# call printing the server's reply, a dict with a single 'L2' entry.
# NOTE(review): submit_model is not defined in this cell — presumably it comes
# from the earlier training cell, which would have to run first; confirm.
submit_model() 


Submitting Model to Server
[Submission] Sending model to server...
[Submission] Server response:
{'L2': 4.706040859222412}
