# Probing Experiment on SSL Models

This is effectively a notebook version of the old experiment runner script. Each stage runs in its own cell, so a small error in one stage does not wipe out the state built up by the earlier ones.

### Imports, Logging Setup

In [15]:
# Set environment variables before imports
import os
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Imports
import hydra
from omegaconf import DictConfig, OmegaConf
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
from pathlib import Path
import logging
import wandb
from typing import Dict, List, Tuple, Optional
from tqdm import tqdm


from src.models.feature_extractor import FeatureExtractor, load_feature_extractor
from src.datasets.shapenet_3dr2n2 import create_3dr2n2_dataloaders
from src.probing.probes import create_probe, ProbeTrainer
from src.probing.data_preprocessing import (
    FeatureExtractorPipeline,
    create_probing_dataloaders,
    ProbingDataset,
)
from src.probing.metrics import (
    compute_regression_metrics,
    compute_viewpoint_specific_metrics,
    MetricsTracker,
)
from src.analysis.layer_analysis import LayerWiseAnalyzer

# Configure root logging once (INFO level, timestamped records), then create
# the logger used by every cell below.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

### Probing Setup
This class is the overarching "manager" responsible for the entire experiment. It contains all the functionality required to:

- Create & setup dataloaders 
- Extract features from the frozen layers of the ViT models 
- Train MLP & Linear probes on those layers 
- Summarize results

In [16]:
class ProbingExperiment:
    """Orchestrates probing experiments"""

    def __init__(self, config: DictConfig):
        """Set up the device, optional W&B run, output directories and analyzer.

        Args:
            config: Experiment configuration. Reads ``models.device`` /
                ``device``, ``wandb``, ``experiment.name``, ``results_dir``
                and ``cache_dir``.
        """
        self.config = config

        # Device precedence: models.device, then top-level device, then
        # auto-detection (CUDA first, MPS second, CPU last).
        configured_device = config.models.get("device", config.get("device"))
        if configured_device:
            self.device = configured_device
        elif torch.cuda.is_available():
            self.device = "cuda"
        elif torch.backends.mps.is_available():
            self.device = "mps"
        else:
            self.device = "cpu"
        logger.info(f"Using device: {self.device}")

        # Start a W&B run only when the config explicitly enables it.
        wandb_section = config.get("wandb", {})
        if wandb_section.get("enabled", False):
            wandb.init(
                project=config.wandb.project,
                entity=config.wandb.get("entity"),
                name=config.experiment.name,
                config=OmegaConf.to_container(config, resolve=True),
            )

        # Output locations, created up front so later steps can write freely.
        self.results_dir = Path(config.get("results_dir", "./results"))
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir = Path(config.get("cache_dir", "./cache"))
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        experiment_name = self.config.experiment.name

        # Trained probes are stored per experiment under the cache directory.
        self.probe_save_dir = self.cache_dir / "probes" / experiment_name
        self.probe_save_dir.mkdir(parents=True, exist_ok=True)

        # The analyzer is pointed at this experiment's results folder.
        self.analyzer = LayerWiseAnalyzer(self.results_dir / experiment_name)

   
    def load_dataset(self) -> Tuple[DataLoader, DataLoader, DataLoader]:
        """Build the dataloaders described by ``config.datasets``.

        ``datasets.subset_percentage`` (``None`` when unset) is forwarded to
        ``create_3dr2n2_dataloaders`` together with the dataset config.

        Returns:
            The result of ``create_3dr2n2_dataloaders``, annotated as a
            (train, val, test) triple of dataloaders.
        """
        dataset_cfg = self.config.datasets
        fraction = dataset_cfg.get("subset_percentage", None)
        return create_3dr2n2_dataloaders(dataset_cfg, subset_percentage=fraction)

    def load_feature_extractor(self) -> FeatureExtractor:
        """Create the feature extractor described by ``config.models``.

        The chosen device and a ``<cache_dir>/models`` path are written into
        ``config.models`` before the section is converted to a plain
        container and passed to the module-level ``load_feature_extractor``.

        Returns:
            The object returned by ``load_feature_extractor``.
        """
        models_cfg = self.config.models
        # These writes modify the shared config node, not a copy.
        models_cfg.device = self.device
        models_cfg.cache_dir = str(self.cache_dir / "models")

        plain_cfg = OmegaConf.to_container(models_cfg)
        extractor = load_feature_extractor(plain_cfg)
        logger.info(f"Loaded {models_cfg.model_name} feature extractor")
        return extractor

    def extract_features_for_layer(
        self,
        feature_extractor: FeatureExtractor,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        layer: int,
        feature_type: str,
        task_type: str,
    ) -> Tuple[ProbingDataset, ProbingDataset, ProbingDataset]:
        """Build probing datasets from one layer of the feature extractor.

        Args:
            feature_extractor: Extractor handed to ``FeatureExtractorPipeline``.
            train_loader: Source loader for the training split.
            val_loader: Source loader for the validation split.
            test_loader: Source loader for the test split.
            layer: Layer index; passed as the single-element ``layers`` list.
            feature_type: Forwarded unchanged to the pipeline.
            task_type: Forwarded unchanged to the pipeline.

        Returns:
            The result of ``pipeline.create_probing_datasets``, annotated as a
            (train, val, test) triple of ``ProbingDataset``.
        """
        feature_cache = str(self.cache_dir / "features")
        extraction_batch_size = self.config.get("extraction_batch_size", 32)
        pipeline = FeatureExtractorPipeline(
            feature_extractor=feature_extractor,
            device=self.device,
            batch_size=extraction_batch_size,
            cache_dir=feature_cache,
        )

        # The name combines model, experiment and layer, so each layer's
        # extraction is identified separately.
        model_name = self.config.models.model_name
        run_name = self.config.experiment.name
        experiment_name = f"{model_name}_{run_name}_layer_{layer}"

        datasets = pipeline.create_probing_datasets(
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            layers=[layer],
            feature_type=feature_type,
            task_type=task_type,
            experiment_name=experiment_name,
        )
        return datasets

    def run_probe_experiment(
        self,
        probe_type: str,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        feature_dim: int,
        layer: int,
    ) -> Dict:
        """Train, checkpoint and evaluate a single probe on one layer's features.

        Args:
            probe_type: Key of the probe's section under ``config.probing``;
                also used in log messages and the checkpoint file name.
            train_loader: Loader handed to ``ProbeTrainer.train`` for training.
            val_loader: Loader handed to ``ProbeTrainer.train`` for validation.
            test_loader: Loader used for the final evaluation.
            feature_dim: Stored as the probe's ``input_dim``.
            layer: Layer index, used for logging and the checkpoint file name.

        Returns:
            Dict with ``train_history``, ``val_history``, ``test_metrics``,
            ``detailed_metrics``, ``best_epoch`` and ``total_epochs``.
        """
        logger.info(
            f"Running {probe_type} probe on layer {layer} (feature_dim: {feature_dim})"
        )

        # Take a plain, mutable copy of this probe's config section.
        # OmegaConf.to_container only accepts OmegaConf objects, so the `{}`
        # fallback for a missing section has to be copied as a regular dict.
        probe_section = self.config.probing.get(probe_type, {})
        if OmegaConf.is_config(probe_section):
            probe_config = OmegaConf.to_container(probe_section, resolve=True)
        else:
            probe_config = dict(probe_section or {})

        probe_config["input_dim"] = feature_dim
        probe_config["output_dim"] = self.config.probing.get("output_dim", 2)

        # Map experiment-level task names onto the two probe-level task types;
        # any other value is passed through unchanged.
        task_aliases = {
            "viewpoint_regression": "regression",
            "view_classification": "classification",
        }
        main_task_type = self.config.probing.get("task_type", "regression")
        probe_config["task_type"] = task_aliases.get(main_task_type, main_task_type)

        probe = create_probe(probe_config)

        # One trainer, wired to the tracker that records the histories returned
        # below. It is built before the optimizer, as in the original flow.
        metrics_tracker = MetricsTracker()
        trainer = ProbeTrainer(
            probe, device=self.device, MetricsTracker=metrics_tracker
        )

        # Setup optimizer and scheduler
        training_config = probe_config.get("training", {})
        optimizer = self.create_optimizer(probe, training_config.get("optimizer", {}))
        scheduler = self.create_scheduler(
            optimizer, training_config.get("scheduler", {})
        )

        # Training parameters
        epochs = training_config.get("epochs", 30)
        early_stopping_patience = training_config.get("early_stopping_patience", 15)

        # Check if wandb is enabled
        wandb_enabled = self.config.get("wandb", {}).get("enabled", False)

        best_model, best_val_loss = trainer.train(
            epochs,
            optimizer,
            scheduler,
            early_stopping_patience,
            train_loader,
            val_loader,
            probe_type=probe_type,
            layer=layer,
            wandb_enabled=wandb_enabled,
        )

        # Save the trained probe next to the other probes of this experiment.
        probe_save_dir = self.probe_save_dir
        probe_save_dir.mkdir(parents=True, exist_ok=True)
        probe_save_path = probe_save_dir / f"{probe_type}_layer_{layer}_probe.pth"

        torch.save({
            'model_state_dict': best_model,  # best_model is already a state_dict
            'probe_config': probe_config,
            'layer': layer,
            'probe_type': probe_type,
            'experiment_name': self.config.experiment.name,
            'model_name': self.config.models.model_name,
            'best_val_loss': best_val_loss,
            'feature_dim': feature_dim
        }, probe_save_path)

        logger.info(f"Saved {probe_type} probe for layer {layer} to {probe_save_path}")

        # NOTE(review): evaluation uses `probe` as ProbeTrainer.train left it;
        # whether that holds the best-epoch weights depends on ProbeTrainer.
        test_metrics = trainer.evaluate(test_loader)
        detailed_metrics = self.compute_detailed_metrics(probe, test_loader)

        train_history = metrics_tracker.get_history("train")

        return {
            "train_history": train_history,
            "val_history": metrics_tracker.get_history("val"),
            "test_metrics": test_metrics,
            "detailed_metrics": detailed_metrics,
            "best_epoch": metrics_tracker.best_epoch,
            "total_epochs": len(train_history),
        }

    def save_probe(self, probe: nn.Module, probe_type: str, layer: int, probe_config: Dict):
        """Write a probe checkpoint and a JSON copy of its metadata.

        Both files go to ``self.probe_save_dir`` and share the stem
        ``{model_name}_{probe_type}_layer_{layer}``: ``.pth`` for the
        checkpoint and ``_config.json`` for the metadata.

        Args:
            probe: Module whose ``state_dict()`` is saved.
            probe_type: Probe kind, part of the file name and metadata.
            layer: Layer index, part of the file name and metadata.
            probe_config: Config stored with the checkpoint and dumped as JSON.
        """
        import json

        model_name = self.config.models.model_name
        stem = f"{model_name}_{probe_type}_layer_{layer}"
        probe_path = self.probe_save_dir / f"{stem}.pth"
        config_path = self.probe_save_dir / f"{stem}_config.json"

        # Everything except the weights; shared by the checkpoint and the JSON.
        metadata = {
            'probe_config': probe_config,
            'model_name': model_name,
            'probe_type': probe_type,
            'layer': layer,
            'experiment_name': self.config.experiment.name,
        }

        torch.save({'model_state_dict': probe.state_dict(), **metadata}, probe_path)

        with open(config_path, 'w') as f:
            json.dump(metadata, f, indent=2)

        logger.info(f"Probe saved to {probe_path}")
        logger.info(f"Probe config saved to {config_path}")

    def load_probe(self, probe_type: str, layer: int, device: Optional[str] = None) -> nn.Module:
        """Load a previously saved probe"""
        if device is None:
            device = self.device
            
        model_name = self.config.models.model_name
        filename = f"{model_name}_{probe_type}_layer_{layer}.pth"
        probe_path = self.probe_save_dir / filename
        
        if not probe_path.exists():
            raise FileNotFoundError(f"Probe not found at {probe_path}")
        
        # Load the saved data
        saved_data = torch.load(probe_path, map_location=device)
        
        # Recreate the probe using the saved config
        probe_config = saved_data['probe_config']
        probe = create_probe(probe_config)
        
        # Load the state dict
        probe.load_state_dict(saved_data['model_state_dict'])
        probe.to(device)
        
        logger.info(f"Probe loaded from {probe_path}")
        return probe

    def create_optimizer(
        self, model: nn.Module, optimizer_config: Dict
    ) -> torch.optim.Optimizer:
        """Create an optimizer for ``model`` from a Hydra-style config.

        Args:
            model: Module whose ``parameters()`` are passed as ``params``.
            optimizer_config: Mapping with a ``_target_`` entry naming the
                optimizer class, plus its keyword arguments. The caller's
                mapping is not modified; a shallow copy is used.

        Returns:
            The object built by ``hydra.utils.instantiate``.

        Raises:
            ValueError: If ``optimizer_config`` is empty or has no ``_target_``.
        """
        # Without a `_target_` Hydra has nothing to construct, so no optimizer
        # would be produced. Fail here with a message that names the problem.
        if not optimizer_config or "_target_" not in optimizer_config:
            raise ValueError(
                "optimizer_config must define '_target_' "
                "(for example 'torch.optim.Adam'); got: "
                f"{optimizer_config!r}"
            )

        from hydra.utils import instantiate

        # Create a copy of config and add model parameters
        optimizer_config = optimizer_config.copy()
        optimizer_config["params"] = model.parameters()

        return instantiate(optimizer_config)

    def create_scheduler(
        self, optimizer: torch.optim.Optimizer, scheduler_config: Dict
    ):
        """Create a learning-rate scheduler from a Hydra-style config.

        Args:
            optimizer: Optimizer injected into the config as ``optimizer``.
            scheduler_config: Scheduler config; a falsy value means no scheduler.

        Returns:
            The object built by ``hydra.utils.instantiate``, or ``None`` when
            ``scheduler_config`` is empty.
        """
        if scheduler_config:
            from hydra.utils import instantiate

            # Work on a shallow copy so the caller's mapping is left alone.
            spec = scheduler_config.copy()
            spec["optimizer"] = optimizer
            return instantiate(spec)

        return None

    def compute_detailed_metrics(
        self, probe: nn.Module, test_loader: DataLoader
    ) -> Dict:
        """Run the probe over ``test_loader`` and compute metrics on all outputs.

        Args:
            probe: Module to evaluate; switched to eval mode here.
            test_loader: Yields batches with ``"features"`` and ``"targets"``.

        Returns:
            The dict from ``compute_regression_metrics`` (with per-dimension
            values). When predictions have exactly two columns, the result of
            ``compute_viewpoint_specific_metrics`` is merged in, treating
            column 0 as azimuth and column 1 as elevation.
        """
        probe.eval()

        prediction_batches = []
        target_batches = []

        # Inference only: no gradients needed.
        with torch.no_grad():
            for batch in test_loader:
                features = batch["features"].to(self.device)
                outputs = probe(features)

                # Collect predictions on the CPU, alongside the targets.
                prediction_batches.append(outputs.cpu())
                target_batches.append(batch["targets"])

        predictions = torch.cat(prediction_batches, dim=0)
        targets = torch.cat(target_batches, dim=0)

        # Basic regression metrics
        metrics = compute_regression_metrics(predictions, targets, return_per_dim=True)

        # Viewpoint-specific metrics apply only to 2-column predictions. The
        # ndim check keeps 1-D outputs from failing on `shape[1]`.
        if predictions.ndim == 2 and predictions.shape[1] == 2:
            viewpoint_metrics = compute_viewpoint_specific_metrics(
                azimuth_pred=predictions[:, 0],
                elevation_pred=predictions[:, 1],
                azimuth_target=targets[:, 0],
                elevation_target=targets[:, 1],
            )
            metrics.update(viewpoint_metrics)

        return metrics

    def save_results(self, results: Dict) -> Path:
        """Write the resolved config and the results to ``results.json``.

        The file is ``<results_dir>/<experiment.name>/results.json`` with two
        top-level keys, ``config`` and ``results``.

        Args:
            results: Results structure; converted with
                ``make_json_serializable`` before writing.

        Returns:
            Path of the written file.
        """
        import json

        # Create experiment directory
        exp_dir = self.results_dir / self.config.experiment.name
        exp_dir.mkdir(parents=True, exist_ok=True)
        results_file = exp_dir / "results.json"

        # Convert tensors to lists for JSON serialization
        serializable_results = self.make_json_serializable(results)
        combined_results = {
            "config": OmegaConf.to_container(self.config, resolve=True),
            "results": serializable_results,
        }

        # Serialize fully before touching the file, so a serialization error
        # cannot leave a truncated results.json behind.
        payload = json.dumps(combined_results, indent=2)
        results_file.write_text(payload)

        logger.info(f"Results saved to {results_file}")
        return results_file

    def make_json_serializable(self, obj):
        """Convert object to JSON-serializable format"""
        if isinstance(obj, dict):
            return {k: self.make_json_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self.make_json_serializable(v) for v in obj]
        elif isinstance(obj, (torch.Tensor, np.ndarray)):
            return obj.tolist() if hasattr(obj, "tolist") else float(obj)
        elif isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        else:
            return obj



### Hydra Configuration Loading / Setup

In [17]:
from hydra import initialize, compose
from hydra.core.global_hydra import GlobalHydra
import os 
from pathlib import Path #

# Location and name of the Hydra config to compose.
CONFIG_PATH = "../configs"
CONFIG_NAME = "experiment_config"

# Stays None when loading fails, so later cells can detect the failure.
cfg: Optional[DictConfig] = None

# Hydra keeps process-wide state. Clear it so that re-running this cell starts
# from a clean slate.
if GlobalHydra.instance().is_initialized():
    logger.info("Clearing existing Hydra global state.")
    GlobalHydra.instance().clear()

try:
    # Export DATA_DIR as <parent of cwd>/data before composing the config.
    # NOTE(review): presumably the configs read DATA_DIR — confirm.
    project_root = Path.cwd().parent
    data_dir_abs = project_root / "data"
    os.environ["DATA_DIR"] = str(data_dir_abs)

    logger.info(f"Initializing Hydra with config_path: '{CONFIG_PATH}'")
    initialize(version_base=None, config_path=CONFIG_PATH)

    logger.info(f"Composing configuration with config_name: '{CONFIG_NAME}'")
    cfg = compose(config_name=CONFIG_NAME)

except Exception as e:
    # Best effort: log the full traceback and leave cfg as None.
    logger.exception(f"Error initializing Hydra or loading configuration: {e}")

# Test against None: an empty config is falsy but still a successful load.
if cfg is not None:
    logger.info("Hydra configuration loaded successfully.")


2025-06-01 22:01:32,148 - __main__ - INFO - Clearing existing Hydra global state.
2025-06-01 22:01:32,149 - __main__ - INFO - Initializing Hydra with config_path: '../configs'
2025-06-01 22:01:32,161 - __main__ - INFO - Composing configuration with config_name: 'experiment_config'
2025-06-01 22:01:32,232 - __main__ - INFO - Hydra configuration loaded successfully.


## Running the Experiment
The following code uses the above configurations and utility functions to run the actual experiment.

In [18]:
results = None

# The configuration cell logs loading errors and leaves cfg as None. Stop here
# with a clear message instead of failing inside ProbingExperiment.__init__.
if cfg is None:
    raise RuntimeError(
        "Hydra configuration was not loaded; fix and re-run the configuration "
        "cell before creating the experiment."
    )

logger.info("Starting experiment execution")
experiment = ProbingExperiment(cfg)
    

2025-06-01 22:01:32,256 - __main__ - INFO - Starting experiment execution
2025-06-01 22:01:32,257 - __main__ - INFO - Using device: cuda


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

### Load the Feature Extractor & Dataset

In [19]:
# Load the feature extractor. This also writes `device` and `cache_dir` into
# cfg.models (see ProbingExperiment.load_feature_extractor).
feature_extractor = experiment.load_feature_extractor()
# Extraction settings, with defaults for anything the config leaves out.
extraction_config = cfg.models.get("feature_extraction", {})
# Layers to probe; defaults to layer 11 only.
layers = extraction_config.get("layers", [11])
feature_type = extraction_config.get("feature_type", "cls_token")
# NOTE(review): the default here is "viewpoint_regression", while
# run_probe_experiment falls back to "regression" — confirm that is intended.
task_type = cfg.probing.get("task_type", "viewpoint_regression")

2025-06-01 22:01:43,214 - src.models.feature_extractor - INFO - Loaded dinov2 model on cuda
2025-06-01 22:01:43,215 - __main__ - INFO - Loaded dinov2 feature extractor


In [20]:
# Build the three dataloaders from cfg.datasets (see ProbingExperiment.load_dataset).
train_loader, val_loader, test_loader = experiment.load_dataset()

100%|██████████| 30648/30648 [00:30<00:00, 995.28it/s] 


Using 5.00% of train data: 36777 samples.


100%|██████████| 6567/6567 [00:06<00:00, 1000.44it/s]


Using 5.00% of val data: 7880 samples.


100%|██████████| 6569/6569 [00:06<00:00, 992.17it/s] 


Using 5.00% of test data: 7882 samples.


### Train the Probes

In [21]:
# Results are collected as results["layer_<n>"][<probe_type>] = result dict.
results = {}
for layer in tqdm(layers):
    logger.info(f"Processing layer {layer}...")

    # Step 1: turn this layer of the feature extractor into probing datasets.
    train_dataset, val_dataset, test_dataset = experiment.extract_features_for_layer(
        feature_extractor=feature_extractor,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        layer=layer,
        feature_type=feature_type,
        task_type=task_type,
    )

    # Step 2: wrap the datasets in loaders for probe training.
    training_cfg = cfg.probing.get("training", {})
    probe_train_loader, probe_val_loader, probe_test_loader = create_probing_dataloaders(
        train_dataset,
        val_dataset,
        test_dataset,
        batch_size=training_cfg.get("batch_size", 64),
        num_workers=cfg.get("num_workers", 4),
    )

    # Step 3: train and evaluate every configured probe type on this layer.
    layer_results = {}
    for probe_type in cfg.probing.probe_types:
        logger.info(f"Running {probe_type} probe on layer {layer}...")
        probe_results = experiment.run_probe_experiment(
            probe_type=probe_type,
            train_loader=probe_train_loader,
            val_loader=probe_val_loader,
            test_loader=probe_test_loader,
            feature_dim=train_dataset.features.shape[1],
            layer=layer,
        )
        layer_results[probe_type] = probe_results

    results[f"layer_{layer}"] = layer_results

  0%|          | 0/6 [00:00<?, ?it/s]2025-06-01 22:02:27,389 - __main__ - INFO - Processing layer 2...
2025-06-01 22:02:27,389 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:24:36,  7.56s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:03:45,  3.34s/it]
Extracting features:   0%|          | 3/1149 [00:08<37:43,  1.98s/it]  
Extracting features:   0%|          | 4/1149 [00:08<25:48,  1.35s/it]
Extracting features:   0%|          | 5/1149 [00:09<19:00,  1.00it/s]
Extracting features:   1%|          | 6/1149 [00:09<14:54,  1.28it/s]
Extracting features:   1%|          | 7/1149 [00:09<12:19,  1.55it/s]
Extracting features:   1%|          | 8/1149 [00:10<10:43,  1.77it/s]
Extracting features:   1%|          | 9/1149 [00:10<09:34,  1.98it/s]
Extracting features:   1%|          | 10/1149 [00:10<08:57,  2.12it/s]
Extracting features:

Epoch 0: train_loss=0.1652, val_loss=0.1592



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:08<20:37,  8.71s/it]
Training 2/30:  11%|█         | 16/143 [00:08<00:50,  2.53it/s]
Training 2/30:  23%|██▎       | 33/143 [00:08<00:17,  6.30it/s]
Training 2/30:  38%|███▊      | 54/143 [00:09<00:07, 12.64it/s]
Training 2/30:  55%|█████▍    | 78/143 [00:09<00:02, 22.25it/s]
Training 2/30: 100%|██████████| 143/143 [00:09<00:00, 14.76it/s]


Epoch 1: train_loss=0.1599, val_loss=0.1575



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<20:57,  8.86s/it]
Training 3/30:   8%|▊         | 12/143 [00:08<01:10,  1.86it/s]
Training 3/30:  21%|██        | 30/143 [00:09<00:19,  5.80it/s]
Training 3/30:  37%|███▋      | 53/143 [00:09<00:07, 12.67it/s]
Training 3/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.84it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.57it/s]


Epoch 2: train_loss=0.1586, val_loss=0.1567



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 4/30:   8%|▊         | 12/143 [00:09<01:15,  1.73it/s]
Training 4/30:  22%|██▏       | 32/143 [00:09<00:18,  5.84it/s]
Training 4/30:  37%|███▋      | 53/143 [00:09<00:07, 11.71it/s]
Training 4/30:  61%|██████    | 87/143 [00:09<00:02, 24.64it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 3: train_loss=0.1576, val_loss=0.1560



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 5/30:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 5/30:  19%|█▉        | 27/143 [00:09<00:23,  5.03it/s]
Training 5/30: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 4: train_loss=0.1569, val_loss=0.1555



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 6/30:  14%|█▍        | 20/143 [00:09<00:39,  3.11it/s]
Training 6/30:  27%|██▋       | 38/143 [00:09<00:14,  7.01it/s]
Training 6/30:  45%|████▍     | 64/143 [00:09<00:05, 14.73it/s]
Training 6/30:  66%|██████▋   | 95/143 [00:09<00:01, 27.04it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.59it/s]


Epoch 5: train_loss=0.1562, val_loss=0.1553



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 7/30:  10%|▉         | 14/143 [00:09<01:00,  2.15it/s]
Training 7/30:  24%|██▍       | 34/143 [00:09<00:16,  6.48it/s]
Training 7/30:  38%|███▊      | 55/143 [00:09<00:06, 12.65it/s]
Training 7/30:  60%|██████    | 86/143 [00:09<00:02, 24.94it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 6: train_loss=0.1557, val_loss=0.1550



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:08<20:35,  8.70s/it]
Training 8/30:  10%|█         | 15/143 [00:08<00:53,  2.37it/s]
Training 8/30:  24%|██▍       | 34/143 [00:08<00:16,  6.60it/s]
Training 8/30:  39%|███▉      | 56/143 [00:09<00:06, 13.25it/s]
Training 8/30:  58%|█████▊    | 83/143 [00:09<00:02, 24.17it/s]
Training 8/30: 100%|██████████| 143/143 [00:09<00:00, 14.83it/s]


Epoch 7: train_loss=0.1554, val_loss=0.1545



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 9/30:   8%|▊         | 12/143 [00:08<01:10,  1.87it/s]
Training 9/30:  22%|██▏       | 32/143 [00:08<00:17,  6.29it/s]
Training 9/30:  41%|████      | 58/143 [00:09<00:06, 14.12it/s]
Training 9/30:  63%|██████▎   | 90/143 [00:09<00:01, 27.04it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 8: train_loss=0.1548, val_loss=0.1540



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 10/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 10/30:  24%|██▍       | 35/143 [00:09<00:16,  6.50it/s]
Training 10/30:  37%|███▋      | 53/143 [00:09<00:07, 11.66it/s]
Training 10/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.57it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 9: train_loss=0.1544, val_loss=0.1539



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 11/30:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 11/30:  17%|█▋        | 25/143 [00:09<00:25,  4.71it/s]
Training 11/30:  36%|███▌      | 51/143 [00:09<00:07, 12.36it/s]
Training 11/30:  56%|█████▌    | 80/143 [00:09<00:02, 23.73it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 10: train_loss=0.1542, val_loss=0.1535



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 12/30:  10%|█         | 15/143 [00:09<00:55,  2.29it/s]
Training 12/30:  22%|██▏       | 31/143 [00:09<00:19,  5.71it/s]
Training 12/30: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 11: train_loss=0.1539, val_loss=0.1532



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 13/30:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 13/30:  21%|██        | 30/143 [00:09<00:19,  5.86it/s]
Training 13/30:  33%|███▎      | 47/143 [00:09<00:08, 10.83it/s]
Training 13/30:  52%|█████▏    | 75/143 [00:09<00:03, 21.99it/s]
Training 13/30:  73%|███████▎  | 105/143 [00:09<00:01, 37.49it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 14.34it/s]


Epoch 12: train_loss=0.1536, val_loss=0.1528



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 14/30:  10%|▉         | 14/143 [00:08<00:58,  2.19it/s]
Training 14/30:  21%|██        | 30/143 [00:09<00:19,  5.70it/s]
Training 14/30:  38%|███▊      | 55/143 [00:09<00:06, 13.23it/s]
Training 14/30:  57%|█████▋    | 82/143 [00:09<00:02, 24.05it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 13: train_loss=0.1535, val_loss=0.1526



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<22:35,  9.55s/it]
Training 15/30:  12%|█▏        | 17/143 [00:09<00:51,  2.46it/s]
Training 15/30:  23%|██▎       | 33/143 [00:09<00:19,  5.71it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 14: train_loss=0.1530, val_loss=0.1531



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:43,  9.60s/it]
Training 16/30:  12%|█▏        | 17/143 [00:09<00:51,  2.45it/s]
Training 16/30:  24%|██▍       | 34/143 [00:09<00:18,  5.88it/s]
Training 16/30:  40%|███▉      | 57/143 [00:09<00:07, 12.25it/s]
Training 16/30:  62%|██████▏   | 88/143 [00:10<00:02, 23.87it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 15: train_loss=0.1530, val_loss=0.1524



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 17/30:  11%|█         | 16/143 [00:09<00:51,  2.46it/s]
Training 17/30:  27%|██▋       | 38/143 [00:09<00:14,  7.22it/s]
Training 17/30:  43%|████▎     | 62/143 [00:09<00:05, 14.27it/s]
Training 17/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.95it/s]
Training 17/30: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 16: train_loss=0.1527, val_loss=0.1521



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 18/30:  13%|█▎        | 18/143 [00:09<00:45,  2.77it/s]
Training 18/30:  29%|██▉       | 42/143 [00:09<00:12,  7.96it/s]
Training 18/30:  45%|████▌     | 65/143 [00:09<00:05, 14.68it/s]
Training 18/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.25it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 17: train_loss=0.1526, val_loss=0.1520



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<23:01,  9.73s/it]
Training 19/30:  11%|█         | 16/143 [00:09<00:55,  2.27it/s]
Training 19/30:  24%|██▍       | 34/143 [00:09<00:18,  5.87it/s]
Training 19/30:  39%|███▉      | 56/143 [00:10<00:07, 11.88it/s]
Training 19/30:  64%|██████▎   | 91/143 [00:10<00:02, 24.92it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 18: train_loss=0.1524, val_loss=0.1519



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:08<20:20,  8.60s/it]
Training 20/30:  11%|█         | 16/143 [00:08<00:49,  2.57it/s]
Training 20/30:  29%|██▊       | 41/143 [00:08<00:12,  8.21it/s]
Training 20/30:  49%|████▉     | 70/143 [00:08<00:04, 17.09it/s]
Training 20/30:  72%|███████▏  | 103/143 [00:09<00:01, 30.55it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 15.06it/s]


Epoch 19: train_loss=0.1524, val_loss=0.1517



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<20:02,  8.47s/it]
Training 21/30:  10%|▉         | 14/143 [00:08<00:56,  2.27it/s]
Training 21/30:  28%|██▊       | 40/143 [00:08<00:12,  8.22it/s]
Training 21/30:  45%|████▌     | 65/143 [00:08<00:04, 15.92it/s]
Training 21/30:  67%|██████▋   | 96/143 [00:08<00:01, 28.74it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 15.20it/s]


Epoch 20: train_loss=0.1523, val_loss=0.1516



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 22/30:  11%|█         | 16/143 [00:08<00:50,  2.51it/s]
Training 22/30:  28%|██▊       | 40/143 [00:08<00:13,  7.81it/s]
Training 22/30:  48%|████▊     | 68/143 [00:09<00:04, 16.21it/s]
Training 22/30:  70%|██████▉   | 100/143 [00:09<00:01, 29.03it/s]
Training 22/30: 100%|██████████| 143/143 [00:09<00:00, 14.76it/s]


Epoch 21: train_loss=0.1520, val_loss=0.1518



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:08<20:33,  8.69s/it]
Training 23/30:  10%|█         | 15/143 [00:08<00:53,  2.37it/s]
Training 23/30:  25%|██▌       | 36/143 [00:08<00:15,  7.05it/s]
Training 23/30:  42%|████▏     | 60/143 [00:09<00:05, 14.31it/s]
Training 23/30:  63%|██████▎   | 90/143 [00:09<00:02, 26.47it/s]
Training 23/30: 100%|██████████| 143/143 [00:09<00:00, 14.91it/s]


Epoch 22: train_loss=0.1519, val_loss=0.1513



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 24/30:  13%|█▎        | 18/143 [00:09<00:45,  2.76it/s]
Training 24/30:  30%|███       | 43/143 [00:09<00:12,  8.13it/s]
Training 24/30:  48%|████▊     | 68/143 [00:09<00:04, 15.41it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 23: train_loss=0.1518, val_loss=0.1513



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<20:59,  8.87s/it]
Training 25/30:  12%|█▏        | 17/143 [00:08<00:47,  2.65it/s]
Training 25/30:  29%|██▉       | 42/143 [00:09<00:12,  8.12it/s]
Training 25/30:  52%|█████▏    | 75/143 [00:09<00:03, 17.97it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 24: train_loss=0.1516, val_loss=0.1512



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<20:10,  8.53s/it]
Training 26/30:  13%|█▎        | 19/143 [00:08<00:40,  3.08it/s]
Training 26/30:  29%|██▉       | 42/143 [00:08<00:12,  8.29it/s]
Training 26/30:  48%|████▊     | 69/143 [00:08<00:04, 16.60it/s]
Training 26/30:  69%|██████▉   | 99/143 [00:08<00:01, 28.85it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 15.15it/s]


Epoch 25: train_loss=0.1515, val_loss=0.1512



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:08<20:20,  8.60s/it]
Training 27/30:  13%|█▎        | 19/143 [00:08<00:40,  3.05it/s]
Training 27/30:  27%|██▋       | 38/143 [00:08<00:14,  7.31it/s]
Training 27/30:  48%|████▊     | 69/143 [00:08<00:04, 16.83it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 15.08it/s]


Epoch 26: train_loss=0.1515, val_loss=0.1511



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<20:12,  8.54s/it]
Training 28/30:  13%|█▎        | 18/143 [00:08<00:42,  2.91it/s]
Training 28/30:  31%|███▏      | 45/143 [00:08<00:10,  9.04it/s]
Training 28/30:  48%|████▊     | 69/143 [00:08<00:04, 16.36it/s]
Training 28/30:  71%|███████▏  | 102/143 [00:08<00:01, 29.95it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 15.15it/s]


Epoch 27: train_loss=0.1512, val_loss=0.1510



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<20:24,  8.62s/it]
Training 29/30:  10%|▉         | 14/143 [00:08<00:57,  2.23it/s]
Training 29/30:  29%|██▊       | 41/143 [00:08<00:12,  8.31it/s]
Training 29/30:  46%|████▌     | 66/143 [00:08<00:04, 15.89it/s]
Training 29/30:  70%|██████▉   | 100/143 [00:09<00:01, 29.78it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.94it/s]


Epoch 28: train_loss=0.1513, val_loss=0.1513



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<20:08,  8.51s/it]
Training 30/30:  11%|█         | 16/143 [00:08<00:49,  2.59it/s]
Training 30/30:  28%|██▊       | 40/143 [00:08<00:12,  8.06it/s]
Training 30/30:  45%|████▍     | 64/143 [00:08<00:05, 15.43it/s]
Training 30/30:  64%|██████▍   | 92/143 [00:08<00:01, 26.94it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 15.14it/s]


Epoch 29: train_loss=0.1512, val_loss=0.1507


2025-06-01 22:23:36,312 - __main__ - INFO - Saved linear probe for layer 2 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_2_probe.pth
2025-06-01 22:23:54,184 - __main__ - INFO - Running mlp probe on layer 2...
2025-06-01 22:23:54,185 - __main__ - INFO - Running mlp probe on layer 2 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:08<20:30,  8.67s/it]
Training 1/40:   7%|▋         | 10/143 [00:08<01:24,  1.57it/s]
Training 1/40:  15%|█▍        | 21/143 [00:08<00:30,  4.03it/s]
Training 1/40:  24%|██▍       | 35/143 [00:08<00:13,  8.27it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 15.09it/s]


Epoch 0: train_loss=0.1620, val_loss=0.1553



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:08<19:38,  8.30s/it]
Training 2/40:   8%|▊         | 11/143 [00:08<01:12,  1.81it/s]
Training 2/40:  15%|█▍        | 21/143 [00:08<00:29,  4.12it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 15.83it/s]


Epoch 1: train_loss=0.1536, val_loss=0.1506



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:08<20:41,  8.74s/it]
Training 3/40:   6%|▋         | 9/143 [00:08<01:35,  1.40it/s]
Training 3/40:  13%|█▎        | 18/143 [00:08<00:36,  3.38it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 15.06it/s]


Epoch 2: train_loss=0.1477, val_loss=0.1445



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:08<20:56,  8.85s/it]
Training 4/40:   7%|▋         | 10/143 [00:08<01:26,  1.54it/s]
Training 4/40:  16%|█▌        | 23/143 [00:09<00:27,  4.37it/s]
Training 4/40:  26%|██▌       | 37/143 [00:09<00:12,  8.51it/s]
Training 4/40:  38%|███▊      | 55/143 [00:09<00:05, 15.66it/s]
Training 4/40:  55%|█████▍    | 78/143 [00:09<00:02, 27.72it/s]
Training 4/40:  74%|███████▍  | 106/143 [00:09<00:00, 46.47it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 3: train_loss=0.1425, val_loss=0.1395



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<20:11,  8.53s/it]
Training 5/40:   6%|▌         | 8/143 [00:08<01:46,  1.27it/s]
Training 5/40:  13%|█▎        | 18/143 [00:08<00:35,  3.53it/s]
Training 5/40: 100%|██████████| 143/143 [00:09<00:00, 15.40it/s]


Epoch 4: train_loss=0.1439, val_loss=0.1393



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<20:23,  8.61s/it]
Training 6/40:   6%|▋         | 9/143 [00:08<01:34,  1.42it/s]
Training 6/40:  15%|█▍        | 21/143 [00:08<00:29,  4.11it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 15.25it/s]


Epoch 5: train_loss=0.1374, val_loss=0.1382



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 7/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 7/40:  15%|█▍        | 21/143 [00:09<00:30,  4.03it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.72it/s]


Epoch 6: train_loss=0.1380, val_loss=0.1353



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:08<20:29,  8.66s/it]
Training 8/40:   6%|▋         | 9/143 [00:08<01:34,  1.41it/s]
Training 8/40:  15%|█▌        | 22/143 [00:08<00:28,  4.32it/s]
Training 8/40:  26%|██▌       | 37/143 [00:08<00:11,  8.87it/s]
Training 8/40: 100%|██████████| 143/143 [00:09<00:00, 15.08it/s]


Epoch 7: train_loss=0.1343, val_loss=0.1339



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:08<20:13,  8.55s/it]
Training 9/40:   7%|▋         | 10/143 [00:08<01:23,  1.59it/s]
Training 9/40:  15%|█▌        | 22/143 [00:08<00:28,  4.30it/s]
Training 9/40:  30%|███       | 43/143 [00:08<00:09, 10.81it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 15.28it/s]


Epoch 8: train_loss=0.1337, val_loss=0.1327



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 10/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 10/40:  14%|█▍        | 20/143 [00:09<00:32,  3.77it/s]
Training 10/40:  22%|██▏       | 31/143 [00:09<00:16,  6.95it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 9: train_loss=0.1339, val_loss=0.1304



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 11/40:  17%|█▋        | 24/143 [00:09<00:26,  4.49it/s]
Training 11/40:  27%|██▋       | 38/143 [00:09<00:12,  8.60it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 10: train_loss=0.1306, val_loss=0.1294



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:08<20:41,  8.74s/it]
Training 12/40:   5%|▍         | 7/143 [00:08<02:06,  1.08it/s]
Training 12/40:  13%|█▎        | 19/143 [00:08<00:33,  3.74it/s]
Training 12/40:  23%|██▎       | 33/143 [00:09<00:13,  7.96it/s]
Training 12/40:  38%|███▊      | 54/143 [00:09<00:05, 16.51it/s]
Training 12/40:  52%|█████▏    | 74/143 [00:09<00:02, 26.95it/s]
Training 12/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 11: train_loss=0.1303, val_loss=0.1286



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:08<20:20,  8.60s/it]
Training 13/40:   8%|▊         | 11/143 [00:08<01:15,  1.75it/s]
Training 13/40:  14%|█▍        | 20/143 [00:08<00:32,  3.76it/s]
Training 13/40: 100%|██████████| 143/143 [00:09<00:00, 15.26it/s]


Epoch 12: train_loss=0.1303, val_loss=0.1369



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:08<20:27,  8.65s/it]
Training 14/40:   6%|▋         | 9/143 [00:08<01:34,  1.41it/s]
Training 14/40:  15%|█▍        | 21/143 [00:08<00:29,  4.10it/s]
Training 14/40:  26%|██▌       | 37/143 [00:08<00:11,  8.96it/s]
Training 14/40:  38%|███▊      | 54/143 [00:09<00:05, 15.84it/s]
Training 14/40:  54%|█████▍    | 77/143 [00:09<00:02, 28.15it/s]
Training 14/40: 100%|██████████| 143/143 [00:09<00:00, 14.80it/s]


Epoch 13: train_loss=0.1280, val_loss=0.1273



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:08<20:12,  8.54s/it]
Training 15/40:   6%|▌         | 8/143 [00:08<01:46,  1.27it/s]
Training 15/40:  14%|█▍        | 20/143 [00:08<00:30,  3.99it/s]
Training 15/40:  24%|██▍       | 34/143 [00:08<00:13,  8.29it/s]
Training 15/40:  36%|███▋      | 52/143 [00:08<00:05, 15.69it/s]
Training 15/40:  50%|█████     | 72/143 [00:09<00:02, 26.33it/s]
Training 15/40:  66%|██████▋   | 95/143 [00:09<00:01, 41.79it/s]
Training 15/40: 100%|██████████| 143/143 [00:09<00:00, 14.77it/s]


Epoch 14: train_loss=0.1273, val_loss=0.1264



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:08<20:47,  8.79s/it]
Training 16/40:   7%|▋         | 10/143 [00:08<01:25,  1.55it/s]
Training 16/40:  13%|█▎        | 19/143 [00:08<00:35,  3.53it/s]
Training 16/40: 100%|██████████| 143/143 [00:09<00:00, 14.96it/s]


Epoch 15: train_loss=0.1271, val_loss=0.1256



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:08<20:30,  8.67s/it]
Training 17/40:   6%|▋         | 9/143 [00:08<01:34,  1.41it/s]
Training 17/40:  14%|█▍        | 20/143 [00:08<00:31,  3.87it/s]
Training 17/40:  27%|██▋       | 38/143 [00:08<00:11,  9.38it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 15.09it/s]


Epoch 16: train_loss=0.1249, val_loss=0.1295



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<20:38,  8.72s/it]
Training 18/40:   6%|▋         | 9/143 [00:08<01:35,  1.40it/s]
Training 18/40:  14%|█▍        | 20/143 [00:08<00:32,  3.84it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 15.09it/s]


Epoch 17: train_loss=0.1247, val_loss=0.1398



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:08<20:04,  8.48s/it]
Training 19/40:   7%|▋         | 10/143 [00:08<01:22,  1.61it/s]
Training 19/40:  13%|█▎        | 19/143 [00:08<00:34,  3.64it/s]
Training 19/40:  46%|████▌     | 66/143 [00:08<00:04, 18.52it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 15.42it/s]


Epoch 18: train_loss=0.1251, val_loss=0.1304



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:08<20:34,  8.70s/it]
Training 20/40:   6%|▌         | 8/143 [00:08<01:48,  1.24it/s]
Training 20/40:  13%|█▎        | 18/143 [00:08<00:36,  3.47it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 15.17it/s]


Epoch 19: train_loss=0.1239, val_loss=0.1274



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:08<20:00,  8.46s/it]
Training 21/40:   6%|▋         | 9/143 [00:08<01:32,  1.44it/s]
Training 21/40:  15%|█▌        | 22/143 [00:08<00:27,  4.42it/s]
Training 21/40:  24%|██▍       | 35/143 [00:08<00:12,  8.43it/s]
Training 21/40:  38%|███▊      | 54/143 [00:08<00:05, 16.36it/s]
Training 21/40:  54%|█████▍    | 77/143 [00:08<00:02, 28.90it/s]
Training 21/40:  68%|██████▊   | 97/143 [00:09<00:01, 42.12it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.92it/s]


Epoch 20: train_loss=0.1228, val_loss=0.1233



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<20:32,  8.68s/it]
Training 22/40:   7%|▋         | 10/143 [00:08<01:24,  1.57it/s]
Training 22/40:  13%|█▎        | 19/143 [00:08<00:34,  3.57it/s]
Training 22/40:  22%|██▏       | 32/143 [00:08<00:14,  7.51it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 15.01it/s]


Epoch 21: train_loss=0.1232, val_loss=0.1251



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 23/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 23/40:  15%|█▌        | 22/143 [00:09<00:29,  4.07it/s]
Training 23/40:  26%|██▌       | 37/143 [00:09<00:12,  8.44it/s]
Training 23/40:  38%|███▊      | 54/143 [00:09<00:05, 15.05it/s]
Training 23/40:  51%|█████     | 73/143 [00:09<00:02, 24.72it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 22: train_loss=0.1244, val_loss=0.1253



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:08<20:47,  8.79s/it]
Training 24/40:   6%|▌         | 8/143 [00:08<01:49,  1.23it/s]
Training 24/40:  15%|█▍        | 21/143 [00:08<00:29,  4.10it/s]
Training 24/40:  23%|██▎       | 33/143 [00:09<00:14,  7.65it/s]
Training 24/40:  38%|███▊      | 55/143 [00:09<00:05, 16.58it/s]
Training 24/40:  53%|█████▎    | 76/143 [00:09<00:02, 27.51it/s]
Training 24/40:  73%|███████▎  | 104/143 [00:09<00:00, 46.24it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 23: train_loss=0.1200, val_loss=0.1272



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:08<20:16,  8.57s/it]
Training 25/40:   6%|▌         | 8/143 [00:08<01:46,  1.26it/s]
Training 25/40:  14%|█▍        | 20/143 [00:08<00:30,  3.97it/s]
Training 25/40:  22%|██▏       | 32/143 [00:08<00:14,  7.63it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 15.22it/s]


Epoch 24: train_loss=0.1203, val_loss=0.1263



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 26/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 26/40:  12%|█▏        | 17/143 [00:09<00:39,  3.20it/s]
Training 26/40:  20%|██        | 29/143 [00:09<00:16,  6.72it/s]
Training 26/40:  34%|███▎      | 48/143 [00:09<00:06, 14.26it/s]
Training 26/40:  46%|████▌     | 66/143 [00:09<00:03, 23.43it/s]
Training 26/40:  61%|██████    | 87/143 [00:09<00:01, 37.06it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 25: train_loss=0.1200, val_loss=0.1222



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:45,  9.20s/it]
Training 27/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 27/40:  13%|█▎        | 18/143 [00:09<00:38,  3.22it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 26: train_loss=0.1197, val_loss=0.1300



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:08<20:06,  8.49s/it]
Training 28/40:   5%|▍         | 7/143 [00:08<02:02,  1.11it/s]
Training 28/40:  20%|█▉        | 28/143 [00:08<00:19,  5.92it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 15.50it/s]


Epoch 27: train_loss=0.1200, val_loss=0.1259



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:08<20:41,  8.75s/it]
Training 29/40:   6%|▋         | 9/143 [00:08<01:35,  1.40it/s]
Training 29/40:  13%|█▎        | 18/143 [00:08<00:37,  3.38it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 15.06it/s]


Epoch 28: train_loss=0.1224, val_loss=0.1320



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:08<20:14,  8.55s/it]
Training 30/40:   6%|▌         | 8/143 [00:08<01:46,  1.26it/s]
Training 30/40:  14%|█▍        | 20/143 [00:08<00:30,  3.98it/s]
Training 30/40:  23%|██▎       | 33/143 [00:08<00:13,  7.97it/s]
Training 30/40:  36%|███▋      | 52/143 [00:08<00:05, 15.83it/s]
Training 30/40:  51%|█████     | 73/143 [00:09<00:02, 27.12it/s]
Training 30/40:  69%|██████▉   | 99/143 [00:09<00:00, 44.92it/s]
Training 30/40: 100%|██████████| 143/143 [00:09<00:00, 14.78it/s]


Epoch 29: train_loss=0.1182, val_loss=0.1226



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 31/40:   6%|▌         | 8/143 [00:08<01:49,  1.23it/s]
Training 31/40:  13%|█▎        | 18/143 [00:09<00:36,  3.43it/s]
Training 31/40: 100%|██████████| 143/143 [00:09<00:00, 14.96it/s]


Epoch 30: train_loss=0.1197, val_loss=0.1197



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<20:06,  8.50s/it]
Training 32/40:   6%|▌         | 8/143 [00:08<01:46,  1.27it/s]
Training 32/40:  13%|█▎        | 19/143 [00:08<00:32,  3.78it/s]
Training 32/40:  22%|██▏       | 32/143 [00:08<00:14,  7.80it/s]
Training 32/40:  34%|███▍      | 49/143 [00:08<00:06, 14.85it/s]
Training 32/40:  48%|████▊     | 69/143 [00:09<00:02, 25.59it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 15.04it/s]


Epoch 31: train_loss=0.1207, val_loss=0.1215



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:08<20:25,  8.63s/it]
Training 33/40:   6%|▌         | 8/143 [00:08<01:47,  1.25it/s]
Training 33/40:  13%|█▎        | 19/143 [00:08<00:33,  3.72it/s]
Training 33/40:  23%|██▎       | 33/143 [00:08<00:13,  7.98it/s]
Training 33/40:  38%|███▊      | 55/143 [00:09<00:05, 17.02it/s]
Training 33/40:  56%|█████▌    | 80/143 [00:09<00:02, 30.32it/s]
Training 33/40:  75%|███████▍  | 107/143 [00:09<00:00, 48.41it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 32: train_loss=0.1205, val_loss=0.1198



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:08<19:48,  8.37s/it]
Training 34/40:   7%|▋         | 10/143 [00:08<01:21,  1.63it/s]
Training 34/40:  24%|██▍       | 35/143 [00:08<00:14,  7.44it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 15.76it/s]


Epoch 33: train_loss=0.1184, val_loss=0.1207



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:08<20:01,  8.46s/it]
Training 35/40:   7%|▋         | 10/143 [00:08<01:22,  1.61it/s]
Training 35/40:  16%|█▌        | 23/143 [00:08<00:26,  4.57it/s]
Training 35/40:  28%|██▊       | 40/143 [00:08<00:10,  9.86it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 15.40it/s]


Epoch 34: train_loss=0.1181, val_loss=0.1190



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<20:28,  8.65s/it]
Training 36/40:   6%|▌         | 8/143 [00:08<01:48,  1.25it/s]
Training 36/40:  13%|█▎        | 18/143 [00:08<00:35,  3.48it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 15.22it/s]


Epoch 35: train_loss=0.1178, val_loss=0.1189



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:08<19:59,  8.44s/it]
Training 37/40:   6%|▌         | 8/143 [00:08<01:45,  1.28it/s]
Training 37/40:  14%|█▍        | 20/143 [00:08<00:30,  4.02it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 15.51it/s]


Epoch 36: train_loss=0.1181, val_loss=0.1178



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [04:53<11:34:15, 293.35s/it]
Training 38/40: 100%|██████████| 143/143 [04:53<00:00,  2.06s/it]  


Epoch 37: train_loss=0.1174, val_loss=0.1230



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:10<25:56, 10.96s/it]
Training 39/40:   6%|▋         | 9/143 [00:11<01:59,  1.12it/s]
Training 39/40:  12%|█▏        | 17/143 [00:11<00:49,  2.53it/s]
Training 39/40: 100%|██████████| 143/143 [00:11<00:00, 12.15it/s]


Epoch 38: train_loss=0.1190, val_loss=0.1291



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:19,  9.44s/it]
Training 40/40:   5%|▍         | 7/143 [00:09<02:16,  1.00s/it]
Training 40/40:  10%|█         | 15/143 [00:09<00:48,  2.64it/s]
Training 40/40:  18%|█▊        | 26/143 [00:09<00:20,  5.74it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 39: train_loss=0.1178, val_loss=0.1206


2025-06-01 22:41:12,213 - __main__ - INFO - Saved mlp probe for layer 2 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_2_probe.pth
 17%|█▋        | 1/6 [39:03<3:15:17, 2343.43s/it]2025-06-01 22:41:30,816 - __main__ - INFO - Processing layer 4...
2025-06-01 22:41:30,817 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:06<2:13:54,  7.00s/it]
Extracting features:   0%|          | 2/1149 [00:07<59:47,  3.13s/it]  
Extracting features:   0%|          | 3/1149 [00:07<36:39,  1.92s/it]
Extracting features:   0%|          | 4/1149 [00:08<24:49,  1.30s/it]
Extracting features:   0%|          | 5/1149 [00:08<18:15,  1.04it/s]
Extracting features:   1%|          | 6/1149 [00:08<14:22,  1.32it/s]
Extracting features:   1%|          | 7/1149 [00:09<12:00,  1.58it/s]
Extracting features:   1%|          | 8/1149 [00:09<10:15,  1.85it/s]
Extractin

Epoch 0: train_loss=0.1600, val_loss=0.1550



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:08<20:06,  8.49s/it]
Training 2/30:  15%|█▍        | 21/143 [00:08<00:35,  3.42it/s]
Training 2/30:  32%|███▏      | 46/143 [00:08<00:10,  9.11it/s]
Training 2/30:  52%|█████▏    | 75/143 [00:08<00:03, 18.06it/s]
Training 2/30:  75%|███████▍  | 107/143 [00:08<00:01, 31.20it/s]
Training 2/30: 100%|██████████| 143/143 [00:09<00:00, 15.23it/s]


Epoch 1: train_loss=0.1543, val_loss=0.1512



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<20:20,  8.60s/it]
Training 3/30:  13%|█▎        | 19/143 [00:08<00:40,  3.05it/s]
Training 3/30:  31%|███▏      | 45/143 [00:08<00:11,  8.91it/s]
Training 3/30:  50%|████▉     | 71/143 [00:08<00:04, 16.82it/s]
Training 3/30:  71%|███████▏  | 102/143 [00:09<00:01, 29.43it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 15.08it/s]


Epoch 2: train_loss=0.1508, val_loss=0.1483



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 4/30:  15%|█▍        | 21/143 [00:09<00:37,  3.25it/s]
Training 4/30:  29%|██▊       | 41/143 [00:09<00:13,  7.57it/s]
Training 4/30:  44%|████▍     | 63/143 [00:09<00:05, 14.02it/s]
Training 4/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.31it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 3: train_loss=0.1479, val_loss=0.1459



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 5/30:  13%|█▎        | 18/143 [00:09<00:45,  2.73it/s]
Training 5/30:  29%|██▉       | 42/143 [00:09<00:12,  7.85it/s]
Training 5/30:  47%|████▋     | 67/143 [00:09<00:05, 15.07it/s]
Training 5/30:  66%|██████▌   | 94/143 [00:09<00:01, 25.48it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 4: train_loss=0.1458, val_loss=0.1439



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<20:05,  8.49s/it]
Training 6/30:  18%|█▊        | 26/143 [00:08<00:27,  4.25it/s]
Training 6/30:  37%|███▋      | 53/143 [00:08<00:08, 10.38it/s]
Training 6/30:  56%|█████▌    | 80/143 [00:08<00:03, 18.66it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 15.29it/s]


Epoch 5: train_loss=0.1438, val_loss=0.1421



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 7/30:  15%|█▌        | 22/143 [00:09<00:36,  3.27it/s]
Training 7/30:  33%|███▎      | 47/143 [00:09<00:11,  8.47it/s]
Training 7/30:  52%|█████▏    | 74/143 [00:09<00:04, 16.10it/s]
Training 7/30:  71%|███████   | 101/143 [00:09<00:01, 26.25it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 6: train_loss=0.1420, val_loss=0.1408



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:08<20:05,  8.49s/it]
Training 8/30:  17%|█▋        | 24/143 [00:08<00:30,  3.92it/s]
Training 8/30:  34%|███▍      | 49/143 [00:08<00:09,  9.59it/s]
Training 8/30:  55%|█████▌    | 79/143 [00:08<00:03, 18.85it/s]
Training 8/30: 100%|██████████| 143/143 [00:09<00:00, 15.29it/s]


Epoch 7: train_loss=0.1404, val_loss=0.1392



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:08<20:05,  8.49s/it]
Training 9/30:  15%|█▍        | 21/143 [00:08<00:35,  3.42it/s]
Training 9/30:  33%|███▎      | 47/143 [00:08<00:10,  9.34it/s]
Training 9/30:  53%|█████▎    | 76/143 [00:08<00:03, 18.26it/s]
Training 9/30:  75%|███████▍  | 107/143 [00:08<00:01, 30.93it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 15.23it/s]


Epoch 8: train_loss=0.1393, val_loss=0.1382



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:08<20:49,  8.80s/it]
Training 10/30:  14%|█▍        | 20/143 [00:08<00:39,  3.14it/s]
Training 10/30:  31%|███▏      | 45/143 [00:09<00:11,  8.64it/s]
Training 10/30:  52%|█████▏    | 74/143 [00:09<00:03, 17.30it/s]
Training 10/30:  72%|███████▏  | 103/143 [00:09<00:01, 28.78it/s]
Training 10/30: 100%|██████████| 143/143 [00:09<00:00, 14.73it/s]


Epoch 9: train_loss=0.1380, val_loss=0.1371



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 11/30:  15%|█▍        | 21/143 [00:09<00:37,  3.24it/s]
Training 11/30:  31%|███       | 44/143 [00:09<00:12,  8.19it/s]
Training 11/30:  47%|████▋     | 67/143 [00:09<00:05, 14.90it/s]
Training 11/30:  70%|██████▉   | 100/143 [00:09<00:01, 27.94it/s]
Training 11/30: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 10: train_loss=0.1372, val_loss=0.1361



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:08<20:19,  8.59s/it]
Training 12/30:  15%|█▌        | 22/143 [00:08<00:34,  3.55it/s]
Training 12/30:  33%|███▎      | 47/143 [00:08<00:10,  9.17it/s]
Training 12/30:  55%|█████▌    | 79/143 [00:08<00:03, 18.98it/s]
Training 12/30: 100%|██████████| 143/143 [00:09<00:00, 15.12it/s]


Epoch 11: train_loss=0.1364, val_loss=0.1355



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:10<25:22, 10.72s/it]
Training 13/30:  14%|█▍        | 20/143 [00:10<00:47,  2.59it/s]
Training 13/30:  29%|██▉       | 42/143 [00:10<00:15,  6.59it/s]
Training 13/30:  47%|████▋     | 67/143 [00:11<00:05, 12.82it/s]
Training 13/30:  66%|██████▌   | 94/143 [00:11<00:02, 21.88it/s]
Training 13/30: 100%|██████████| 143/143 [00:11<00:00, 12.22it/s]


Epoch 12: train_loss=0.1354, val_loss=0.1346



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<20:16,  8.57s/it]
Training 14/30:  17%|█▋        | 24/143 [00:08<00:30,  3.89it/s]
Training 14/30:  33%|███▎      | 47/143 [00:08<00:10,  9.06it/s]
Training 14/30:  54%|█████▍    | 77/143 [00:08<00:03, 18.27it/s]
Training 14/30:  75%|███████▍  | 107/143 [00:08<00:01, 30.42it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 15.08it/s]


Epoch 13: train_loss=0.1347, val_loss=0.1345



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 15/30:  15%|█▌        | 22/143 [00:09<00:35,  3.42it/s]
Training 15/30:  34%|███▎      | 48/143 [00:09<00:10,  9.07it/s]
Training 15/30:  53%|█████▎    | 76/143 [00:09<00:03, 17.31it/s]
Training 15/30:  75%|███████▍  | 107/143 [00:09<00:01, 29.50it/s]
Training 15/30: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 14: train_loss=0.1340, val_loss=0.1334



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:08<20:28,  8.65s/it]
Training 16/30:  15%|█▍        | 21/143 [00:08<00:36,  3.36it/s]
Training 16/30:  31%|███       | 44/143 [00:08<00:11,  8.49it/s]
Training 16/30:  53%|█████▎    | 76/143 [00:08<00:03, 18.25it/s]
Training 16/30:  74%|███████▍  | 106/143 [00:09<00:01, 30.28it/s]
Training 16/30: 100%|██████████| 143/143 [00:09<00:00, 14.93it/s]


Epoch 15: train_loss=0.1336, val_loss=0.1328



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:08<20:26,  8.64s/it]
Training 17/30:  17%|█▋        | 24/143 [00:08<00:30,  3.85it/s]
Training 17/30:  31%|███▏      | 45/143 [00:08<00:11,  8.53it/s]
Training 17/30:  56%|█████▌    | 80/143 [00:08<00:03, 19.24it/s]
Training 17/30: 100%|██████████| 143/143 [00:09<00:00, 15.11it/s]


Epoch 16: train_loss=0.1330, val_loss=0.1324



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<20:46,  8.78s/it]
Training 18/30:  18%|█▊        | 26/143 [00:08<00:28,  4.11it/s]
Training 18/30:  35%|███▍      | 50/143 [00:08<00:09,  9.38it/s]
Training 18/30:  58%|█████▊    | 83/143 [00:09<00:03, 19.28it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.84it/s]


Epoch 17: train_loss=0.1325, val_loss=0.1319



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:08<20:54,  8.83s/it]
Training 19/30:  16%|█▌        | 23/143 [00:08<00:33,  3.61it/s]
Training 19/30:  31%|███▏      | 45/143 [00:09<00:11,  8.42it/s]
Training 19/30:  52%|█████▏    | 74/143 [00:09<00:04, 17.07it/s]
Training 19/30: 100%|██████████| 143/143 [00:09<00:00, 14.73it/s]


Epoch 18: train_loss=0.1322, val_loss=0.1315



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:08<19:42,  8.33s/it]
Training 20/30:  17%|█▋        | 24/143 [00:08<00:29,  4.00it/s]
Training 20/30:  36%|███▋      | 52/143 [00:08<00:08, 10.48it/s]
Training 20/30:  57%|█████▋    | 82/143 [00:08<00:03, 19.87it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 15.57it/s]


Epoch 19: train_loss=0.1316, val_loss=0.1312



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<20:31,  8.67s/it]
Training 21/30:  11%|█         | 16/143 [00:08<00:49,  2.54it/s]
Training 21/30:  30%|███       | 43/143 [00:08<00:11,  8.58it/s]
Training 21/30:  50%|████▉     | 71/143 [00:08<00:04, 17.03it/s]
Training 21/30:  69%|██████▉   | 99/143 [00:09<00:01, 28.24it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.87it/s]


Epoch 20: train_loss=0.1313, val_loss=0.1311



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:08<20:18,  8.58s/it]
Training 22/30:  15%|█▌        | 22/143 [00:08<00:34,  3.55it/s]
Training 22/30:  33%|███▎      | 47/143 [00:08<00:10,  9.18it/s]
Training 22/30:  51%|█████     | 73/143 [00:08<00:04, 17.09it/s]
Training 22/30:  76%|███████▌  | 109/143 [00:08<00:01, 31.87it/s]
Training 22/30: 100%|██████████| 143/143 [00:09<00:00, 15.11it/s]


Epoch 21: train_loss=0.1309, val_loss=0.1305



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:08<20:33,  8.68s/it]
Training 23/30:  15%|█▌        | 22/143 [00:08<00:34,  3.51it/s]
Training 23/30:  35%|███▍      | 50/143 [00:08<00:09,  9.75it/s]
Training 23/30:  53%|█████▎    | 76/143 [00:08<00:03, 17.55it/s]
Training 23/30:  76%|███████▌  | 109/143 [00:09<00:01, 30.87it/s]
Training 23/30: 100%|██████████| 143/143 [00:09<00:00, 14.96it/s]


Epoch 22: train_loss=0.1307, val_loss=0.1302



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 24/30:  17%|█▋        | 24/143 [00:08<00:31,  3.77it/s]
Training 24/30:  35%|███▍      | 50/143 [00:09<00:09,  9.45it/s]
Training 24/30:  58%|█████▊    | 83/143 [00:09<00:03, 19.28it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 23: train_loss=0.1302, val_loss=0.1301



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<20:15,  8.56s/it]
Training 25/30:  12%|█▏        | 17/143 [00:08<00:45,  2.74it/s]
Training 25/30:  29%|██▉       | 42/143 [00:08<00:12,  8.40it/s]
Training 25/30:  47%|████▋     | 67/143 [00:08<00:04, 16.04it/s]
Training 25/30:  69%|██████▉   | 99/143 [00:08<00:01, 29.15it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 15.10it/s]


Epoch 24: train_loss=0.1302, val_loss=0.1297



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<20:14,  8.56s/it]
Training 26/30:  14%|█▍        | 20/143 [00:08<00:38,  3.23it/s]
Training 26/30:  30%|███       | 43/143 [00:08<00:11,  8.43it/s]
Training 26/30:  44%|████▍     | 63/143 [00:08<00:05, 14.49it/s]
Training 26/30:  63%|██████▎   | 90/143 [00:08<00:02, 25.55it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 15.06it/s]


Epoch 25: train_loss=0.1299, val_loss=0.1297



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:08<20:07,  8.50s/it]
Training 27/30:  17%|█▋        | 25/143 [00:08<00:28,  4.08it/s]
Training 27/30:  38%|███▊      | 55/143 [00:08<00:08, 10.89it/s]
Training 27/30:  61%|██████    | 87/143 [00:08<00:02, 20.71it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 15.28it/s]


Epoch 26: train_loss=0.1295, val_loss=0.1295



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<20:22,  8.61s/it]
Training 28/30:  14%|█▍        | 20/143 [00:08<00:38,  3.21it/s]
Training 28/30:  31%|███       | 44/143 [00:08<00:11,  8.60it/s]
Training 28/30:  48%|████▊     | 69/143 [00:08<00:04, 16.20it/s]
Training 28/30:  71%|███████   | 101/143 [00:09<00:01, 29.21it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 14.99it/s]


Epoch 27: train_loss=0.1292, val_loss=0.1296



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<19:59,  8.45s/it]
Training 29/30:  16%|█▌        | 23/143 [00:08<00:31,  3.78it/s]
Training 29/30:  35%|███▍      | 50/143 [00:08<00:09,  9.94it/s]
Training 29/30:  54%|█████▍    | 77/143 [00:08<00:03, 18.27it/s]
Training 29/30:  73%|███████▎  | 105/143 [00:08<00:01, 29.72it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 15.33it/s]


Epoch 28: train_loss=0.1291, val_loss=0.1290



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<21:17,  8.99s/it]
Training 30/30:  13%|█▎        | 18/143 [00:09<00:45,  2.77it/s]
Training 30/30:  32%|███▏      | 46/143 [00:09<00:10,  8.82it/s]
Training 30/30:  52%|█████▏    | 74/143 [00:09<00:04, 17.02it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 29: train_loss=0.1290, val_loss=0.1292


2025-06-01 23:01:05,323 - __main__ - INFO - Saved linear probe for layer 4 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_4_probe.pth
2025-06-01 23:01:23,129 - __main__ - INFO - Running mlp probe on layer 4...
2025-06-01 23:01:23,130 - __main__ - INFO - Running mlp probe on layer 4 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:08<19:44,  8.34s/it]
Training 1/40:   9%|▉         | 13/143 [00:08<01:00,  2.14it/s]
Training 1/40:  17%|█▋        | 25/143 [00:08<00:24,  4.89it/s]
Training 1/40:  29%|██▉       | 42/143 [00:08<00:09, 10.22it/s]
Training 1/40:  43%|████▎     | 61/143 [00:08<00:04, 18.15it/s]
Training 1/40:  57%|█████▋    | 81/143 [00:08<00:02, 28.89it/s]
Training 1/40:  75%|███████▍  | 107/143 [00:08<00:00, 46.88it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 15.12it/s]


Epoch 0: train_loss=0.1578, val_loss=0.1440



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:08<20:31,  8.67s/it]
Training 2/40:   9%|▉         | 13/143 [00:08<01:03,  2.06it/s]
Training 2/40:  17%|█▋        | 25/143 [00:08<00:25,  4.71it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 15.20it/s]


Epoch 1: train_loss=0.1318, val_loss=0.1259



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:08<21:01,  8.88s/it]
Training 3/40:  10%|▉         | 14/143 [00:08<00:59,  2.17it/s]
Training 3/40:  20%|█▉        | 28/143 [00:09<00:22,  5.21it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.85it/s]


Epoch 2: train_loss=0.1217, val_loss=0.1266



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:08<20:20,  8.60s/it]
Training 4/40:   9%|▉         | 13/143 [00:08<01:02,  2.08it/s]
Training 4/40:  19%|█▉        | 27/143 [00:08<00:22,  5.22it/s]
Training 4/40:  30%|███       | 43/143 [00:08<00:09, 10.08it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 15.19it/s]


Epoch 3: train_loss=0.1164, val_loss=0.1127



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<20:28,  8.65s/it]
Training 5/40:   8%|▊         | 12/143 [00:08<01:08,  1.90it/s]
Training 5/40:  17%|█▋        | 25/143 [00:08<00:24,  4.80it/s]
Training 5/40:  27%|██▋       | 39/143 [00:08<00:11,  9.01it/s]
Training 5/40:  41%|████▏     | 59/143 [00:09<00:04, 17.15it/s]
Training 5/40:  57%|█████▋    | 81/143 [00:09<00:02, 28.76it/s]
Training 5/40:  73%|███████▎  | 105/143 [00:09<00:00, 44.67it/s]
Training 5/40: 100%|██████████| 143/143 [00:09<00:00, 14.63it/s]


Epoch 4: train_loss=0.1104, val_loss=0.1096



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<20:45,  8.77s/it]
Training 6/40:   9%|▉         | 13/143 [00:08<01:03,  2.04it/s]
Training 6/40:  17%|█▋        | 25/143 [00:08<00:25,  4.67it/s]
Training 6/40:  29%|██▊       | 41/143 [00:09<00:10,  9.46it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.88it/s]


Epoch 5: train_loss=0.1075, val_loss=0.1046



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<20:30,  8.67s/it]
Training 7/40:   9%|▉         | 13/143 [00:08<01:03,  2.06it/s]
Training 7/40:  20%|█▉        | 28/143 [00:08<00:21,  5.40it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 15.11it/s]


Epoch 6: train_loss=0.1046, val_loss=0.0988



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:10<24:34, 10.39s/it]
Training 8/40:  43%|████▎     | 62/143 [00:10<00:09,  8.37it/s]
Training 8/40: 100%|██████████| 143/143 [00:11<00:00, 12.94it/s]


Epoch 7: train_loss=0.1005, val_loss=0.0978



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 9/40:  14%|█▍        | 20/143 [00:09<00:34,  3.58it/s]
Training 9/40:  20%|██        | 29/143 [00:09<00:18,  6.13it/s]
Training 9/40:  27%|██▋       | 39/143 [00:09<00:10,  9.91it/s]
Training 9/40:  36%|███▋      | 52/143 [00:09<00:05, 16.44it/s]
Training 9/40:  51%|█████     | 73/143 [00:09<00:02, 30.36it/s]
Training 9/40:  66%|██████▌   | 94/143 [00:09<00:01, 46.98it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.52it/s]


Epoch 8: train_loss=0.0987, val_loss=0.0955



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 10/40:   8%|▊         | 11/143 [00:09<01:18,  1.69it/s]
Training 10/40:  16%|█▌        | 23/143 [00:09<00:27,  4.29it/s]
Training 10/40:  27%|██▋       | 38/143 [00:09<00:12,  8.74it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.68it/s]


Epoch 9: train_loss=0.0975, val_loss=0.0997



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<20:45,  8.77s/it]
Training 11/40:   8%|▊         | 11/143 [00:08<01:17,  1.71it/s]
Training 11/40:  17%|█▋        | 24/143 [00:08<00:26,  4.57it/s]
Training 11/40:  27%|██▋       | 38/143 [00:09<00:12,  8.75it/s]
Training 11/40:  39%|███▉      | 56/143 [00:09<00:05, 15.97it/s]
Training 11/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.40it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 10: train_loss=0.0968, val_loss=0.0943



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<23:01,  9.73s/it]
Training 12/40:   3%|▎         | 5/143 [00:09<03:23,  1.47s/it]
Training 12/40:   9%|▉         | 13/143 [00:09<00:57,  2.28it/s]
Training 12/40:  27%|██▋       | 39/143 [00:10<00:10,  9.51it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.48it/s]


Epoch 11: train_loss=0.0944, val_loss=0.0951



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 13/40:   6%|▋         | 9/143 [00:09<01:46,  1.26it/s]
Training 13/40:  13%|█▎        | 18/143 [00:09<00:40,  3.06it/s]
Training 13/40:  20%|██        | 29/143 [00:10<00:18,  6.07it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 12: train_loss=0.0945, val_loss=0.1033



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:10<24:47, 10.48s/it]
Training 14/40:   5%|▍         | 7/143 [00:10<02:31,  1.11s/it]
Training 14/40:  11%|█         | 16/143 [00:10<00:49,  2.58it/s]
Training 14/40:  29%|██▉       | 42/143 [00:10<00:10,  9.32it/s]
Training 14/40: 100%|██████████| 143/143 [00:11<00:00, 12.65it/s]


Epoch 13: train_loss=0.0939, val_loss=0.0937



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<22:22,  9.45s/it]
Training 15/40:   5%|▍         | 7/143 [00:09<02:16,  1.00s/it]
Training 15/40:  11%|█         | 16/143 [00:09<00:44,  2.85it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 14: train_loss=0.0907, val_loss=0.1054



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:10<24:36, 10.40s/it]
Training 16/40:   4%|▍         | 6/143 [00:10<02:57,  1.30s/it]
Training 16/40:  12%|█▏        | 17/143 [00:10<00:44,  2.84it/s]
Training 16/40:  20%|██        | 29/143 [00:10<00:19,  5.93it/s]
Training 16/40:  31%|███       | 44/143 [00:10<00:08, 11.15it/s]
Training 16/40:  43%|████▎     | 62/143 [00:10<00:04, 19.43it/s]
Training 16/40: 100%|██████████| 143/143 [00:11<00:00, 12.50it/s]


Epoch 15: train_loss=0.0907, val_loss=0.0963



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:11<28:14, 11.93s/it]
Training 17/40:   5%|▍         | 7/143 [00:12<02:51,  1.26s/it]
Training 17/40:   9%|▉         | 13/143 [00:12<01:13,  1.77it/s]
Training 17/40:  43%|████▎     | 61/143 [00:12<00:06, 12.82it/s]
Training 17/40: 100%|██████████| 143/143 [00:12<00:00, 11.17it/s]


Epoch 16: train_loss=0.0884, val_loss=0.0869



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:10<24:36, 10.40s/it]
Training 18/40:   6%|▌         | 8/143 [00:10<02:09,  1.04it/s]
Training 18/40:  13%|█▎        | 18/143 [00:10<00:42,  2.92it/s]
Training 18/40:  20%|█▉        | 28/143 [00:10<00:20,  5.48it/s]
Training 18/40:  32%|███▏      | 46/143 [00:10<00:08, 11.81it/s]
Training 18/40:  46%|████▌     | 66/143 [00:10<00:03, 21.06it/s]
Training 18/40:  63%|██████▎   | 90/143 [00:11<00:01, 35.36it/s]
Training 18/40: 100%|██████████| 143/143 [00:11<00:00, 12.37it/s]


Epoch 17: train_loss=0.0868, val_loss=0.0863



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<23:28,  9.92s/it]
Training 19/40:   5%|▍         | 7/143 [00:10<02:23,  1.05s/it]
Training 19/40:  11%|█         | 16/143 [00:10<00:46,  2.72it/s]
Training 19/40:  20%|█▉        | 28/143 [00:10<00:19,  5.94it/s]
Training 19/40:  31%|███▏      | 45/143 [00:10<00:08, 12.14it/s]
Training 19/40:  45%|████▌     | 65/143 [00:10<00:03, 21.76it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.05it/s]


Epoch 18: train_loss=0.0877, val_loss=0.0901



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:10<24:43, 10.45s/it]
Training 20/40:   5%|▍         | 7/143 [00:10<02:30,  1.11s/it]
Training 20/40:  10%|▉         | 14/143 [00:10<00:58,  2.20it/s]
Training 20/40: 100%|██████████| 143/143 [00:11<00:00, 12.74it/s]


Epoch 19: train_loss=0.0874, val_loss=0.0887



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:10<24:23, 10.31s/it]
Training 21/40:   6%|▌         | 8/143 [00:10<02:08,  1.05it/s]
Training 21/40:  14%|█▍        | 20/143 [00:10<00:37,  3.32it/s]
Training 21/40:  23%|██▎       | 33/143 [00:10<00:16,  6.68it/s]
Training 21/40:  35%|███▍      | 50/143 [00:10<00:07, 12.64it/s]
Training 21/40:  49%|████▉     | 70/143 [00:10<00:03, 21.84it/s]
Training 21/40: 100%|██████████| 143/143 [00:11<00:00, 12.60it/s]


Epoch 20: train_loss=0.0860, val_loss=0.0858



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:10<24:57, 10.55s/it]
Training 22/40:   4%|▍         | 6/143 [00:10<03:00,  1.32s/it]
Training 22/40:   9%|▉         | 13/143 [00:10<01:03,  2.05it/s]
Training 22/40:  19%|█▉        | 27/143 [00:10<00:20,  5.63it/s]
Training 22/40:  29%|██▉       | 42/143 [00:10<00:09, 10.78it/s]
Training 22/40:  43%|████▎     | 61/143 [00:11<00:04, 19.48it/s]
Training 22/40: 100%|██████████| 143/143 [00:11<00:00, 12.29it/s]


Epoch 21: train_loss=0.0846, val_loss=0.0865



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:08<20:57,  8.86s/it]
Training 23/40:   5%|▍         | 7/143 [00:08<02:07,  1.06it/s]
Training 23/40:  12%|█▏        | 17/143 [00:09<00:38,  3.25it/s]
Training 23/40: 100%|██████████| 143/143 [00:09<00:00, 14.83it/s]


Epoch 22: train_loss=0.0856, val_loss=0.0845



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:31,  9.09s/it]
Training 24/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 24/40:  11%|█         | 16/143 [00:09<00:43,  2.89it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 23: train_loss=0.0853, val_loss=0.0926



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 25/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 25/40:  13%|█▎        | 18/143 [00:09<00:37,  3.32it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 24: train_loss=0.0835, val_loss=0.0834



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<23:04,  9.75s/it]
Training 26/40:   5%|▍         | 7/143 [00:09<02:20,  1.04s/it]
Training 26/40:   8%|▊         | 11/143 [00:09<01:15,  1.74it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.45it/s]


Epoch 25: train_loss=0.0829, val_loss=0.0872



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:59,  9.71s/it]
Training 27/40:   8%|▊         | 12/143 [00:09<01:17,  1.69it/s]
Training 27/40:  20%|█▉        | 28/143 [00:09<00:23,  4.90it/s]
Training 27/40:  31%|███▏      | 45/143 [00:10<00:10,  9.54it/s]
Training 27/40:  46%|████▌     | 66/143 [00:10<00:04, 17.25it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 26: train_loss=0.0830, val_loss=0.0834



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<23:28,  9.92s/it]
Training 28/40:   3%|▎         | 5/143 [00:10<03:26,  1.50s/it]
Training 28/40:  10%|▉         | 14/143 [00:10<00:52,  2.44it/s]
Training 28/40:  22%|██▏       | 31/143 [00:10<00:15,  7.05it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.24it/s]


Epoch 27: train_loss=0.0823, val_loss=0.0824



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 29/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 29/40:  16%|█▌        | 23/143 [00:09<00:28,  4.18it/s]
Training 29/40:  24%|██▍       | 35/143 [00:09<00:14,  7.61it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 28: train_loss=0.0822, val_loss=0.0827



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<22:28,  9.50s/it]
Training 30/40:   6%|▌         | 8/143 [00:09<01:58,  1.14it/s]
Training 30/40:  24%|██▍       | 35/143 [00:09<00:16,  6.71it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 29: train_loss=0.0806, val_loss=0.0845



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:10<23:50, 10.07s/it]
Training 31/40:   6%|▌         | 8/143 [00:10<02:05,  1.07it/s]
Training 31/40:  12%|█▏        | 17/143 [00:10<00:44,  2.81it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.14it/s]


Epoch 30: train_loss=0.0804, val_loss=0.0905



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 32/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 32/40:  12%|█▏        | 17/143 [00:09<00:40,  3.15it/s]
Training 32/40:  31%|███       | 44/143 [00:09<00:08, 11.23it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 31: train_loss=0.0797, val_loss=0.0804



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:08<21:10,  8.94s/it]
Training 33/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 33/40:  12%|█▏        | 17/143 [00:09<00:39,  3.21it/s]
Training 33/40:  20%|█▉        | 28/143 [00:09<00:17,  6.45it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.60it/s]


Epoch 32: train_loss=0.0802, val_loss=0.0822



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 34/40:   4%|▍         | 6/143 [00:09<02:40,  1.17s/it]
Training 34/40:  10%|▉         | 14/143 [00:09<00:51,  2.51it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 33: train_loss=0.0791, val_loss=0.0810



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:08<21:14,  8.98s/it]
Training 35/40:   4%|▍         | 6/143 [00:09<02:33,  1.12s/it]
Training 35/40:  10%|█         | 15/143 [00:09<00:45,  2.84it/s]
Training 35/40:  18%|█▊        | 26/143 [00:09<00:19,  6.08it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 34: train_loss=0.0821, val_loss=0.0881



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 36/40:   4%|▍         | 6/143 [00:09<02:33,  1.12s/it]
Training 36/40:  10%|▉         | 14/143 [00:09<00:49,  2.63it/s]
Training 36/40:  17%|█▋        | 25/143 [00:09<00:20,  5.88it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.57it/s]


Epoch 35: train_loss=0.0781, val_loss=0.0801



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 37/40:   5%|▍         | 7/143 [00:09<02:10,  1.04it/s]
Training 37/40:  11%|█         | 16/143 [00:09<00:42,  2.97it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 36: train_loss=0.0769, val_loss=0.0813



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 38/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 38/40:  12%|█▏        | 17/143 [00:09<00:40,  3.12it/s]
Training 38/40:  20%|█▉        | 28/143 [00:09<00:18,  6.31it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 37: train_loss=0.0789, val_loss=0.0809



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<22:17,  9.42s/it]
Training 39/40:   5%|▍         | 7/143 [00:09<02:15,  1.00it/s]
Training 39/40:  10%|█         | 15/143 [00:09<00:48,  2.64it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 38: train_loss=0.0776, val_loss=0.0804



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 40/40:   5%|▍         | 7/143 [00:09<02:10,  1.04it/s]
Training 40/40:  11%|█         | 16/143 [00:09<00:42,  2.98it/s]
Training 40/40:  31%|███▏      | 45/143 [00:09<00:08, 11.63it/s]
Training 40/40: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 39: train_loss=0.0789, val_loss=0.0804


2025-06-01 23:28:05,491 - __main__ - INFO - Saved mlp probe for layer 4 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_4_probe.pth
 33%|███▎      | 2/6 [1:25:56<2:54:39, 2619.89s/it]2025-06-01 23:28:24,227 - __main__ - INFO - Processing layer 6...
2025-06-01 23:28:24,228 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:17:13,  7.17s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:00:29,  3.16s/it]
Extracting features:   0%|          | 3/1149 [00:07<35:51,  1.88s/it]  
Extracting features:   0%|          | 4/1149 [00:08<24:23,  1.28s/it]
Extracting features:   0%|          | 5/1149 [00:08<17:59,  1.06it/s]
Extracting features:   1%|          | 6/1149 [00:08<14:10,  1.34it/s]
Extracting features:   1%|          | 7/1149 [00:09<11:50,  1.61it/s]
Extracting features:   1%|          | 8/1149 [00:09<10:13,  1.86it/s]
Extra

Epoch 0: train_loss=0.1577, val_loss=0.1444



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 2/30:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 2/30:  22%|██▏       | 32/143 [00:09<00:18,  6.10it/s]
Training 2/30:  41%|████      | 58/143 [00:09<00:06, 13.70it/s]
Training 2/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.20it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 1: train_loss=0.1399, val_loss=0.1350



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 3/30:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 3/30:  17%|█▋        | 24/143 [00:09<00:26,  4.51it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 2: train_loss=0.1330, val_loss=0.1298



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:08<20:42,  8.75s/it]
Training 4/30:   8%|▊         | 11/143 [00:08<01:16,  1.72it/s]
Training 4/30:  18%|█▊        | 26/143 [00:08<00:23,  5.04it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 15.12it/s]


Epoch 3: train_loss=0.1284, val_loss=0.1268



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:55,  9.27s/it]
Training 5/30:  10%|▉         | 14/143 [00:09<01:02,  2.08it/s]
Training 5/30:  21%|██        | 30/143 [00:09<00:20,  5.43it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 4: train_loss=0.1250, val_loss=0.1234



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 6/30:  10%|█         | 15/143 [00:08<00:55,  2.32it/s]
Training 6/30:  26%|██▌       | 37/143 [00:09<00:14,  7.13it/s]
Training 6/30:  41%|████      | 58/143 [00:09<00:06, 13.30it/s]
Training 6/30:  64%|██████▍   | 92/143 [00:09<00:01, 26.94it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 5: train_loss=0.1223, val_loss=0.1214



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 7/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 7/30:  22%|██▏       | 31/143 [00:09<00:19,  5.65it/s]
Training 7/30:  36%|███▌      | 51/143 [00:09<00:08, 11.43it/s]
Training 7/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.20it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 6: train_loss=0.1204, val_loss=0.1199



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 8/30:  13%|█▎        | 18/143 [00:09<00:44,  2.80it/s]
Training 8/30:  23%|██▎       | 33/143 [00:09<00:18,  6.04it/s]
Training 8/30: 100%|██████████| 143/143 [00:09<00:00, 14.84it/s]


Epoch 7: train_loss=0.1186, val_loss=0.1184



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 9/30:  11%|█         | 16/143 [00:09<00:51,  2.47it/s]
Training 9/30:  22%|██▏       | 32/143 [00:09<00:18,  5.93it/s]
Training 9/30:  41%|████▏     | 59/143 [00:09<00:06, 13.96it/s]
Training 9/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.38it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 8: train_loss=0.1176, val_loss=0.1171



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:08<20:43,  8.76s/it]
Training 10/30:   9%|▉         | 13/143 [00:08<01:03,  2.04it/s]
Training 10/30:  22%|██▏       | 32/143 [00:08<00:17,  6.25it/s]
Training 10/30:  37%|███▋      | 53/143 [00:09<00:07, 12.56it/s]
Training 10/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.87it/s]
Training 10/30: 100%|██████████| 143/143 [00:09<00:00, 14.71it/s]


Epoch 9: train_loss=0.1165, val_loss=0.1162



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 11/30:  12%|█▏        | 17/143 [00:08<00:47,  2.64it/s]
Training 11/30:  31%|███       | 44/143 [00:09<00:11,  8.54it/s]
Training 11/30:  45%|████▌     | 65/143 [00:09<00:05, 14.68it/s]
Training 11/30:  66%|██████▋   | 95/143 [00:09<00:01, 26.59it/s]
Training 11/30: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 10: train_loss=0.1153, val_loss=0.1153



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 12/30:   9%|▉         | 13/143 [00:09<01:10,  1.84it/s]
Training 12/30:  22%|██▏       | 32/143 [00:09<00:19,  5.64it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.69it/s]


Epoch 11: train_loss=0.1148, val_loss=0.1148



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 13/30:  10%|▉         | 14/143 [00:08<00:59,  2.18it/s]
Training 13/30:  22%|██▏       | 31/143 [00:09<00:19,  5.89it/s]
Training 13/30:  34%|███▎      | 48/143 [00:09<00:08, 10.93it/s]
Training 13/30:  57%|█████▋    | 81/143 [00:09<00:02, 24.30it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 12: train_loss=0.1139, val_loss=0.1142



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:10<23:53, 10.10s/it]
Training 14/30:  12%|█▏        | 17/143 [00:10<00:54,  2.33it/s]
Training 14/30:  23%|██▎       | 33/143 [00:10<00:20,  5.39it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.17it/s]


Epoch 13: train_loss=0.1133, val_loss=0.1134



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 15/30:   9%|▉         | 13/143 [00:09<01:05,  1.98it/s]
Training 15/30:  21%|██        | 30/143 [00:09<00:20,  5.63it/s]
Training 15/30:  39%|███▉      | 56/143 [00:09<00:06, 13.29it/s]
Training 15/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.69it/s]
Training 15/30: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 14: train_loss=0.1127, val_loss=0.1130



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 16/30:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 16/30:  21%|██        | 30/143 [00:09<00:19,  5.74it/s]
Training 16/30:  38%|███▊      | 55/143 [00:09<00:06, 13.07it/s]
Training 16/30:  58%|█████▊    | 83/143 [00:09<00:02, 24.01it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 15: train_loss=0.1123, val_loss=0.1124



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 17/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 17/30:  22%|██▏       | 31/143 [00:09<00:19,  5.66it/s]
Training 17/30:  35%|███▍      | 50/143 [00:09<00:08, 11.15it/s]
Training 17/30:  55%|█████▍    | 78/143 [00:09<00:02, 22.11it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 16: train_loss=0.1117, val_loss=0.1120



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 18/30:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 18/30:  20%|█▉        | 28/143 [00:09<00:21,  5.44it/s]
Training 18/30:  38%|███▊      | 54/143 [00:09<00:06, 13.15it/s]
Training 18/30:  56%|█████▌    | 80/143 [00:09<00:02, 23.37it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 17: train_loss=0.1111, val_loss=0.1116



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 19/30:  10%|▉         | 14/143 [00:09<01:00,  2.14it/s]
Training 19/30:  20%|█▉        | 28/143 [00:09<00:22,  5.14it/s]
Training 19/30:  37%|███▋      | 53/143 [00:09<00:07, 12.52it/s]
Training 19/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.73it/s]
Training 19/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 18: train_loss=0.1110, val_loss=0.1115



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:08<21:13,  8.96s/it]
Training 20/30:   9%|▉         | 13/143 [00:09<01:05,  1.99it/s]
Training 20/30:  25%|██▌       | 36/143 [00:09<00:15,  6.98it/s]
Training 20/30:  41%|████      | 58/143 [00:09<00:06, 13.45it/s]
Training 20/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.09it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 19: train_loss=0.1106, val_loss=0.1109



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<20:51,  8.81s/it]
Training 21/30:   8%|▊         | 12/143 [00:08<01:10,  1.87it/s]
Training 21/30:  20%|██        | 29/143 [00:09<00:20,  5.61it/s]
Training 21/30:  36%|███▌      | 51/143 [00:09<00:07, 12.20it/s]
Training 21/30:  58%|█████▊    | 83/143 [00:09<00:02, 25.15it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 20: train_loss=0.1102, val_loss=0.1108



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:08<20:47,  8.79s/it]
Training 22/30:   6%|▋         | 9/143 [00:08<01:36,  1.39it/s]
Training 22/30:  17%|█▋        | 25/143 [00:09<00:23,  4.92it/s]
Training 22/30: 100%|██████████| 143/143 [00:09<00:00, 15.00it/s]


Epoch 21: train_loss=0.1100, val_loss=0.1107



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:08<20:33,  8.68s/it]
Training 23/30:  10%|▉         | 14/143 [00:08<00:58,  2.22it/s]
Training 23/30:  21%|██        | 30/143 [00:08<00:19,  5.78it/s]
Training 23/30: 100%|██████████| 143/143 [00:09<00:00, 15.23it/s]


Epoch 22: train_loss=0.1098, val_loss=0.1101



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:08<20:42,  8.75s/it]
Training 24/30:   8%|▊         | 12/143 [00:08<01:09,  1.88it/s]
Training 24/30:  19%|█▉        | 27/143 [00:08<00:22,  5.20it/s]
Training 24/30:  33%|███▎      | 47/143 [00:09<00:08, 11.23it/s]
Training 24/30:  56%|█████▌    | 80/143 [00:09<00:02, 24.71it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 23: train_loss=0.1094, val_loss=0.1098



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<21:14,  8.97s/it]
Training 25/30:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 25/30:  21%|██        | 30/143 [00:09<00:19,  5.80it/s]
Training 25/30:  35%|███▍      | 50/143 [00:09<00:07, 11.67it/s]
Training 25/30:  59%|█████▊    | 84/143 [00:09<00:02, 25.25it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 24: train_loss=0.1092, val_loss=0.1099



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:25,  9.06s/it]
Training 26/30:  10%|▉         | 14/143 [00:09<01:00,  2.13it/s]
Training 26/30:  26%|██▌       | 37/143 [00:09<00:14,  7.07it/s]
Training 26/30:  38%|███▊      | 55/143 [00:09<00:07, 12.26it/s]
Training 26/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.27it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 25: train_loss=0.1090, val_loss=0.1098



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:21,  9.03s/it]
Training 27/30:   9%|▉         | 13/143 [00:09<01:05,  1.98it/s]
Training 27/30:  22%|██▏       | 31/143 [00:09<00:19,  5.85it/s]
Training 27/30:  38%|███▊      | 55/143 [00:09<00:06, 12.89it/s]
Training 27/30:  59%|█████▊    | 84/143 [00:09<00:02, 24.27it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 26: train_loss=0.1087, val_loss=0.1092



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<21:04,  8.90s/it]
Training 28/30:   9%|▉         | 13/143 [00:09<01:04,  2.01it/s]
Training 28/30:  26%|██▌       | 37/143 [00:09<00:14,  7.25it/s]
Training 28/30:  39%|███▉      | 56/143 [00:09<00:06, 12.81it/s]
Training 28/30:  60%|██████    | 86/143 [00:09<00:02, 24.77it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 27: train_loss=0.1084, val_loss=0.1090



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<20:53,  8.83s/it]
Training 29/30:  12%|█▏        | 17/143 [00:08<00:47,  2.66it/s]
Training 29/30:  27%|██▋       | 38/143 [00:09<00:14,  7.27it/s]
Training 29/30:  43%|████▎     | 62/143 [00:09<00:05, 14.42it/s]
Training 29/30:  67%|██████▋   | 96/143 [00:09<00:01, 28.09it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.67it/s]


Epoch 28: train_loss=0.1081, val_loss=0.1088



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<21:59,  9.29s/it]
Training 30/30:   8%|▊         | 12/143 [00:09<01:13,  1.77it/s]
Training 30/30:  20%|██        | 29/143 [00:09<00:21,  5.33it/s]
Training 30/30:  34%|███▎      | 48/143 [00:09<00:08, 10.74it/s]
Training 30/30:  57%|█████▋    | 82/143 [00:09<00:02, 23.96it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 29: train_loss=0.1079, val_loss=0.1092


2025-06-01 23:48:21,675 - __main__ - INFO - Saved linear probe for layer 6 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_6_probe.pth
2025-06-01 23:48:40,287 - __main__ - INFO - Running mlp probe on layer 6...
2025-06-01 23:48:40,287 - __main__ - INFO - Running mlp probe on layer 6 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<22:50,  9.65s/it]
Training 1/40:   5%|▍         | 7/143 [00:09<02:19,  1.03s/it]
Training 1/40:   9%|▉         | 13/143 [00:09<00:59,  2.18it/s]
Training 1/40:  29%|██▊       | 41/143 [00:09<00:10, 10.03it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 13.63it/s]


Epoch 0: train_loss=0.1367, val_loss=0.1143



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<22:01,  9.30s/it]
Training 2/40:   4%|▍         | 6/143 [00:09<02:39,  1.16s/it]
Training 2/40:  13%|█▎        | 18/143 [00:09<00:37,  3.37it/s]
Training 2/40:  29%|██▊       | 41/143 [00:09<00:10, 10.00it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 1: train_loss=0.1059, val_loss=0.1024



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 3/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 3/40:  13%|█▎        | 19/143 [00:09<00:34,  3.60it/s]
Training 3/40:  21%|██        | 30/143 [00:09<00:16,  6.83it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 2: train_loss=0.0974, val_loss=0.0907



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 4/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 4/40:  12%|█▏        | 17/143 [00:09<00:40,  3.14it/s]
Training 4/40:  25%|██▌       | 36/143 [00:09<00:12,  8.68it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 3: train_loss=0.0873, val_loss=0.0904



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<22:01,  9.30s/it]
Training 5/40:   5%|▍         | 7/143 [00:09<02:14,  1.01it/s]
Training 5/40:  10%|█         | 15/143 [00:09<00:47,  2.68it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 4: train_loss=0.0810, val_loss=0.0787



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 6/40:   5%|▍         | 7/143 [00:08<02:07,  1.06it/s]
Training 6/40:  12%|█▏        | 17/143 [00:09<00:38,  3.25it/s]
Training 6/40:  23%|██▎       | 33/143 [00:09<00:13,  8.05it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.76it/s]


Epoch 5: train_loss=0.0779, val_loss=0.0790



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:18,  9.01s/it]
Training 7/40:   5%|▍         | 7/143 [00:09<02:10,  1.05it/s]
Training 7/40:  11%|█         | 16/143 [00:09<00:42,  2.99it/s]
Training 7/40:  20%|██        | 29/143 [00:09<00:16,  6.82it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 6: train_loss=0.0740, val_loss=0.0768



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:08<21:14,  8.97s/it]
Training 8/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 8/40:  10%|█         | 15/143 [00:09<00:46,  2.77it/s]
Training 8/40: 100%|██████████| 143/143 [00:09<00:00, 14.67it/s]


Epoch 7: train_loss=0.0729, val_loss=0.0740



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 9/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 9/40:  12%|█▏        | 17/143 [00:09<00:39,  3.16it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 8: train_loss=0.0694, val_loss=0.0704



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 10/40:  12%|█▏        | 17/143 [00:09<00:40,  3.08it/s]
Training 10/40:  20%|█▉        | 28/143 [00:09<00:18,  6.32it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 9: train_loss=0.0694, val_loss=0.0693



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<20:49,  8.80s/it]
Training 11/40:   6%|▌         | 8/143 [00:08<01:49,  1.23it/s]
Training 11/40:  10%|▉         | 14/143 [00:09<00:50,  2.53it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.92it/s]


Epoch 10: train_loss=0.0676, val_loss=0.0667



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:17,  9.00s/it]
Training 12/40:   4%|▍         | 6/143 [00:09<02:33,  1.12s/it]
Training 12/40:  10%|█         | 15/143 [00:09<00:45,  2.83it/s]
Training 12/40:  17%|█▋        | 25/143 [00:09<00:20,  5.77it/s]
Training 12/40:  29%|██▉       | 42/143 [00:09<00:08, 12.53it/s]
Training 12/40:  42%|████▏     | 60/143 [00:09<00:03, 21.82it/s]
Training 12/40:  57%|█████▋    | 81/143 [00:09<00:01, 35.64it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 11: train_loss=0.0648, val_loss=0.0702



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 13/40:   5%|▍         | 7/143 [00:09<02:13,  1.02it/s]
Training 13/40:  11%|█         | 16/143 [00:09<00:43,  2.91it/s]
Training 13/40:  33%|███▎      | 47/143 [00:09<00:08, 11.96it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 12: train_loss=0.0644, val_loss=0.0750



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:08<21:07,  8.93s/it]
Training 14/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 14/40:  10%|█         | 15/143 [00:09<00:47,  2.72it/s]
Training 14/40: 100%|██████████| 143/143 [00:09<00:00, 14.72it/s]


Epoch 13: train_loss=0.0622, val_loss=0.0668



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:08<21:17,  8.99s/it]
Training 15/40:   4%|▍         | 6/143 [00:09<02:33,  1.12s/it]
Training 15/40:  10%|▉         | 14/143 [00:09<00:49,  2.62it/s]
Training 15/40:  34%|███▎      | 48/143 [00:09<00:07, 12.81it/s]
Training 15/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 14: train_loss=0.0629, val_loss=0.0654



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 16/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 16/40:  13%|█▎        | 18/143 [00:09<00:36,  3.43it/s]
Training 16/40:  21%|██        | 30/143 [00:09<00:16,  6.95it/s]
Training 16/40: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 15: train_loss=0.0617, val_loss=0.0678



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 17/40:   4%|▍         | 6/143 [00:09<02:34,  1.13s/it]
Training 17/40:  10%|█         | 15/143 [00:09<00:45,  2.82it/s]
Training 17/40:  18%|█▊        | 26/143 [00:09<00:19,  6.04it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 16: train_loss=0.0598, val_loss=0.0654



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 18/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 18/40:  14%|█▍        | 20/143 [00:09<00:32,  3.80it/s]
Training 18/40:  22%|██▏       | 32/143 [00:09<00:15,  7.30it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 17: train_loss=0.0594, val_loss=0.0654



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 19/40:   6%|▌         | 8/143 [00:08<01:50,  1.22it/s]
Training 19/40:  13%|█▎        | 19/143 [00:09<00:34,  3.62it/s]
Training 19/40:  20%|█▉        | 28/143 [00:09<00:18,  6.26it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 18: train_loss=0.0593, val_loss=0.0647



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<23:23,  9.88s/it]
Training 20/40:   6%|▌         | 8/143 [00:09<02:03,  1.10it/s]
Training 20/40:  11%|█         | 16/143 [00:10<00:47,  2.66it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 19: train_loss=0.0588, val_loss=0.0648



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 21/40:   5%|▍         | 7/143 [00:09<02:08,  1.06it/s]
Training 21/40:  13%|█▎        | 18/143 [00:09<00:36,  3.46it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.84it/s]


Epoch 20: train_loss=0.0574, val_loss=0.0631



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<20:44,  8.76s/it]
Training 22/40:   4%|▍         | 6/143 [00:08<02:29,  1.09s/it]
Training 22/40:   9%|▉         | 13/143 [00:08<00:53,  2.45it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 14.95it/s]


Epoch 21: train_loss=0.0574, val_loss=0.0644



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 23/40:   3%|▎         | 5/143 [00:09<03:23,  1.47s/it]
Training 23/40:  10%|█         | 15/143 [00:09<00:47,  2.68it/s]
Training 23/40:  18%|█▊        | 26/143 [00:10<00:20,  5.68it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.50it/s]


Epoch 22: train_loss=0.0559, val_loss=0.0622



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:08<20:58,  8.86s/it]
Training 24/40:   5%|▍         | 7/143 [00:08<02:08,  1.06it/s]
Training 24/40:  13%|█▎        | 18/143 [00:09<00:36,  3.46it/s]
Training 24/40:  22%|██▏       | 31/143 [00:09<00:15,  7.33it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 23: train_loss=0.0555, val_loss=0.0628



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<21:32,  9.11s/it]
Training 25/40:   5%|▍         | 7/143 [00:09<02:11,  1.03it/s]
Training 25/40:  10%|█         | 15/143 [00:09<00:46,  2.73it/s]
Training 25/40:  20%|█▉        | 28/143 [00:09<00:17,  6.53it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 24: train_loss=0.0550, val_loss=0.0667



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:50,  9.23s/it]
Training 26/40:   5%|▍         | 7/143 [00:09<02:13,  1.02it/s]
Training 26/40:  10%|█         | 15/143 [00:09<00:47,  2.70it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.30it/s]


Epoch 25: train_loss=0.0542, val_loss=0.0638



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 27/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 27/40:  10%|█         | 15/143 [00:09<00:46,  2.77it/s]
Training 27/40:  40%|███▉      | 57/143 [00:09<00:05, 15.36it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 26: train_loss=0.0553, val_loss=0.0643



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:47,  9.20s/it]
Training 28/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 28/40:  12%|█▏        | 17/143 [00:09<00:41,  3.07it/s]
Training 28/40:  18%|█▊        | 26/143 [00:09<00:20,  5.63it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 27: train_loss=0.0542, val_loss=0.0665



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 29/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 29/40:  13%|█▎        | 18/143 [00:09<00:39,  3.20it/s]
Training 29/40:  26%|██▌       | 37/143 [00:09<00:12,  8.69it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 28: train_loss=0.0524, val_loss=0.0623



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 30/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 30/40:  13%|█▎        | 18/143 [00:09<00:37,  3.38it/s]
Training 30/40:  20%|██        | 29/143 [00:09<00:17,  6.60it/s]
Training 30/40:  32%|███▏      | 46/143 [00:09<00:07, 13.37it/s]
Training 30/40:  45%|████▌     | 65/143 [00:09<00:03, 23.21it/s]
Training 30/40: 100%|██████████| 143/143 [00:09<00:00, 14.34it/s]


Epoch 29: train_loss=0.0521, val_loss=0.0729



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:08<20:47,  8.78s/it]
Training 31/40:   5%|▍         | 7/143 [00:08<02:07,  1.07it/s]
Training 31/40:  11%|█         | 16/143 [00:08<00:41,  3.05it/s]
Training 31/40:  20%|█▉        | 28/143 [00:09<00:17,  6.66it/s]
Training 31/40:  33%|███▎      | 47/143 [00:09<00:06, 14.38it/s]
Training 31/40:  46%|████▌     | 66/143 [00:09<00:03, 24.34it/s]
Training 31/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 30: train_loss=0.0518, val_loss=0.0642



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 32/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 32/40:  10%|█         | 15/143 [00:09<00:47,  2.72it/s]
Training 32/40:  38%|███▊      | 55/143 [00:09<00:06, 14.51it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 31: train_loss=0.0522, val_loss=0.0633



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:08<20:54,  8.84s/it]
Training 33/40:   6%|▌         | 8/143 [00:08<01:50,  1.22it/s]
Training 33/40:  14%|█▍        | 20/143 [00:09<00:31,  3.86it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.95it/s]


Epoch 32: train_loss=0.0516, val_loss=0.0621



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:08<20:41,  8.74s/it]
Training 34/40:   5%|▍         | 7/143 [00:08<02:06,  1.08it/s]
Training 34/40:  10%|█         | 15/143 [00:08<00:44,  2.85it/s]
Training 34/40:  22%|██▏       | 32/143 [00:09<00:13,  8.03it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 14.96it/s]


Epoch 33: train_loss=0.0515, val_loss=0.0657



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<23:05,  9.76s/it]
Training 35/40:   6%|▋         | 9/143 [00:09<01:47,  1.25it/s]
Training 35/40:  12%|█▏        | 17/143 [00:10<00:44,  2.83it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 34: train_loss=0.0510, val_loss=0.0629



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<22:58,  9.71s/it]
Training 36/40:   4%|▍         | 6/143 [00:09<02:46,  1.21s/it]
Training 36/40:  10%|▉         | 14/143 [00:09<00:53,  2.43it/s]
Training 36/40:  35%|███▍      | 50/143 [00:10<00:07, 12.47it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 35: train_loss=0.0502, val_loss=0.0657



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 37/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 37/40:  14%|█▍        | 20/143 [00:09<00:32,  3.80it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 36: train_loss=0.0520, val_loss=0.0635



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 38/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 38/40:  10%|█         | 15/143 [00:09<00:46,  2.78it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.70it/s]


Epoch 37: train_loss=0.0506, val_loss=0.0721



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:10<24:08, 10.20s/it]
Training 39/40:   6%|▌         | 8/143 [00:10<02:07,  1.06it/s]
Training 39/40:  11%|█         | 16/143 [00:10<00:49,  2.58it/s]
Training 39/40:  24%|██▍       | 35/143 [00:10<00:14,  7.61it/s]
Training 39/40: 100%|██████████| 143/143 [00:11<00:00, 12.89it/s]


Epoch 38: train_loss=0.0501, val_loss=0.0614



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 40/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 40/40:  12%|█▏        | 17/143 [00:09<00:40,  3.12it/s]
Training 40/40:  39%|███▉      | 56/143 [00:09<00:05, 14.75it/s]
Training 40/40: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 39: train_loss=0.0484, val_loss=0.0687


2025-06-02 00:01:39,546 - __main__ - INFO - Saved mlp probe for layer 6 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_6_probe.pth
 50%|█████     | 3/6 [1:59:32<1:57:11, 2343.91s/it]2025-06-02 00:01:59,726 - __main__ - INFO - Processing layer 8...
2025-06-02 00:01:59,727 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:25:35,  7.61s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:03:51,  3.34s/it]
Extracting features:   0%|          | 3/1149 [00:08<37:45,  1.98s/it]  
Extracting features:   0%|          | 4/1149 [00:08<25:35,  1.34s/it]
Extracting features:   0%|          | 5/1149 [00:09<18:47,  1.01it/s]
Extracting features:   1%|          | 6/1149 [00:09<14:37,  1.30it/s]
Extracting features:   1%|          | 7/1149 [00:09<12:03,  1.58it/s]
Extracting features:   1%|          | 8/1149 [00:10<10:17,  1.85it/s]
Extra

Epoch 0: train_loss=0.1428, val_loss=0.1234



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:08<20:32,  8.68s/it]
Training 2/30:   8%|▊         | 12/143 [00:08<01:09,  1.89it/s]
Training 2/30:  24%|██▍       | 35/143 [00:08<00:15,  7.05it/s]
Training 2/30:  39%|███▉      | 56/143 [00:08<00:06, 13.39it/s]
Training 2/30:  62%|██████▏   | 88/143 [00:09<00:02, 26.49it/s]
Training 2/30: 100%|██████████| 143/143 [00:09<00:00, 14.86it/s]


Epoch 1: train_loss=0.1191, val_loss=0.1136



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<23:26,  9.91s/it]
Training 3/30:   9%|▉         | 13/143 [00:10<01:12,  1.80it/s]
Training 3/30:  22%|██▏       | 32/143 [00:10<00:20,  5.54it/s]
Training 3/30:  42%|████▏     | 60/143 [00:10<00:06, 13.12it/s]
Training 3/30:  63%|██████▎   | 90/143 [00:10<00:02, 23.98it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.16it/s]


Epoch 2: train_loss=0.1120, val_loss=0.1090



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<21:21,  9.03s/it]
Training 4/30:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 4/30:  21%|██        | 30/143 [00:09<00:19,  5.76it/s]
Training 4/30:  36%|███▌      | 51/143 [00:09<00:07, 11.91it/s]
Training 4/30:  57%|█████▋    | 82/143 [00:09<00:02, 24.20it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 14.35it/s]


Epoch 3: train_loss=0.1075, val_loss=0.1055



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 5/30:  10%|█         | 15/143 [00:09<00:55,  2.30it/s]
Training 5/30:  22%|██▏       | 32/143 [00:09<00:18,  5.97it/s]
Training 5/30:  43%|████▎     | 62/143 [00:09<00:05, 14.87it/s]
Training 5/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.38it/s]
Training 5/30: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 4: train_loss=0.1047, val_loss=0.1032



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<22:45,  9.62s/it]
Training 6/30:   8%|▊         | 12/143 [00:09<01:16,  1.71it/s]
Training 6/30:  22%|██▏       | 31/143 [00:09<00:20,  5.56it/s]
Training 6/30:  38%|███▊      | 54/143 [00:09<00:07, 11.93it/s]
Training 6/30:  59%|█████▉    | 85/143 [00:10<00:02, 23.52it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 5: train_loss=0.1025, val_loss=0.1016



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 7/30:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 7/30:  20%|█▉        | 28/143 [00:09<00:21,  5.31it/s]
Training 7/30:  33%|███▎      | 47/143 [00:09<00:08, 10.84it/s]
Training 7/30:  53%|█████▎    | 76/143 [00:09<00:03, 22.29it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 6: train_loss=0.1009, val_loss=0.1004



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 8/30:   9%|▉         | 13/143 [00:09<01:06,  1.94it/s]
Training 8/30:  25%|██▌       | 36/143 [00:09<00:15,  6.81it/s]
Training 8/30:  40%|███▉      | 57/143 [00:09<00:06, 12.82it/s]
Training 8/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.66it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 7: train_loss=0.0995, val_loss=0.0986



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 9/30:  10%|▉         | 14/143 [00:09<01:01,  2.11it/s]
Training 9/30:  20%|██        | 29/143 [00:09<00:21,  5.28it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 8: train_loss=0.0983, val_loss=0.0976



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 10/30:   7%|▋         | 10/143 [00:09<01:27,  1.51it/s]
Training 10/30:  22%|██▏       | 32/143 [00:09<00:17,  6.27it/s]
Training 10/30: 100%|██████████| 143/143 [00:09<00:00, 14.68it/s]


Epoch 9: train_loss=0.0975, val_loss=0.0972



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 11/30:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 11/30:  20%|██        | 29/143 [00:09<00:21,  5.41it/s]
Training 11/30:  37%|███▋      | 53/143 [00:09<00:07, 12.39it/s]
Training 11/30:  52%|█████▏    | 75/143 [00:09<00:03, 20.86it/s]
Training 11/30:  74%|███████▍  | 106/143 [00:09<00:01, 36.68it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 10: train_loss=0.0964, val_loss=0.0975



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:08<21:17,  8.99s/it]
Training 12/30:  12%|█▏        | 17/143 [00:09<00:48,  2.61it/s]
Training 12/30:  22%|██▏       | 31/143 [00:09<00:19,  5.61it/s]
Training 12/30: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 11: train_loss=0.0958, val_loss=0.0954



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:08<20:54,  8.83s/it]
Training 13/30:  10%|▉         | 14/143 [00:08<00:59,  2.18it/s]
Training 13/30:  24%|██▍       | 35/143 [00:09<00:15,  6.80it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 15.05it/s]


Epoch 12: train_loss=0.0953, val_loss=0.0948



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<21:08,  8.94s/it]
Training 14/30:   9%|▉         | 13/143 [00:09<01:05,  2.00it/s]
Training 14/30:  21%|██        | 30/143 [00:09<00:19,  5.69it/s]
Training 14/30:  41%|████▏     | 59/143 [00:09<00:05, 14.33it/s]
Training 14/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.53it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 13: train_loss=0.0947, val_loss=0.0944



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:18,  9.01s/it]
Training 15/30:  13%|█▎        | 19/143 [00:09<00:42,  2.91it/s]
Training 15/30:  25%|██▌       | 36/143 [00:09<00:16,  6.55it/s]
Training 15/30:  45%|████▌     | 65/143 [00:09<00:05, 15.08it/s]
Training 15/30:  67%|██████▋   | 96/143 [00:09<00:01, 27.25it/s]
Training 15/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 14: train_loss=0.0941, val_loss=0.0938



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 16/30:  12%|█▏        | 17/143 [00:09<00:48,  2.60it/s]
Training 16/30:  24%|██▍       | 34/143 [00:09<00:17,  6.24it/s]
Training 16/30: 100%|██████████| 143/143 [00:09<00:00, 14.70it/s]


Epoch 15: train_loss=0.0935, val_loss=0.0934



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 17/30:   9%|▉         | 13/143 [00:09<01:05,  1.99it/s]
Training 17/30:  23%|██▎       | 33/143 [00:09<00:17,  6.33it/s]
Training 17/30: 100%|██████████| 143/143 [00:09<00:00, 14.83it/s]


Epoch 16: train_loss=0.0933, val_loss=0.0934



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 18/30:  10%|█         | 15/143 [00:09<00:55,  2.31it/s]
Training 18/30:  22%|██▏       | 31/143 [00:09<00:19,  5.78it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.83it/s]


Epoch 17: train_loss=0.0928, val_loss=0.0931



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:31,  9.09s/it]
Training 19/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 19/30:  27%|██▋       | 38/143 [00:09<00:14,  7.25it/s]
Training 19/30:  41%|████      | 58/143 [00:09<00:06, 13.00it/s]
Training 19/30:  73%|███████▎  | 104/143 [00:09<00:01, 31.29it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 14.30it/s]


Epoch 18: train_loss=0.0924, val_loss=0.0932



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:08<21:05,  8.92s/it]
Training 20/30:   9%|▉         | 13/143 [00:09<01:04,  2.00it/s]
Training 20/30:  22%|██▏       | 32/143 [00:09<00:18,  6.14it/s]
Training 20/30:  36%|███▋      | 52/143 [00:09<00:07, 12.04it/s]
Training 20/30:  59%|█████▊    | 84/143 [00:09<00:02, 24.84it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 19: train_loss=0.0921, val_loss=0.0928



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<22:19,  9.44s/it]
Training 21/30:  12%|█▏        | 17/143 [00:09<00:50,  2.49it/s]
Training 21/30:  27%|██▋       | 38/143 [00:09<00:15,  6.81it/s]
Training 21/30:  44%|████▍     | 63/143 [00:09<00:05, 13.83it/s]
Training 21/30:  69%|██████▊   | 98/143 [00:09<00:01, 27.14it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 20: train_loss=0.0918, val_loss=0.0922



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<22:51,  9.66s/it]
Training 22/30:   9%|▉         | 13/143 [00:09<01:10,  1.85it/s]
Training 22/30:  23%|██▎       | 33/143 [00:09<00:18,  5.89it/s]
Training 22/30:  33%|███▎      | 47/143 [00:09<00:09,  9.68it/s]
Training 22/30:  55%|█████▍    | 78/143 [00:10<00:03, 21.33it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 21: train_loss=0.0917, val_loss=0.0943



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:08<20:49,  8.80s/it]
Training 23/30:  10%|█         | 15/143 [00:08<00:54,  2.35it/s]
Training 23/30:  22%|██▏       | 31/143 [00:09<00:19,  5.86it/s]
Training 23/30:  34%|███▍      | 49/143 [00:09<00:08, 11.23it/s]
Training 23/30:  56%|█████▌    | 80/143 [00:09<00:02, 23.83it/s]
Training 23/30: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 22: train_loss=0.0912, val_loss=0.0914



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 24/30:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 24/30:  24%|██▍       | 34/143 [00:09<00:16,  6.42it/s]
Training 24/30:  41%|████      | 58/143 [00:09<00:06, 13.37it/s]
Training 24/30:  61%|██████    | 87/143 [00:09<00:02, 24.64it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 23: train_loss=0.0911, val_loss=0.0910



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<22:25,  9.47s/it]
Training 25/30:   8%|▊         | 12/143 [00:09<01:15,  1.74it/s]
Training 25/30:  20%|█▉        | 28/143 [00:09<00:22,  5.03it/s]
Training 25/30:  38%|███▊      | 54/143 [00:09<00:07, 12.37it/s]
Training 25/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.12it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 24: train_loss=0.0907, val_loss=0.0914



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<20:54,  8.83s/it]
Training 26/30:  10%|█         | 15/143 [00:08<00:54,  2.34it/s]
Training 26/30:  21%|██        | 30/143 [00:09<00:20,  5.61it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.87it/s]


Epoch 25: train_loss=0.0908, val_loss=0.0909



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 27/30:  13%|█▎        | 18/143 [00:09<00:45,  2.76it/s]
Training 27/30:  25%|██▌       | 36/143 [00:09<00:16,  6.62it/s]
Training 27/30:  41%|████▏     | 59/143 [00:09<00:06, 13.35it/s]
Training 27/30:  68%|██████▊   | 97/143 [00:09<00:01, 28.44it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 26: train_loss=0.0904, val_loss=0.0915



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<21:08,  8.94s/it]
Training 28/30:  12%|█▏        | 17/143 [00:09<00:47,  2.63it/s]
Training 28/30:  25%|██▌       | 36/143 [00:09<00:15,  6.74it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 14.88it/s]


Epoch 27: train_loss=0.0906, val_loss=0.0901



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 29/30:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 29/30:  22%|██▏       | 31/143 [00:09<00:18,  5.95it/s]
Training 29/30:  36%|███▋      | 52/143 [00:09<00:07, 12.12it/s]
Training 29/30:  53%|█████▎    | 76/143 [00:09<00:03, 21.54it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 28: train_loss=0.0902, val_loss=0.0902



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 30/30:  10%|▉         | 14/143 [00:09<00:59,  2.16it/s]
Training 30/30:  23%|██▎       | 33/143 [00:09<00:17,  6.29it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.86it/s]


Epoch 29: train_loss=0.0898, val_loss=0.0898


2025-06-02 00:21:54,629 - __main__ - INFO - Saved linear probe for layer 8 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_8_probe.pth
2025-06-02 00:22:13,638 - __main__ - INFO - Running mlp probe on layer 8...
2025-06-02 00:22:13,639 - __main__ - INFO - Running mlp probe on layer 8 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:08<21:00,  8.88s/it]
Training 1/40:   6%|▌         | 8/143 [00:08<01:51,  1.22it/s]
Training 1/40:  10%|█         | 15/143 [00:09<00:46,  2.73it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.79it/s]


Epoch 0: train_loss=0.1382, val_loss=0.1040



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 2/40:   5%|▍         | 7/143 [00:08<02:07,  1.06it/s]
Training 2/40:  10%|█         | 15/143 [00:09<00:45,  2.82it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 14.89it/s]


Epoch 1: train_loss=0.0945, val_loss=0.0872



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 3/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 3/40:  13%|█▎        | 18/143 [00:09<00:40,  3.10it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 2: train_loss=0.0802, val_loss=0.0835



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:08<21:11,  8.96s/it]
Training 4/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 4/40:  11%|█         | 16/143 [00:09<00:42,  3.00it/s]
Training 4/40:  20%|██        | 29/143 [00:09<00:16,  6.85it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 3: train_loss=0.0716, val_loss=0.0707



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 5/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 5/40:  11%|█         | 16/143 [00:09<00:42,  2.99it/s]
Training 5/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 4: train_loss=0.0676, val_loss=0.0666



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<21:01,  8.89s/it]
Training 6/40:   6%|▌         | 8/143 [00:09<01:51,  1.22it/s]
Training 6/40:  11%|█         | 16/143 [00:09<00:43,  2.95it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.80it/s]


Epoch 5: train_loss=0.0652, val_loss=0.0648



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<20:52,  8.82s/it]
Training 7/40:   6%|▌         | 8/143 [00:08<01:49,  1.23it/s]
Training 7/40:  13%|█▎        | 18/143 [00:09<00:36,  3.42it/s]
Training 7/40:  24%|██▍       | 35/143 [00:09<00:12,  8.54it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.81it/s]


Epoch 6: train_loss=0.0625, val_loss=0.0624



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 8/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 8/40:  12%|█▏        | 17/143 [00:09<00:40,  3.12it/s]
Training 8/40:  20%|██        | 29/143 [00:09<00:17,  6.62it/s]
Training 8/40:  32%|███▏      | 46/143 [00:09<00:07, 13.31it/s]
Training 8/40:  45%|████▌     | 65/143 [00:09<00:03, 23.06it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 7: train_loss=0.0592, val_loss=0.0670



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<22:46,  9.62s/it]
Training 9/40:   4%|▍         | 6/143 [00:09<02:44,  1.20s/it]
Training 9/40:  10%|█         | 15/143 [00:09<00:48,  2.65it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 8: train_loss=0.0578, val_loss=0.0598



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:08<20:56,  8.85s/it]
Training 10/40:   6%|▌         | 8/143 [00:08<01:50,  1.22it/s]
Training 10/40:  11%|█         | 16/143 [00:09<00:42,  2.97it/s]
Training 10/40:  31%|███▏      | 45/143 [00:09<00:08, 11.77it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.82it/s]


Epoch 9: train_loss=0.0569, val_loss=0.0580



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<20:52,  8.82s/it]
Training 11/40:   8%|▊         | 12/143 [00:08<01:10,  1.86it/s]
Training 11/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.98it/s]


Epoch 10: train_loss=0.0531, val_loss=0.0573



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:10<23:48, 10.06s/it]
Training 12/40:   9%|▉         | 13/143 [00:10<01:13,  1.78it/s]
Training 12/40:  17%|█▋        | 25/143 [00:10<00:28,  4.09it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.16it/s]


Epoch 11: train_loss=0.0524, val_loss=0.0551



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:58,  9.28s/it]
Training 13/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 13/40:  16%|█▌        | 23/143 [00:09<00:29,  4.12it/s]
Training 13/40:  27%|██▋       | 38/143 [00:09<00:12,  8.40it/s]
Training 13/40:  38%|███▊      | 55/143 [00:09<00:05, 14.88it/s]
Training 13/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.38it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 12: train_loss=0.0517, val_loss=0.0581



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 14/40:   8%|▊         | 12/143 [00:08<01:10,  1.87it/s]
Training 14/40:  17%|█▋        | 25/143 [00:09<00:25,  4.72it/s]
Training 14/40:  28%|██▊       | 40/143 [00:09<00:11,  9.19it/s]
Training 14/40:  41%|████      | 58/143 [00:09<00:05, 16.38it/s]
Training 14/40:  58%|█████▊    | 83/143 [00:09<00:02, 29.57it/s]
Training 14/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 13: train_loss=0.0506, val_loss=0.0554



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:35,  9.13s/it]
Training 15/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 15/40:  15%|█▌        | 22/143 [00:09<00:30,  3.91it/s]
Training 15/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 14: train_loss=0.0500, val_loss=0.0590



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:08<21:11,  8.96s/it]
Training 16/40:   8%|▊         | 12/143 [00:09<01:11,  1.84it/s]
Training 16/40:  17%|█▋        | 24/143 [00:09<00:26,  4.42it/s]
Training 16/40:  26%|██▌       | 37/143 [00:09<00:12,  8.24it/s]
Training 16/40:  40%|███▉      | 57/143 [00:09<00:05, 16.18it/s]
Training 16/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.99it/s]
Training 16/40:  71%|███████▏  | 102/143 [00:09<00:00, 42.62it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 15: train_loss=0.0480, val_loss=0.0556



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 17/40:   9%|▉         | 13/143 [00:09<01:07,  1.94it/s]
Training 17/40:  17%|█▋        | 25/143 [00:09<00:26,  4.45it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 16: train_loss=0.0463, val_loss=0.0537



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<20:50,  8.81s/it]
Training 18/40:   8%|▊         | 12/143 [00:08<01:10,  1.86it/s]
Training 18/40:  16%|█▌        | 23/143 [00:09<00:28,  4.27it/s]
Training 18/40:  42%|████▏     | 60/143 [00:09<00:05, 15.54it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.89it/s]


Epoch 17: train_loss=0.0462, val_loss=0.0567



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:08<21:04,  8.91s/it]
Training 19/40:   8%|▊         | 12/143 [00:09<01:10,  1.85it/s]
Training 19/40:  17%|█▋        | 24/143 [00:09<00:26,  4.44it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 14.77it/s]


Epoch 18: train_loss=0.0468, val_loss=0.0587



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:24,  9.04s/it]
Training 20/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 20/40:  15%|█▍        | 21/143 [00:09<00:31,  3.86it/s]
Training 20/40:  25%|██▌       | 36/143 [00:09<00:13,  8.23it/s]
Training 20/40:  36%|███▋      | 52/143 [00:09<00:06, 14.46it/s]
Training 20/40:  49%|████▉     | 70/143 [00:09<00:03, 23.61it/s]
Training 20/40:  67%|██████▋   | 96/143 [00:09<00:01, 40.88it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 19: train_loss=0.0462, val_loss=0.0531



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 21/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 21/40:  16%|█▌        | 23/143 [00:09<00:28,  4.16it/s]
Training 21/40:  31%|███       | 44/143 [00:09<00:09, 10.33it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 20: train_loss=0.0436, val_loss=0.0543



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<21:08,  8.94s/it]
Training 22/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 22/40:  14%|█▍        | 20/143 [00:09<00:34,  3.62it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 14.70it/s]


Epoch 21: train_loss=0.0465, val_loss=0.0529



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:08<20:43,  8.75s/it]
Training 23/40:   8%|▊         | 12/143 [00:08<01:09,  1.88it/s]
Training 23/40:  16%|█▌        | 23/143 [00:08<00:27,  4.30it/s]
Training 23/40:  25%|██▌       | 36/143 [00:09<00:13,  8.17it/s]
Training 23/40:  37%|███▋      | 53/143 [00:09<00:06, 14.99it/s]
Training 23/40:  50%|█████     | 72/143 [00:09<00:02, 24.92it/s]
Training 23/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 22: train_loss=0.0432, val_loss=0.0521



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 24/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 24/40:  16%|█▌        | 23/143 [00:09<00:28,  4.22it/s]
Training 24/40:  25%|██▌       | 36/143 [00:09<00:13,  7.93it/s]
Training 24/40:  41%|████      | 58/143 [00:09<00:05, 16.48it/s]
Training 24/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.37it/s]
Training 24/40:  73%|███████▎  | 104/143 [00:09<00:00, 43.17it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 23: train_loss=0.0426, val_loss=0.0561



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:08<20:49,  8.80s/it]
Training 25/40:   8%|▊         | 12/143 [00:08<01:10,  1.87it/s]
Training 25/40:  17%|█▋        | 24/143 [00:09<00:26,  4.49it/s]
Training 25/40:  34%|███▍      | 49/143 [00:09<00:07, 12.05it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.89it/s]


Epoch 24: train_loss=0.0433, val_loss=0.0538



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 26/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 26/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 26/40:  39%|███▉      | 56/143 [00:09<00:06, 13.92it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 25: train_loss=0.0409, val_loss=0.0511



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:59,  9.72s/it]
Training 27/40:   8%|▊         | 11/143 [00:09<01:25,  1.55it/s]
Training 27/40:  15%|█▌        | 22/143 [00:09<00:32,  3.74it/s]
Training 27/40:  24%|██▍       | 35/143 [00:10<00:14,  7.28it/s]
Training 27/40:  37%|███▋      | 53/143 [00:10<00:06, 13.90it/s]
Training 27/40:  50%|█████     | 72/143 [00:10<00:03, 23.07it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.30it/s]


Epoch 26: train_loss=0.0411, val_loss=0.0538



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 28/40:   7%|▋         | 10/143 [00:09<01:35,  1.40it/s]
Training 28/40:  13%|█▎        | 19/143 [00:09<00:39,  3.18it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 27: train_loss=0.0413, val_loss=0.0534



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 29/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 29/40:  18%|█▊        | 26/143 [00:09<00:24,  4.76it/s]
Training 29/40:  29%|██▊       | 41/143 [00:09<00:11,  9.01it/s]
Training 29/40:  42%|████▏     | 60/143 [00:09<00:05, 16.28it/s]
Training 29/40:  57%|█████▋    | 81/143 [00:09<00:02, 26.78it/s]
Training 29/40:  75%|███████▍  | 107/143 [00:09<00:00, 43.45it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 28: train_loss=0.0424, val_loss=0.0520



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 30/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 30/40:  14%|█▍        | 20/143 [00:09<00:34,  3.57it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 29: train_loss=0.0407, val_loss=0.0514



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 31/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 31/40:  15%|█▌        | 22/143 [00:09<00:31,  3.80it/s]
Training 31/40:  29%|██▉       | 42/143 [00:09<00:10,  9.48it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 30: train_loss=0.0390, val_loss=0.0504



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 32/40:   8%|▊         | 12/143 [00:08<01:10,  1.85it/s]
Training 32/40:  17%|█▋        | 24/143 [00:09<00:26,  4.45it/s]
Training 32/40:  26%|██▌       | 37/143 [00:09<00:12,  8.28it/s]
Training 32/40:  39%|███▉      | 56/143 [00:09<00:05, 15.84it/s]
Training 32/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.09it/s]
Training 32/40:  70%|██████▉   | 100/143 [00:09<00:01, 41.85it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 31: train_loss=0.0391, val_loss=0.0503



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 33/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 33/40:  14%|█▍        | 20/143 [00:09<00:34,  3.58it/s]
Training 33/40:  44%|████▍     | 63/143 [00:09<00:04, 16.19it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 32: train_loss=0.0383, val_loss=0.0499



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 34/40:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 34/40:  16%|█▌        | 23/143 [00:09<00:29,  4.08it/s]
Training 34/40:  36%|███▋      | 52/143 [00:09<00:07, 12.62it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 33: train_loss=0.0394, val_loss=0.0536



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 35/40:  15%|█▌        | 22/143 [00:09<00:31,  3.88it/s]
Training 35/40:  25%|██▌       | 36/143 [00:09<00:13,  7.91it/s]
Training 35/40:  35%|███▍      | 50/143 [00:09<00:07, 13.26it/s]
Training 35/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.76it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 34: train_loss=0.0378, val_loss=0.0509



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<22:59,  9.71s/it]
Training 36/40:   8%|▊         | 11/143 [00:09<01:25,  1.55it/s]
Training 36/40:  15%|█▍        | 21/143 [00:09<00:34,  3.55it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 35: train_loss=0.0378, val_loss=0.0510



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 37/40:  15%|█▌        | 22/143 [00:09<00:29,  4.05it/s]
Training 37/40:  24%|██▍       | 35/143 [00:09<00:13,  7.81it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 36: train_loss=0.0370, val_loss=0.0504



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 38/40:   9%|▉         | 13/143 [00:09<01:07,  1.94it/s]
Training 38/40:  19%|█▉        | 27/143 [00:09<00:23,  4.88it/s]
Training 38/40:  32%|███▏      | 46/143 [00:09<00:09, 10.35it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 37: train_loss=0.0366, val_loss=0.0537



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 39/40:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 39/40:  17%|█▋        | 24/143 [00:09<00:27,  4.30it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 38: train_loss=0.0378, val_loss=0.0533



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 40/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 40/40:  17%|█▋        | 24/143 [00:09<00:27,  4.34it/s]
Training 40/40:  27%|██▋       | 38/143 [00:09<00:12,  8.37it/s]
Training 40/40:  38%|███▊      | 55/143 [00:09<00:05, 14.93it/s]
Training 40/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.68it/s]
Training 40/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.97it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 39: train_loss=0.0356, val_loss=0.0488


2025-06-02 00:35:16,856 - __main__ - INFO - Saved mlp probe for layer 8 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_8_probe.pth
 67%|██████▋   | 4/6 [2:33:08<1:13:49, 2214.60s/it]2025-06-02 00:35:36,093 - __main__ - INFO - Processing layer 10...
2025-06-02 00:35:36,094 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:06<2:13:44,  6.99s/it]
Extracting features:   0%|          | 2/1149 [00:07<59:03,  3.09s/it]  
Extracting features:   0%|          | 3/1149 [00:07<35:09,  1.84s/it]
Extracting features:   0%|          | 4/1149 [00:08<23:57,  1.26s/it]
Extracting features:   0%|          | 5/1149 [00:08<17:50,  1.07it/s]
Extracting features:   1%|          | 6/1149 [00:08<14:10,  1.34it/s]
Extracting features:   1%|          | 7/1149 [00:09<11:39,  1.63it/s]
Extracting features:   1%|          | 8/1149 [00:09<10:04,  1.89it/s]
Extrac

Epoch 0: train_loss=0.1522, val_loss=0.1232



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 2/30:  15%|█▍        | 21/143 [00:09<00:39,  3.09it/s]
Training 2/30:  33%|███▎      | 47/143 [00:09<00:11,  8.46it/s]
Training 2/30:  52%|█████▏    | 74/143 [00:09<00:04, 16.01it/s]
Training 2/30:  73%|███████▎  | 104/143 [00:09<00:01, 27.27it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 1: train_loss=0.1173, val_loss=0.1130



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<21:50,  9.23s/it]
Training 3/30:  13%|█▎        | 18/143 [00:09<00:46,  2.70it/s]
Training 3/30:  27%|██▋       | 38/143 [00:09<00:15,  6.90it/s]
Training 3/30:  43%|████▎     | 61/143 [00:09<00:06, 13.48it/s]
Training 3/30:  62%|██████▏   | 89/143 [00:09<00:02, 24.26it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 2: train_loss=0.1104, val_loss=0.1079



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:39,  9.57s/it]
Training 4/30:  14%|█▍        | 20/143 [00:09<00:42,  2.90it/s]
Training 4/30:  27%|██▋       | 38/143 [00:09<00:16,  6.54it/s]
Training 4/30:  43%|████▎     | 62/143 [00:09<00:06, 13.20it/s]
Training 4/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.02it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.57it/s]


Epoch 3: train_loss=0.1072, val_loss=0.1062



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<22:38,  9.57s/it]
Training 5/30:  13%|█▎        | 19/143 [00:09<00:45,  2.75it/s]
Training 5/30:  27%|██▋       | 38/143 [00:09<00:15,  6.60it/s]
Training 5/30:  48%|████▊     | 69/143 [00:09<00:04, 15.24it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 4: train_loss=0.1036, val_loss=0.1043



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 6/30:  17%|█▋        | 24/143 [00:09<00:33,  3.61it/s]
Training 6/30:  31%|███       | 44/143 [00:09<00:12,  7.78it/s]
Training 6/30:  47%|████▋     | 67/143 [00:09<00:05, 14.33it/s]
Training 6/30:  67%|██████▋   | 96/143 [00:09<00:01, 25.47it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 5: train_loss=0.1021, val_loss=0.1035



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 7/30:  14%|█▍        | 20/143 [00:09<00:39,  3.10it/s]
Training 7/30:  30%|███       | 43/143 [00:09<00:12,  8.08it/s]
Training 7/30:  49%|████▉     | 70/143 [00:09<00:04, 16.03it/s]
Training 7/30:  67%|██████▋   | 96/143 [00:09<00:01, 26.16it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 6: train_loss=0.1006, val_loss=0.1006



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:08<21:14,  8.97s/it]
Training 8/30:  15%|█▌        | 22/143 [00:09<00:35,  3.40it/s]
Training 8/30:  31%|███▏      | 45/143 [00:09<00:11,  8.34it/s]
Training 8/30:  49%|████▉     | 70/143 [00:09<00:04, 15.65it/s]
Training 8/30:  67%|██████▋   | 96/143 [00:09<00:01, 25.75it/s]
Training 8/30: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 7: train_loss=0.0990, val_loss=0.1002



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 9/30:  18%|█▊        | 26/143 [00:09<00:28,  4.05it/s]
Training 9/30:  31%|███       | 44/143 [00:09<00:12,  7.93it/s]
Training 9/30:  50%|████▉     | 71/143 [00:09<00:04, 15.91it/s]
Training 9/30:  69%|██████▉   | 99/143 [00:09<00:01, 26.91it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 8: train_loss=0.0986, val_loss=0.0992



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:10<24:08, 10.20s/it]
Training 10/30:  16%|█▌        | 23/143 [00:10<00:38,  3.13it/s]
Training 10/30:  31%|███▏      | 45/143 [00:10<00:13,  7.30it/s]
Training 10/30:  50%|████▉     | 71/143 [00:10<00:05, 14.07it/s]
Training 10/30:  71%|███████▏  | 102/143 [00:10<00:01, 24.97it/s]
Training 10/30: 100%|██████████| 143/143 [00:11<00:00, 12.84it/s]


Epoch 9: train_loss=0.0972, val_loss=0.0987



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<22:34,  9.54s/it]
Training 11/30:  10%|█         | 15/143 [00:09<00:59,  2.17it/s]
Training 11/30:  27%|██▋       | 38/143 [00:09<00:15,  6.87it/s]
Training 11/30:  43%|████▎     | 62/143 [00:09<00:05, 13.53it/s]
Training 11/30:  59%|█████▊    | 84/143 [00:09<00:02, 21.62it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 10: train_loss=0.0963, val_loss=0.0984



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 12/30:  13%|█▎        | 19/143 [00:09<00:42,  2.93it/s]
Training 12/30:  29%|██▊       | 41/143 [00:09<00:13,  7.67it/s]
Training 12/30:  48%|████▊     | 69/143 [00:09<00:04, 15.92it/s]
Training 12/30:  71%|███████   | 101/143 [00:09<00:01, 28.50it/s]
Training 12/30: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 11: train_loss=0.0955, val_loss=0.0975



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<23:28,  9.92s/it]
Training 13/30:  15%|█▍        | 21/143 [00:10<00:41,  2.94it/s]
Training 13/30:  32%|███▏      | 46/143 [00:10<00:12,  7.84it/s]
Training 13/30:  47%|████▋     | 67/143 [00:10<00:05, 13.40it/s]
Training 13/30:  69%|██████▉   | 99/143 [00:10<00:01, 24.99it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.17it/s]


Epoch 12: train_loss=0.0955, val_loss=0.0968



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<22:36,  9.55s/it]
Training 14/30:  15%|█▌        | 22/143 [00:09<00:37,  3.20it/s]
Training 14/30:  25%|██▌       | 36/143 [00:09<00:17,  6.01it/s]
Training 14/30:  46%|████▌     | 66/143 [00:09<00:05, 14.41it/s]
Training 14/30:  66%|██████▋   | 95/143 [00:09<00:01, 25.18it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 13: train_loss=0.0944, val_loss=0.0957



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:10<24:18, 10.27s/it]
Training 15/30:  14%|█▍        | 20/143 [00:10<00:45,  2.70it/s]
Training 15/30:  27%|██▋       | 39/143 [00:10<00:16,  6.29it/s]
Training 15/30:  43%|████▎     | 62/143 [00:10<00:06, 12.24it/s]
Training 15/30:  65%|██████▌   | 93/143 [00:10<00:02, 23.13it/s]
Training 15/30: 100%|██████████| 143/143 [00:11<00:00, 12.78it/s]


Epoch 14: train_loss=0.0942, val_loss=0.0957



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 16/30:  13%|█▎        | 19/143 [00:09<00:44,  2.80it/s]
Training 16/30:  31%|███       | 44/143 [00:09<00:12,  7.98it/s]
Training 16/30:  51%|█████     | 73/143 [00:09<00:04, 16.17it/s]
Training 16/30:  75%|███████▍  | 107/143 [00:09<00:01, 29.04it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 15: train_loss=0.0938, val_loss=0.0950



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:10<24:11, 10.22s/it]
Training 17/30:  16%|█▌        | 23/143 [00:10<00:38,  3.13it/s]
Training 17/30:  33%|███▎      | 47/143 [00:10<00:12,  7.70it/s]
Training 17/30:  52%|█████▏    | 75/143 [00:10<00:04, 15.00it/s]
Training 17/30:  73%|███████▎  | 105/143 [00:10<00:01, 25.50it/s]
Training 17/30: 100%|██████████| 143/143 [00:11<00:00, 12.88it/s]


Epoch 16: train_loss=0.0930, val_loss=0.0949



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 18/30:  15%|█▍        | 21/143 [00:09<00:38,  3.17it/s]
Training 18/30:  31%|███       | 44/143 [00:09<00:12,  8.01it/s]
Training 18/30:  50%|█████     | 72/143 [00:09<00:04, 16.07it/s]
Training 18/30:  73%|███████▎  | 104/143 [00:09<00:01, 28.40it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 17: train_loss=0.0928, val_loss=0.0960



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<23:14,  9.82s/it]
Training 19/30:  15%|█▍        | 21/143 [00:09<00:41,  2.97it/s]
Training 19/30:  30%|███       | 43/143 [00:10<00:13,  7.32it/s]
Training 19/30:  48%|████▊     | 68/143 [00:10<00:05, 14.08it/s]
Training 19/30:  67%|██████▋   | 96/143 [00:10<00:01, 24.25it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 18: train_loss=0.0924, val_loss=0.0942



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 20/30:  14%|█▍        | 20/143 [00:09<00:40,  3.07it/s]
Training 20/30:  29%|██▉       | 42/143 [00:09<00:12,  7.79it/s]
Training 20/30:  49%|████▉     | 70/143 [00:09<00:04, 16.00it/s]
Training 20/30:  69%|██████▉   | 99/143 [00:09<00:01, 27.31it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 19: train_loss=0.0921, val_loss=0.0938



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 21/30:  13%|█▎        | 19/143 [00:09<00:42,  2.93it/s]
Training 21/30:  30%|███       | 43/143 [00:09<00:12,  8.12it/s]
Training 21/30:  51%|█████     | 73/143 [00:09<00:04, 16.97it/s]
Training 21/30:  73%|███████▎  | 104/143 [00:09<00:01, 29.12it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 20: train_loss=0.0917, val_loss=0.0950



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 22/30:  19%|█▉        | 27/143 [00:09<00:28,  4.06it/s]
Training 22/30:  35%|███▍      | 50/143 [00:09<00:10,  8.85it/s]
Training 22/30:  56%|█████▌    | 80/143 [00:09<00:03, 17.42it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 21: train_loss=0.0919, val_loss=0.0933



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 23/30:  13%|█▎        | 18/143 [00:09<00:45,  2.75it/s]
Training 23/30:  27%|██▋       | 38/143 [00:09<00:14,  7.04it/s]
Training 23/30:  45%|████▌     | 65/143 [00:09<00:05, 14.95it/s]
Training 23/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.04it/s]
Training 23/30: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 22: train_loss=0.0915, val_loss=0.0934



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 24/30:  16%|█▌        | 23/143 [00:09<00:34,  3.49it/s]
Training 24/30:  29%|██▉       | 42/143 [00:09<00:13,  7.49it/s]
Training 24/30:  46%|████▌     | 66/143 [00:09<00:05, 14.40it/s]
Training 24/30:  64%|██████▍   | 92/143 [00:09<00:02, 24.40it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 23: train_loss=0.0912, val_loss=0.0982



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 25/30:   9%|▉         | 13/143 [00:09<01:05,  1.99it/s]
Training 25/30:  25%|██▌       | 36/143 [00:09<00:15,  6.98it/s]
Training 25/30:  43%|████▎     | 61/143 [00:09<00:05, 14.35it/s]
Training 25/30:  64%|██████▎   | 91/143 [00:09<00:01, 26.20it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 24: train_loss=0.0914, val_loss=0.0929



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<23:00,  9.72s/it]
Training 26/30:  13%|█▎        | 19/143 [00:09<00:45,  2.71it/s]
Training 26/30:  29%|██▊       | 41/143 [00:09<00:14,  7.11it/s]
Training 26/30:  48%|████▊     | 68/143 [00:10<00:05, 14.50it/s]
Training 26/30:  66%|██████▋   | 95/143 [00:10<00:01, 24.35it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 25: train_loss=0.0906, val_loss=0.0923



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:08<21:01,  8.88s/it]
Training 27/30:  11%|█         | 16/143 [00:08<00:51,  2.48it/s]
Training 27/30:  26%|██▌       | 37/143 [00:09<00:15,  7.06it/s]
Training 27/30:  45%|████▌     | 65/143 [00:09<00:05, 15.39it/s]
Training 27/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.61it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 26: train_loss=0.0905, val_loss=0.0924



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:10<24:01, 10.15s/it]
Training 28/30:  13%|█▎        | 19/143 [00:10<00:47,  2.59it/s]
Training 28/30:  28%|██▊       | 40/143 [00:10<00:15,  6.62it/s]
Training 28/30:  45%|████▌     | 65/143 [00:10<00:05, 13.19it/s]
Training 28/30:  67%|██████▋   | 96/143 [00:10<00:01, 24.19it/s]
Training 28/30: 100%|██████████| 143/143 [00:11<00:00, 12.89it/s]


Epoch 27: train_loss=0.0906, val_loss=0.0937



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 29/30:  15%|█▌        | 22/143 [00:09<00:36,  3.32it/s]
Training 29/30:  33%|███▎      | 47/143 [00:09<00:11,  8.58it/s]
Training 29/30:  52%|█████▏    | 75/143 [00:09<00:04, 16.60it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 28: train_loss=0.0905, val_loss=0.0921



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 30/30:  16%|█▌        | 23/143 [00:09<00:34,  3.51it/s]
Training 30/30:  32%|███▏      | 46/143 [00:09<00:11,  8.39it/s]
Training 30/30:  52%|█████▏    | 75/143 [00:09<00:04, 16.80it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 29: train_loss=0.0903, val_loss=0.0921


2025-06-02 00:55:45,732 - __main__ - INFO - Saved linear probe for layer 10 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_10_probe.pth
2025-06-02 00:56:05,972 - __main__ - INFO - Running mlp probe on layer 10...
2025-06-02 00:56:05,972 - __main__ - INFO - Running mlp probe on layer 10 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:10<23:40, 10.00s/it]
Training 1/40:   8%|▊         | 11/143 [00:10<01:27,  1.50it/s]
Training 1/40:  15%|█▌        | 22/143 [00:10<00:33,  3.64it/s]
Training 1/40:  24%|██▍       | 35/143 [00:10<00:15,  7.07it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 13.18it/s]


Epoch 0: train_loss=0.1619, val_loss=0.1099



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<22:05,  9.34s/it]
Training 2/40:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 2/40:  17%|█▋        | 24/143 [00:09<00:27,  4.25it/s]
Training 2/40:  31%|███       | 44/143 [00:09<00:09,  9.97it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 1: train_loss=0.1022, val_loss=0.0986



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:21,  9.44s/it]
Training 3/40:   8%|▊         | 12/143 [00:09<01:15,  1.74it/s]
Training 3/40:  17%|█▋        | 24/143 [00:09<00:28,  4.20it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 2: train_loss=0.0893, val_loss=0.0864



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 4/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 4/40:  16%|█▌        | 23/143 [00:09<00:28,  4.24it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 3: train_loss=0.0809, val_loss=0.0807



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 5/40:  10%|▉         | 14/143 [00:09<01:03,  2.03it/s]
Training 5/40:  17%|█▋        | 25/143 [00:09<00:27,  4.26it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 4: train_loss=0.0747, val_loss=0.0780



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 6/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 6/40:  17%|█▋        | 24/143 [00:09<00:27,  4.39it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 5: train_loss=0.0696, val_loss=0.0724



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 7/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 7/40:  16%|█▌        | 23/143 [00:09<00:29,  4.13it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 6: train_loss=0.0652, val_loss=0.0719



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:28,  9.08s/it]
Training 8/40:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 8/40:  17%|█▋        | 24/143 [00:09<00:26,  4.43it/s]
Training 8/40:  29%|██▊       | 41/143 [00:09<00:10,  9.37it/s]
Training 8/40:  41%|████▏     | 59/143 [00:09<00:05, 16.35it/s]
Training 8/40:  57%|█████▋    | 82/143 [00:09<00:02, 28.10it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 7: train_loss=0.0627, val_loss=0.0715



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 9/40:   9%|▉         | 13/143 [00:09<01:06,  1.94it/s]
Training 9/40:  16%|█▌        | 23/143 [00:09<00:29,  4.03it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 8: train_loss=0.0610, val_loss=0.0693



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 10/40:   9%|▉         | 13/143 [00:09<01:07,  1.94it/s]
Training 10/40:  17%|█▋        | 24/143 [00:09<00:28,  4.24it/s]
Training 10/40:  29%|██▊       | 41/143 [00:09<00:11,  9.14it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 9: train_loss=0.0584, val_loss=0.0644



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 11/40:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 11/40:  14%|█▍        | 20/143 [00:09<00:33,  3.66it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 10: train_loss=0.0554, val_loss=0.0685



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:24,  9.05s/it]
Training 12/40:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 12/40:  16%|█▌        | 23/143 [00:09<00:28,  4.16it/s]
Training 12/40:  26%|██▌       | 37/143 [00:09<00:12,  8.23it/s]
Training 12/40:  39%|███▉      | 56/143 [00:09<00:05, 15.68it/s]
Training 12/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.82it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 11: train_loss=0.0541, val_loss=0.0682



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 13/40:   8%|▊         | 12/143 [00:09<01:17,  1.70it/s]
Training 13/40:  15%|█▌        | 22/143 [00:09<00:32,  3.69it/s]
Training 13/40:  23%|██▎       | 33/143 [00:10<00:16,  6.67it/s]
Training 13/40:  34%|███▎      | 48/143 [00:10<00:07, 12.19it/s]
Training 13/40:  48%|████▊     | 68/143 [00:10<00:03, 21.96it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.25it/s]


Epoch 12: train_loss=0.0528, val_loss=0.0694



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:10<25:06, 10.61s/it]
Training 14/40:   9%|▉         | 13/143 [00:10<01:17,  1.69it/s]
Training 14/40:  17%|█▋        | 25/143 [00:10<00:30,  3.87it/s]
Training 14/40:  25%|██▌       | 36/143 [00:10<00:16,  6.61it/s]
Training 14/40:  36%|███▋      | 52/143 [00:11<00:07, 12.02it/s]
Training 14/40:  50%|████▉     | 71/143 [00:11<00:03, 20.58it/s]
Training 14/40: 100%|██████████| 143/143 [00:11<00:00, 12.22it/s]


Epoch 13: train_loss=0.0522, val_loss=0.0627



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:10<24:42, 10.44s/it]
Training 15/40:   8%|▊         | 12/143 [00:10<01:23,  1.58it/s]
Training 15/40:  17%|█▋        | 25/143 [00:10<00:29,  4.00it/s]
Training 15/40:  28%|██▊       | 40/143 [00:10<00:13,  7.83it/s]
Training 15/40:  38%|███▊      | 55/143 [00:10<00:06, 12.96it/s]
Training 15/40:  53%|█████▎    | 76/143 [00:10<00:02, 22.62it/s]
Training 15/40:  68%|██████▊   | 97/143 [00:11<00:01, 34.87it/s]
Training 15/40: 100%|██████████| 143/143 [00:11<00:00, 12.28it/s]


Epoch 14: train_loss=0.0501, val_loss=0.0639



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 16/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 16/40:  16%|█▌        | 23/143 [00:09<00:28,  4.18it/s]
Training 16/40:  37%|███▋      | 53/143 [00:09<00:06, 12.98it/s]
Training 16/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 15: train_loss=0.0482, val_loss=0.0637



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 17/40:  10%|▉         | 14/143 [00:09<01:01,  2.11it/s]
Training 17/40:  17%|█▋        | 25/143 [00:09<00:26,  4.42it/s]
Training 17/40:  41%|████▏     | 59/143 [00:09<00:05, 14.39it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 16: train_loss=0.0464, val_loss=0.0600



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:10<24:58, 10.55s/it]
Training 18/40:   8%|▊         | 11/143 [00:10<01:32,  1.43it/s]
Training 18/40:  15%|█▍        | 21/143 [00:10<00:37,  3.27it/s]
Training 18/40:  29%|██▉       | 42/143 [00:10<00:11,  8.64it/s]
Training 18/40: 100%|██████████| 143/143 [00:11<00:00, 12.58it/s]


Epoch 17: train_loss=0.0453, val_loss=0.0622



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<23:38,  9.99s/it]
Training 19/40:   6%|▋         | 9/143 [00:10<01:49,  1.23it/s]
Training 19/40:  15%|█▍        | 21/143 [00:10<00:34,  3.57it/s]
Training 19/40:  25%|██▌       | 36/143 [00:10<00:14,  7.57it/s]
Training 19/40:  35%|███▍      | 50/143 [00:10<00:07, 12.53it/s]
Training 19/40:  48%|████▊     | 69/143 [00:10<00:03, 21.54it/s]
Training 19/40: 100%|██████████| 143/143 [00:11<00:00, 12.93it/s]


Epoch 18: train_loss=0.0451, val_loss=0.0595



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<22:42,  9.59s/it]
Training 20/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 20/40:  15%|█▍        | 21/143 [00:09<00:34,  3.58it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 19: train_loss=0.0433, val_loss=0.0634



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<23:19,  9.86s/it]
Training 21/40:   7%|▋         | 10/143 [00:09<01:36,  1.38it/s]
Training 21/40:  16%|█▌        | 23/143 [00:10<00:30,  3.94it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 20: train_loss=0.0421, val_loss=0.0601



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:15,  9.41s/it]
Training 22/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 22/40:  16%|█▌        | 23/143 [00:09<00:29,  4.07it/s]
Training 22/40:  25%|██▌       | 36/143 [00:09<00:13,  7.70it/s]
Training 22/40:  37%|███▋      | 53/143 [00:09<00:06, 14.14it/s]
Training 22/40:  50%|████▉     | 71/143 [00:09<00:03, 23.02it/s]
Training 22/40:  65%|██████▌   | 93/143 [00:10<00:01, 36.97it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 21: train_loss=0.0409, val_loss=0.0622



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 23/40:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 23/40:  15%|█▌        | 22/143 [00:09<00:31,  3.87it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 22: train_loss=0.0409, val_loss=0.0587



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 24/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 24/40:  17%|█▋        | 24/143 [00:09<00:27,  4.30it/s]
Training 24/40:  25%|██▌       | 36/143 [00:09<00:13,  7.72it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 23: train_loss=0.0401, val_loss=0.0614



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:08,  9.36s/it]
Training 25/40:   7%|▋         | 10/143 [00:09<01:31,  1.46it/s]
Training 25/40:  13%|█▎        | 19/143 [00:09<00:37,  3.31it/s]
Training 25/40:  19%|█▉        | 27/143 [00:09<00:21,  5.52it/s]
Training 25/40:  30%|███       | 43/143 [00:09<00:08, 11.64it/s]
Training 25/40:  43%|████▎     | 62/143 [00:09<00:03, 21.18it/s]
Training 25/40:  59%|█████▉    | 85/143 [00:09<00:01, 35.93it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 24: train_loss=0.0392, val_loss=0.0597



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<23:21,  9.87s/it]
Training 26/40:   8%|▊         | 12/143 [00:09<01:18,  1.67it/s]
Training 26/40:  17%|█▋        | 24/143 [00:10<00:29,  4.02it/s]
Training 26/40:  27%|██▋       | 38/143 [00:10<00:13,  7.77it/s]
Training 26/40:  39%|███▉      | 56/143 [00:10<00:06, 14.29it/s]
Training 26/40:  54%|█████▍    | 77/143 [00:10<00:02, 24.29it/s]
Training 26/40:  70%|██████▉   | 100/143 [00:10<00:01, 38.29it/s]
Training 26/40: 100%|██████████| 143/143 [00:11<00:00, 12.93it/s]


Epoch 25: train_loss=0.0380, val_loss=0.0611



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 27/40:  17%|█▋        | 24/143 [00:09<00:26,  4.50it/s]
Training 27/40:  26%|██▌       | 37/143 [00:09<00:12,  8.27it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 26: train_loss=0.0382, val_loss=0.0587



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:08<21:01,  8.88s/it]
Training 28/40:   8%|▊         | 11/143 [00:08<01:17,  1.69it/s]
Training 28/40:  16%|█▌        | 23/143 [00:09<00:27,  4.30it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 14.81it/s]


Epoch 27: train_loss=0.0371, val_loss=0.0659



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 29/40:   8%|▊         | 12/143 [00:09<01:11,  1.84it/s]
Training 29/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 29/40:  28%|██▊       | 40/143 [00:09<00:10,  9.42it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 28: train_loss=0.0367, val_loss=0.0593



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<22:02,  9.32s/it]
Training 30/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 30/40:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 30/40:  24%|██▍       | 35/143 [00:09<00:14,  7.48it/s]
Training 30/40:  38%|███▊      | 55/143 [00:09<00:05, 15.17it/s]
Training 30/40:  55%|█████▌    | 79/143 [00:09<00:02, 27.27it/s]
Training 30/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.61it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.67it/s]


Epoch 29: train_loss=0.0352, val_loss=0.0605



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:10<25:34, 10.80s/it]
Training 31/40:   9%|▉         | 13/143 [00:10<01:18,  1.66it/s]
Training 31/40:  19%|█▉        | 27/143 [00:11<00:27,  4.19it/s]
Training 31/40: 100%|██████████| 143/143 [00:11<00:00, 12.39it/s]


Epoch 30: train_loss=0.0347, val_loss=0.0618



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<23:09,  9.78s/it]
Training 32/40:   7%|▋         | 10/143 [00:09<01:35,  1.40it/s]
Training 32/40:  16%|█▌        | 23/143 [00:09<00:30,  3.98it/s]
Training 32/40:  24%|██▍       | 35/143 [00:10<00:14,  7.20it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 31: train_loss=0.0355, val_loss=0.0600



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:10<24:34, 10.38s/it]
Training 33/40:   8%|▊         | 12/143 [00:10<01:22,  1.59it/s]
Training 33/40:  17%|█▋        | 25/143 [00:10<00:29,  4.02it/s]
Training 33/40:  27%|██▋       | 38/143 [00:10<00:14,  7.35it/s]
Training 33/40:  41%|████▏     | 59/143 [00:10<00:05, 14.69it/s]
Training 33/40:  55%|█████▌    | 79/143 [00:10<00:02, 23.80it/s]
Training 33/40: 100%|██████████| 143/143 [00:11<00:00, 12.56it/s]


Epoch 32: train_loss=0.0342, val_loss=0.0579



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 34/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 34/40:  17%|█▋        | 25/143 [00:09<00:25,  4.65it/s]
Training 34/40:  27%|██▋       | 39/143 [00:09<00:12,  8.65it/s]
Training 34/40:  40%|███▉      | 57/143 [00:09<00:05, 15.58it/s]
Training 34/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.18it/s]
Training 34/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.43it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 33: train_loss=0.0333, val_loss=0.0603



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:32,  9.52s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:15,  1.73it/s]
Training 35/40:  16%|█▌        | 23/143 [00:09<00:30,  3.96it/s]
Training 35/40:  38%|███▊      | 55/143 [00:09<00:06, 13.00it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 34: train_loss=0.0328, val_loss=0.0603



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:10<25:28, 10.76s/it]
Training 36/40:   6%|▌         | 8/143 [00:10<02:13,  1.01it/s]
Training 36/40:  11%|█         | 16/143 [00:10<00:51,  2.46it/s]
Training 36/40:  24%|██▍       | 35/143 [00:11<00:14,  7.25it/s]
Training 36/40: 100%|██████████| 143/143 [00:11<00:00, 12.35it/s]


Epoch 35: train_loss=0.0325, val_loss=0.0619



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:44,  9.18s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 37/40:  16%|█▌        | 23/143 [00:09<00:28,  4.22it/s]
Training 37/40:  24%|██▍       | 34/143 [00:09<00:14,  7.34it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 36: train_loss=0.0317, val_loss=0.0583



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 38/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 38/40:  17%|█▋        | 24/143 [00:09<00:27,  4.37it/s]
Training 38/40:  28%|██▊       | 40/143 [00:09<00:11,  9.03it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 37: train_loss=0.0317, val_loss=0.0578



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 39/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 39/40:  18%|█▊        | 26/143 [00:09<00:24,  4.79it/s]
Training 39/40:  28%|██▊       | 40/143 [00:09<00:11,  8.82it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 38: train_loss=0.0318, val_loss=0.0606



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 40/40:  15%|█▍        | 21/143 [00:09<00:32,  3.78it/s]
Training 40/40:  23%|██▎       | 33/143 [00:09<00:15,  7.18it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 39: train_loss=0.0316, val_loss=0.0569


2025-06-02 01:09:35,597 - __main__ - INFO - Saved mlp probe for layer 10 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_10_probe.pth
 83%|████████▎ | 5/6 [3:07:27<35:58, 2158.51s/it]  2025-06-02 01:09:55,139 - __main__ - INFO - Processing layer 11...
2025-06-02 01:09:55,140 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:19:46,  7.31s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:01:25,  3.21s/it]
Extracting features:   0%|          | 3/1149 [00:07<36:18,  1.90s/it]  
Extracting features:   0%|          | 4/1149 [00:08<24:29,  1.28s/it]
Extracting features:   0%|          | 5/1149 [00:08<18:05,  1.05it/s]
Extracting features:   1%|          | 6/1149 [00:09<14:08,  1.35it/s]
Extracting features:   1%|          | 7/1149 [00:09<11:36,  1.64it/s]
Extracting features:   1%|          | 8/1149 [00:09<09:59,  1.90it/s]
Ex

Epoch 0: train_loss=0.2676, val_loss=0.1728



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 2/30:  14%|█▍        | 20/143 [00:09<00:40,  3.07it/s]
Training 2/30:  29%|██▉       | 42/143 [00:09<00:12,  7.80it/s]
Training 2/30:  50%|████▉     | 71/143 [00:09<00:04, 16.32it/s]
Training 2/30: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 1: train_loss=0.1547, val_loss=0.1433



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 3/30:  15%|█▌        | 22/143 [00:09<00:35,  3.41it/s]
Training 3/30:  34%|███▎      | 48/143 [00:09<00:10,  9.04it/s]
Training 3/30:  52%|█████▏    | 75/143 [00:09<00:04, 16.95it/s]
Training 3/30:  71%|███████▏  | 102/143 [00:09<00:01, 27.43it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 2: train_loss=0.1356, val_loss=0.1335



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:18,  9.42s/it]
Training 4/30:  13%|█▎        | 18/143 [00:09<00:47,  2.64it/s]
Training 4/30:  30%|███       | 43/143 [00:09<00:12,  7.81it/s]
Training 4/30:  50%|█████     | 72/143 [00:09<00:04, 15.98it/s]
Training 4/30:  71%|███████▏  | 102/143 [00:09<00:01, 27.25it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 3: train_loss=0.1265, val_loss=0.1233



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 5/30:  15%|█▌        | 22/143 [00:09<00:35,  3.41it/s]
Training 5/30:  33%|███▎      | 47/143 [00:09<00:10,  8.81it/s]
Training 5/30:  50%|████▉     | 71/143 [00:09<00:04, 15.83it/s]
Training 5/30:  71%|███████   | 101/143 [00:09<00:01, 27.62it/s]
Training 5/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 4: train_loss=0.1208, val_loss=0.1204



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 6/30:  13%|█▎        | 18/143 [00:09<00:45,  2.76it/s]
Training 6/30:  31%|███▏      | 45/143 [00:09<00:11,  8.59it/s]
Training 6/30:  49%|████▉     | 70/143 [00:09<00:04, 15.89it/s]
Training 6/30:  71%|███████   | 101/143 [00:09<00:01, 28.05it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 5: train_loss=0.1174, val_loss=0.1160



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:08,  9.35s/it]
Training 7/30:  14%|█▍        | 20/143 [00:09<00:41,  2.96it/s]
Training 7/30:  30%|███       | 43/143 [00:09<00:12,  7.73it/s]
Training 7/30:  46%|████▌     | 66/143 [00:09<00:05, 14.20it/s]
Training 7/30:  69%|██████▊   | 98/143 [00:09<00:01, 26.39it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 6: train_loss=0.1142, val_loss=0.1169



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 8/30:  13%|█▎        | 19/143 [00:09<00:42,  2.91it/s]
Training 8/30:  32%|███▏      | 46/143 [00:09<00:11,  8.71it/s]
Training 8/30:  48%|████▊     | 68/143 [00:09<00:04, 15.06it/s]
Training 8/30:  69%|██████▉   | 99/143 [00:09<00:01, 27.21it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 7: train_loss=0.1124, val_loss=0.1135



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:08<21:05,  8.92s/it]
Training 9/30:  13%|█▎        | 19/143 [00:09<00:42,  2.95it/s]
Training 9/30:  27%|██▋       | 38/143 [00:09<00:14,  7.07it/s]
Training 9/30:  48%|████▊     | 68/143 [00:09<00:04, 15.98it/s]
Training 9/30:  69%|██████▉   | 99/143 [00:09<00:01, 28.20it/s]
Training 9/30: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 8: train_loss=0.1105, val_loss=0.1106



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 10/30:  14%|█▍        | 20/143 [00:09<00:41,  2.99it/s]
Training 10/30:  29%|██▊       | 41/143 [00:09<00:13,  7.38it/s]
Training 10/30:  50%|█████     | 72/143 [00:09<00:04, 16.26it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 9: train_loss=0.1092, val_loss=0.1115



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 11/30:  16%|█▌        | 23/143 [00:09<00:34,  3.53it/s]
Training 11/30:  33%|███▎      | 47/143 [00:09<00:11,  8.66it/s]
Training 11/30:  53%|█████▎    | 76/143 [00:09<00:03, 17.12it/s]
Training 11/30:  75%|███████▍  | 107/143 [00:09<00:01, 29.16it/s]
Training 11/30: 100%|██████████| 143/143 [00:09<00:00, 14.35it/s]


Epoch 10: train_loss=0.1082, val_loss=0.1091



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<22:00,  9.30s/it]
Training 12/30:  15%|█▌        | 22/143 [00:09<00:36,  3.28it/s]
Training 12/30:  32%|███▏      | 46/143 [00:09<00:11,  8.29it/s]
Training 12/30:  50%|█████     | 72/143 [00:09<00:04, 15.66it/s]
Training 12/30:  73%|███████▎  | 104/143 [00:09<00:01, 27.88it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 11: train_loss=0.1084, val_loss=0.1083



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<21:50,  9.23s/it]
Training 13/30:  14%|█▍        | 20/143 [00:09<00:41,  3.00it/s]
Training 13/30:  27%|██▋       | 38/143 [00:09<00:15,  6.77it/s]
Training 13/30:  47%|████▋     | 67/143 [00:09<00:05, 15.11it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 12: train_loss=0.1072, val_loss=0.1089



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 14/30:  13%|█▎        | 18/143 [00:09<00:44,  2.78it/s]
Training 14/30:  22%|██▏       | 32/143 [00:09<00:19,  5.79it/s]
Training 14/30:  36%|███▋      | 52/143 [00:09<00:07, 11.68it/s]
Training 14/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.23it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 13: train_loss=0.1063, val_loss=0.1114



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 15/30:  14%|█▍        | 20/143 [00:09<00:39,  3.10it/s]
Training 15/30:  29%|██▊       | 41/143 [00:09<00:13,  7.64it/s]
Training 15/30:  45%|████▍     | 64/143 [00:09<00:05, 14.39it/s]
Training 15/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.02it/s]
Training 15/30: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 14: train_loss=0.1059, val_loss=0.1062



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:25,  9.06s/it]
Training 16/30:  15%|█▍        | 21/143 [00:09<00:37,  3.21it/s]
Training 16/30:  29%|██▉       | 42/143 [00:09<00:13,  7.69it/s]
Training 16/30:  50%|████▉     | 71/143 [00:09<00:04, 16.17it/s]
Training 16/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 15: train_loss=0.1048, val_loss=0.1053



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 17/30:  14%|█▍        | 20/143 [00:09<00:40,  3.05it/s]
Training 17/30:  23%|██▎       | 33/143 [00:09<00:19,  5.79it/s]
Training 17/30:  43%|████▎     | 61/143 [00:09<00:05, 13.99it/s]
Training 17/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.90it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 16: train_loss=0.1051, val_loss=0.1066



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 18/30:  13%|█▎        | 19/143 [00:09<00:42,  2.93it/s]
Training 18/30:  29%|██▉       | 42/143 [00:09<00:12,  7.90it/s]
Training 18/30:  48%|████▊     | 69/143 [00:09<00:04, 15.84it/s]
Training 18/30:  71%|███████   | 101/143 [00:09<00:01, 28.42it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 17: train_loss=0.1044, val_loss=0.1059



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<23:14,  9.82s/it]
Training 19/30:  13%|█▎        | 18/143 [00:09<00:49,  2.54it/s]
Training 19/30:  27%|██▋       | 38/143 [00:10<00:16,  6.50it/s]
Training 19/30:  44%|████▍     | 63/143 [00:10<00:06, 13.27it/s]
Training 19/30:  67%|██████▋   | 96/143 [00:10<00:01, 25.38it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.30it/s]


Epoch 18: train_loss=0.1043, val_loss=0.1061



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 20/30:  13%|█▎        | 19/143 [00:09<00:43,  2.88it/s]
Training 20/30:  27%|██▋       | 39/143 [00:09<00:14,  7.12it/s]
Training 20/30:  46%|████▌     | 66/143 [00:09<00:05, 14.95it/s]
Training 20/30:  69%|██████▊   | 98/143 [00:09<00:01, 27.41it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 19: train_loss=0.1042, val_loss=0.1077



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 21/30:  14%|█▍        | 20/143 [00:09<00:40,  3.03it/s]
Training 21/30:  28%|██▊       | 40/143 [00:09<00:14,  7.25it/s]
Training 21/30:  45%|████▌     | 65/143 [00:09<00:05, 14.47it/s]
Training 21/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.28it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 20: train_loss=0.1043, val_loss=0.1046



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:52,  9.25s/it]
Training 22/30:  10%|█         | 15/143 [00:09<00:57,  2.24it/s]
Training 22/30:  27%|██▋       | 38/143 [00:09<00:14,  7.07it/s]
Training 22/30:  41%|████▏     | 59/143 [00:09<00:06, 13.05it/s]
Training 22/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.21it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 21: train_loss=0.1036, val_loss=0.1067



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 23/30:  12%|█▏        | 17/143 [00:09<00:49,  2.57it/s]
Training 23/30:  27%|██▋       | 39/143 [00:09<00:14,  7.24it/s]
Training 23/30:  45%|████▍     | 64/143 [00:09<00:05, 14.47it/s]
Training 23/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.27it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 22: train_loss=0.1049, val_loss=0.1049



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 24/30:  14%|█▍        | 20/143 [00:09<00:40,  3.03it/s]
Training 24/30:  30%|███       | 43/143 [00:09<00:12,  7.91it/s]
Training 24/30:  49%|████▉     | 70/143 [00:09<00:04, 15.70it/s]
Training 24/30:  69%|██████▉   | 99/143 [00:09<00:01, 26.83it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 23: train_loss=0.1034, val_loss=0.1045



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 25/30:  14%|█▍        | 20/143 [00:08<00:39,  3.12it/s]
Training 25/30:  29%|██▉       | 42/143 [00:09<00:12,  7.91it/s]
Training 25/30:  50%|████▉     | 71/143 [00:09<00:04, 16.52it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.63it/s]


Epoch 24: train_loss=0.1032, val_loss=0.1053



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<20:58,  8.87s/it]
Training 26/30:  17%|█▋        | 25/143 [00:08<00:30,  3.92it/s]
Training 26/30:  34%|███▎      | 48/143 [00:09<00:10,  8.91it/s]
Training 26/30:  54%|█████▍    | 77/143 [00:09<00:03, 17.51it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 25: train_loss=0.1038, val_loss=0.1049



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 27/30:  13%|█▎        | 19/143 [00:09<00:43,  2.87it/s]
Training 27/30:  31%|███       | 44/143 [00:09<00:12,  8.18it/s]
Training 27/30:  45%|████▌     | 65/143 [00:09<00:05, 14.18it/s]
Training 27/30:  67%|██████▋   | 96/143 [00:09<00:01, 26.20it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 26: train_loss=0.1032, val_loss=0.1053



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 28/30:  15%|█▌        | 22/143 [00:09<00:36,  3.33it/s]
Training 28/30:  32%|███▏      | 46/143 [00:09<00:11,  8.41it/s]
Training 28/30:  55%|█████▍    | 78/143 [00:09<00:03, 17.65it/s]
Training 28/30:  76%|███████▌  | 109/143 [00:09<00:01, 29.51it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 27: train_loss=0.1031, val_loss=0.1043



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 29/30:  14%|█▍        | 20/143 [00:09<00:39,  3.09it/s]
Training 29/30:  30%|███       | 43/143 [00:09<00:12,  8.07it/s]
Training 29/30:  50%|████▉     | 71/143 [00:09<00:04, 16.31it/s]
Training 29/30:  72%|███████▏  | 103/143 [00:09<00:01, 28.91it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 28: train_loss=0.1031, val_loss=0.1039



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<21:04,  8.90s/it]
Training 30/30:  15%|█▌        | 22/143 [00:09<00:35,  3.43it/s]
Training 30/30:  33%|███▎      | 47/143 [00:09<00:10,  8.86it/s]
Training 30/30:  50%|████▉     | 71/143 [00:09<00:04, 15.91it/s]
Training 30/30:  73%|███████▎  | 105/143 [00:09<00:01, 29.42it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 29: train_loss=0.1029, val_loss=0.1042


2025-06-02 01:29:46,827 - __main__ - INFO - Saved linear probe for layer 11 to cache\probes\phase1_dinov2_viewpoint_probing\linear_layer_11_probe.pth
2025-06-02 01:30:06,258 - __main__ - INFO - Running mlp probe on layer 11...
2025-06-02 01:30:06,260 - __main__ - INFO - Running mlp probe on layer 11 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 1/40:   9%|▉         | 13/143 [00:09<01:05,  1.97it/s]
Training 1/40:  18%|█▊        | 26/143 [00:09<00:24,  4.75it/s]
Training 1/40:  28%|██▊       | 40/143 [00:09<00:11,  8.80it/s]
Training 1/40:  38%|███▊      | 55/143 [00:09<00:06, 14.59it/s]
Training 1/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.96it/s]
Training 1/40:  68%|██████▊   | 97/143 [00:09<00:01, 38.71it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 0: train_loss=0.2027, val_loss=0.1254



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<21:20,  9.01s/it]
Training 2/40:  10%|▉         | 14/143 [00:09<01:00,  2.14it/s]
Training 2/40:  16%|█▌        | 23/143 [00:09<00:29,  4.05it/s]
Training 2/40:  24%|██▍       | 35/143 [00:09<00:14,  7.54it/s]
Training 2/40:  37%|███▋      | 53/143 [00:09<00:06, 14.64it/s]
Training 2/40:  49%|████▉     | 70/143 [00:09<00:03, 23.27it/s]
Training 2/40:  64%|██████▍   | 92/143 [00:09<00:01, 37.65it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 1: train_loss=0.1135, val_loss=0.1076



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:10<24:01, 10.15s/it]
Training 3/40:   8%|▊         | 12/143 [00:10<01:20,  1.62it/s]
Training 3/40:  16%|█▌        | 23/143 [00:10<00:32,  3.72it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 13.01it/s]


Epoch 2: train_loss=0.1009, val_loss=0.1020



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 4/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 4/40:  16%|█▌        | 23/143 [00:09<00:28,  4.18it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 3: train_loss=0.0943, val_loss=0.1005



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 5/40:   8%|▊         | 11/143 [00:09<01:18,  1.67it/s]
Training 5/40:  16%|█▌        | 23/143 [00:09<00:28,  4.25it/s]
Training 5/40:  24%|██▍       | 35/143 [00:09<00:13,  7.74it/s]
Training 5/40:  34%|███▍      | 49/143 [00:09<00:07, 13.19it/s]
Training 5/40:  45%|████▌     | 65/143 [00:09<00:03, 21.35it/s]
Training 5/40:  60%|██████    | 86/143 [00:09<00:01, 35.07it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 4: train_loss=0.0889, val_loss=0.0953



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<23:01,  9.73s/it]
Training 6/40:   8%|▊         | 12/143 [00:09<01:17,  1.69it/s]
Training 6/40:  17%|█▋        | 25/143 [00:09<00:27,  4.28it/s]
Training 6/40:  27%|██▋       | 39/143 [00:10<00:12,  8.08it/s]
Training 6/40:  38%|███▊      | 55/143 [00:10<00:06, 13.92it/s]
Training 6/40:  57%|█████▋    | 81/143 [00:10<00:02, 26.72it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 5: train_loss=0.0844, val_loss=0.0934



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 7/40:  10%|▉         | 14/143 [00:09<01:00,  2.15it/s]
Training 7/40:  17%|█▋        | 25/143 [00:09<00:26,  4.51it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.73it/s]


Epoch 6: train_loss=0.0790, val_loss=0.0899



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 8/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 8/40:  17%|█▋        | 25/143 [00:09<00:26,  4.51it/s]
Training 8/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 7: train_loss=0.0746, val_loss=0.0917



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:59,  9.29s/it]
Training 9/40:   9%|▉         | 13/143 [00:09<01:07,  1.92it/s]
Training 9/40:  18%|█▊        | 26/143 [00:09<00:25,  4.63it/s]
Training 9/40:  27%|██▋       | 38/143 [00:09<00:13,  7.97it/s]
Training 9/40:  40%|███▉      | 57/143 [00:09<00:05, 15.23it/s]
Training 9/40:  53%|█████▎    | 76/143 [00:09<00:02, 24.65it/s]
Training 9/40:  68%|██████▊   | 97/143 [00:09<00:01, 37.81it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 8: train_loss=0.0710, val_loss=0.0866



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<22:05,  9.33s/it]
Training 10/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 10/40:  16%|█▌        | 23/143 [00:09<00:29,  4.10it/s]
Training 10/40:  33%|███▎      | 47/143 [00:09<00:08, 10.97it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 9: train_loss=0.0681, val_loss=0.0881



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 11/40:  17%|█▋        | 25/143 [00:09<00:25,  4.60it/s]
Training 11/40:  27%|██▋       | 39/143 [00:09<00:12,  8.61it/s]
Training 11/40:  40%|███▉      | 57/143 [00:09<00:05, 15.56it/s]
Training 11/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.06it/s]
Training 11/40:  73%|███████▎  | 105/143 [00:09<00:00, 44.13it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 10: train_loss=0.0636, val_loss=0.0866



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:32,  9.11s/it]
Training 12/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 12/40:  18%|█▊        | 26/143 [00:09<00:24,  4.78it/s]
Training 12/40:  29%|██▊       | 41/143 [00:09<00:11,  9.11it/s]
Training 12/40:  42%|████▏     | 60/143 [00:09<00:05, 16.47it/s]
Training 12/40:  59%|█████▊    | 84/143 [00:09<00:02, 28.71it/s]
Training 12/40:  76%|███████▌  | 108/143 [00:09<00:00, 44.04it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.97it/s]


Epoch 11: train_loss=0.0610, val_loss=0.0920



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 13/40:  10%|▉         | 14/143 [00:09<01:00,  2.14it/s]
Training 13/40:  19%|█▉        | 27/143 [00:09<00:23,  4.93it/s]
Training 13/40:  29%|██▊       | 41/143 [00:09<00:11,  9.01it/s]
Training 13/40:  41%|████      | 58/143 [00:09<00:05, 15.64it/s]
Training 13/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.35it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 12: train_loss=0.0583, val_loss=0.0838



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 14/40:   9%|▉         | 13/143 [00:09<01:07,  1.93it/s]
Training 14/40:  16%|█▌        | 23/143 [00:09<00:29,  4.01it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 13: train_loss=0.0554, val_loss=0.0887



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 15/40:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 15/40:  17%|█▋        | 24/143 [00:09<00:27,  4.30it/s]
Training 15/40:  25%|██▌       | 36/143 [00:09<00:13,  7.70it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 14: train_loss=0.0545, val_loss=0.0843



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:27,  9.06s/it]
Training 16/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 16/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 16/40:  26%|██▌       | 37/143 [00:09<00:12,  8.22it/s]
Training 16/40:  37%|███▋      | 53/143 [00:09<00:06, 14.44it/s]
Training 16/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.23it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 15: train_loss=0.0495, val_loss=0.0832



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 17/40:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 17/40:  17%|█▋        | 24/143 [00:09<00:27,  4.38it/s]
Training 17/40:  25%|██▌       | 36/143 [00:09<00:13,  7.85it/s]
Training 17/40:  38%|███▊      | 54/143 [00:09<00:05, 14.92it/s]
Training 17/40:  50%|█████     | 72/143 [00:09<00:02, 24.06it/s]
Training 17/40:  66%|██████▌   | 94/143 [00:09<00:01, 38.34it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 16: train_loss=0.0499, val_loss=0.0841



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 18/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 18/40:  15%|█▌        | 22/143 [00:09<00:30,  3.96it/s]
Training 18/40:  24%|██▍       | 35/143 [00:09<00:13,  7.76it/s]
Training 18/40:  36%|███▌      | 51/143 [00:09<00:06, 14.00it/s]
Training 18/40:  48%|████▊     | 69/143 [00:09<00:03, 23.17it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 17: train_loss=0.0464, val_loss=0.0842



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 19/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 19/40:  19%|█▉        | 27/143 [00:09<00:23,  4.96it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 18: train_loss=0.0444, val_loss=0.0851



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 20/40:   9%|▉         | 13/143 [00:09<01:05,  1.98it/s]
Training 20/40:  15%|█▌        | 22/143 [00:09<00:31,  3.90it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 19: train_loss=0.0430, val_loss=0.0824



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 21/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 21/40:  15%|█▌        | 22/143 [00:09<00:30,  3.97it/s]
Training 21/40:  28%|██▊       | 40/143 [00:09<00:11,  9.19it/s]
Training 21/40:  38%|███▊      | 55/143 [00:09<00:05, 14.91it/s]
Training 21/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.52it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 20: train_loss=0.0411, val_loss=0.0856



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 22/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 22/40:  15%|█▌        | 22/143 [00:09<00:30,  3.95it/s]
Training 22/40:  25%|██▌       | 36/143 [00:09<00:13,  8.04it/s]
Training 22/40:  37%|███▋      | 53/143 [00:09<00:06, 14.69it/s]
Training 22/40:  51%|█████     | 73/143 [00:09<00:02, 24.95it/s]
Training 22/40:  73%|███████▎  | 105/143 [00:09<00:00, 46.51it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 21: train_loss=0.0404, val_loss=0.0821



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 23/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 23/40:  16%|█▌        | 23/143 [00:09<00:28,  4.27it/s]
Training 23/40:  27%|██▋       | 38/143 [00:09<00:12,  8.69it/s]
Training 23/40:  36%|███▋      | 52/143 [00:09<00:06, 14.13it/s]
Training 23/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.63it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 22: train_loss=0.0378, val_loss=0.0833



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<23:31,  9.94s/it]
Training 24/40:   9%|▉         | 13/143 [00:10<01:12,  1.80it/s]
Training 24/40:  15%|█▌        | 22/143 [00:10<00:34,  3.54it/s]
Training 24/40:  25%|██▌       | 36/143 [00:10<00:14,  7.30it/s]
Training 24/40:  35%|███▍      | 50/143 [00:10<00:07, 12.31it/s]
Training 24/40:  52%|█████▏    | 74/143 [00:10<00:02, 23.92it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.01it/s]


Epoch 23: train_loss=0.0376, val_loss=0.0873



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:16,  9.42s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 25/40:  13%|█▎        | 19/143 [00:09<00:38,  3.23it/s]
Training 25/40:  22%|██▏       | 32/143 [00:09<00:16,  6.90it/s]
Training 25/40:  32%|███▏      | 46/143 [00:09<00:07, 12.16it/s]
Training 25/40:  45%|████▌     | 65/143 [00:09<00:03, 21.65it/s]
Training 25/40:  59%|█████▊    | 84/143 [00:10<00:01, 33.59it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 24: train_loss=0.0364, val_loss=0.0823



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 26/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 26/40:  15%|█▍        | 21/143 [00:09<00:32,  3.71it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 25: train_loss=0.0366, val_loss=0.0855



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:52,  9.25s/it]
Training 27/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 27/40:  15%|█▍        | 21/143 [00:09<00:32,  3.72it/s]
Training 27/40:  23%|██▎       | 33/143 [00:09<00:15,  7.13it/s]
Training 27/40:  34%|███▎      | 48/143 [00:09<00:07, 12.88it/s]
Training 27/40:  50%|█████     | 72/143 [00:09<00:02, 25.18it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 26: train_loss=0.0343, val_loss=0.0830



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 28/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 28/40:  16%|█▌        | 23/143 [00:09<00:28,  4.17it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 27: train_loss=0.0340, val_loss=0.0854



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 29/40:   9%|▉         | 13/143 [00:09<01:07,  1.93it/s]
Training 29/40:  15%|█▍        | 21/143 [00:09<00:34,  3.59it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 28: train_loss=0.0323, val_loss=0.0793



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 30/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 30/40:  15%|█▌        | 22/143 [00:09<00:31,  3.89it/s]
Training 30/40:  24%|██▍       | 35/143 [00:09<00:14,  7.63it/s]
Training 30/40:  36%|███▌      | 51/143 [00:09<00:06, 13.80it/s]
Training 30/40:  50%|█████     | 72/143 [00:09<00:02, 24.49it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 29: train_loss=0.0306, val_loss=0.0857



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 31/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 31/40:  14%|█▍        | 20/143 [00:09<00:33,  3.63it/s]
Training 31/40:  22%|██▏       | 32/143 [00:09<00:15,  7.08it/s]
Training 31/40:  34%|███▍      | 49/143 [00:09<00:06, 13.72it/s]
Training 31/40:  48%|████▊     | 68/143 [00:09<00:03, 23.41it/s]
Training 31/40:  63%|██████▎   | 90/143 [00:09<00:01, 37.61it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 30: train_loss=0.0292, val_loss=0.0817



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 32/40:  10%|▉         | 14/143 [00:09<01:00,  2.14it/s]
Training 32/40:  18%|█▊        | 26/143 [00:09<00:24,  4.72it/s]
Training 32/40:  28%|██▊       | 40/143 [00:09<00:11,  8.80it/s]
Training 32/40:  41%|████      | 58/143 [00:09<00:05, 15.84it/s]
Training 32/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.53it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 31: train_loss=0.0320, val_loss=0.0818



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 33/40:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 33/40:  18%|█▊        | 26/143 [00:09<00:24,  4.81it/s]
Training 33/40:  27%|██▋       | 38/143 [00:09<00:12,  8.27it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 32: train_loss=0.0296, val_loss=0.0829



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 34/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 34/40:  15%|█▍        | 21/143 [00:09<00:32,  3.71it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 33: train_loss=0.0285, val_loss=0.0830



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 35/40:  17%|█▋        | 24/143 [00:09<00:27,  4.37it/s]
Training 35/40:  27%|██▋       | 38/143 [00:09<00:12,  8.43it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 34: train_loss=0.0272, val_loss=0.0866



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 36/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 36/40:  15%|█▍        | 21/143 [00:09<00:32,  3.73it/s]
Training 36/40:  35%|███▍      | 50/143 [00:09<00:07, 12.18it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 35: train_loss=0.0288, val_loss=0.0863



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 37/40:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 37/40:  17%|█▋        | 25/143 [00:09<00:26,  4.51it/s]
Training 37/40:  27%|██▋       | 39/143 [00:09<00:12,  8.50it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 36: train_loss=0.0282, val_loss=0.0829



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 38/40:   9%|▉         | 13/143 [00:09<01:05,  1.98it/s]
Training 38/40:  17%|█▋        | 24/143 [00:09<00:27,  4.33it/s]
Training 38/40:  35%|███▍      | 50/143 [00:09<00:07, 12.04it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 37: train_loss=0.0258, val_loss=0.0862



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<23:02,  9.73s/it]
Training 39/40:   7%|▋         | 10/143 [00:09<01:34,  1.40it/s]
Training 39/40:  15%|█▌        | 22/143 [00:09<00:31,  3.80it/s]
Training 39/40:  25%|██▌       | 36/143 [00:10<00:14,  7.61it/s]
Training 39/40:  38%|███▊      | 54/143 [00:10<00:06, 14.22it/s]
Training 39/40:  51%|█████     | 73/143 [00:10<00:02, 23.34it/s]
Training 39/40:  68%|██████▊   | 97/143 [00:10<00:01, 38.22it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 13.11it/s]


Epoch 38: train_loss=0.0253, val_loss=0.0813



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<23:31,  9.94s/it]
Training 40/40:   8%|▊         | 12/143 [00:10<01:19,  1.66it/s]
Training 40/40:  17%|█▋        | 25/143 [00:10<00:28,  4.19it/s]
Training 40/40:  27%|██▋       | 39/143 [00:10<00:13,  7.91it/s]
Training 40/40:  42%|████▏     | 60/143 [00:10<00:05, 15.50it/s]
Training 40/40:  59%|█████▉    | 85/143 [00:10<00:02, 27.42it/s]
Training 40/40:  76%|███████▌  | 109/143 [00:10<00:00, 41.76it/s]
Training 40/40: 100%|██████████| 143/143 [00:11<00:00, 12.90it/s]


Epoch 39: train_loss=0.0251, val_loss=0.0839


2025-06-02 01:43:22,002 - __main__ - INFO - Saved mlp probe for layer 11 to cache\probes\phase1_dinov2_viewpoint_probing\mlp_layer_11_probe.pth
100%|██████████| 6/6 [3:41:13<00:00, 2212.33s/it]


In [22]:
# Persist the probing results through the experiment object and keep the
# returned location in `result_path`; the analysis cell further down reads
# both `result_path` and `result_path.parent`.
# NOTE(review): `result_path` is presumably a pathlib.Path (it is used with
# `.parent` later) — confirm against `save_results`.
logger.info("Saving results...")
result_path = experiment.save_results(results)

2025-06-02 01:43:41,406 - __main__ - INFO - Saving results...
2025-06-02 01:43:41,418 - __main__ - INFO - Results saved to results\phase1_dinov2_viewpoint_probing\results.json


In [23]:
# Cell-local import of the analysis entry point used below.
from src.analysis.layer_analysis import analyze_experiment_results

logger.info("Creating analysis and visualizations...")
# Run the analysis on the saved results file, passing the directory that
# contains that file (result_path.parent) as the output directory.
analyze_experiment_results(result_path, output_dir=result_path.parent)

# NOTE(review): this message mentions an "analysis_results" folder, but the
# only output directory passed above is result_path.parent — confirm where the
# analysis artifacts actually land and whether the wording should match.
logger.info("Results analyzed! Please see the results and analysis_results folders for the outcomes.")

2025-06-02 01:43:41,438 - __main__ - INFO - Creating analysis and visualizations...
2025-06-02 01:43:43,812 - src.analysis.layer_analysis - INFO - Analysis report saved to results\phase1_dinov2_viewpoint_probing\layer_analysis_report.json
2025-06-02 01:43:43,813 - __main__ - INFO - Results analyzed! Please see the results and analysis_results folders for the outcomes.
