# Probing Experiment on SSL Models

This is effectively a notebook version of the old experiment runner script. Splitting the run into separate cells means that a small error in one step does not throw away the state built up by the earlier steps.

### Imports, Logging Setup

In [1]:
# Set environment variables before imports
import os
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Imports
import hydra
from omegaconf import DictConfig, OmegaConf
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
from pathlib import Path
import logging
import wandb
from typing import Dict, List, Tuple, Optional
from tqdm import tqdm


from src.models.feature_extractor import FeatureExtractor, load_feature_extractor
from src.datasets.shapenet_3dr2n2 import create_3dr2n2_dataloaders
from src.probing.probes import create_probe, ProbeTrainer
from src.probing.data_preprocessing import (
    FeatureExtractorPipeline,
    create_probing_dataloaders,
    ProbingDataset,
)
from src.probing.metrics import (
    compute_regression_metrics,
    compute_viewpoint_specific_metrics,
    MetricsTracker,
)
from src.analysis.layer_analysis import LayerWiseAnalyzer

# Logger for this notebook/module; its name is `__name__` (shown as
# "__main__" in the captured cell output).
logger = logging.getLogger(__name__)
# Root logging configuration: INFO level, each record prefixed with the
# timestamp, logger name and level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

### Probing Setup
This class is the overarching "manager" responsible for the entire experiment. It contains all the functionality required to:

- Create & setup dataloaders 
- Extract features from the frozen layers of the ViT models 
- Train MLP & Linear probes on those layers 
- Summarize results

In [2]:
class ProbingExperiment:
    """Orchestrates probing experiments"""

    def __init__(self, config: DictConfig):
        """Prepare device, optional wandb run, output folders and the analyzer.

        Args:
            config: Experiment configuration. This constructor reads
                ``models.device`` / ``device``, ``wandb``, ``experiment.name``,
                ``results_dir`` and ``cache_dir`` from it.
        """
        self.config = config

        # An explicitly configured device wins (models.device first, then the
        # top-level `device`); the hardware is only probed when neither is set.
        chosen_device = config.models.get("device", config.get("device"))
        if not chosen_device:
            if torch.cuda.is_available():
                chosen_device = "cuda"
            elif torch.backends.mps.is_available():
                chosen_device = "mps"
            else:
                chosen_device = "cpu"
        self.device = chosen_device
        logger.info(f"Using device: {self.device}")

        # Start a wandb run only when the config opts in
        wandb_settings = config.get("wandb", {})
        if wandb_settings.get("enabled", False):
            wandb.init(
                project=config.wandb.project,
                entity=config.wandb.get("entity"),
                name=config.experiment.name,
                config=OmegaConf.to_container(config, resolve=True),
            )

        # Output locations; each directory is created if it does not exist yet
        self.results_dir = Path(config.get("results_dir", "./results"))
        self.results_dir.mkdir(parents=True, exist_ok=True)

        self.cache_dir = Path(config.get("cache_dir", "./cache"))
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Trained probes live under <cache_dir>/probes/<experiment name>
        experiment_name = self.config.experiment.name
        self.probe_save_dir = self.cache_dir / "probes" / experiment_name
        self.probe_save_dir.mkdir(parents=True, exist_ok=True)

        # The analyzer is handed <results_dir>/<experiment name>
        self.analyzer = LayerWiseAnalyzer(self.results_dir / experiment_name)

   
    def load_dataset(self) -> Tuple[DataLoader, DataLoader, DataLoader]:
        """Build the dataloaders described by ``config.datasets``.

        The optional ``subset_percentage`` entry of the dataset config is
        forwarded as a keyword argument (``None`` when it is not set).

        Returns:
            Whatever ``create_3dr2n2_dataloaders`` returns; annotated here, and
            unpacked by callers, as a (train, val, test) loader triple.
        """
        dataset_cfg = self.config.datasets
        fraction = dataset_cfg.get("subset_percentage", None)
        return create_3dr2n2_dataloaders(dataset_cfg, subset_percentage=fraction)

    def load_feature_extractor(self) -> FeatureExtractor:
        """Load the feature extractor described by ``config.models``.

        The model config is updated in place with the device chosen in
        ``__init__`` and with ``<cache_dir>/models`` as its cache directory, so
        both values also appear in any later dump of the experiment config.

        Returns:
            The object returned by ``load_feature_extractor``.
        """
        model_config = self.config.models
        model_config.device = self.device
        model_config.cache_dir = str(self.cache_dir / "models")

        # resolve=True (as in the other to_container calls in this class) so
        # that `${...}` interpolations reach the loader as concrete values
        # rather than as literal placeholder strings.
        loader_config = OmegaConf.to_container(model_config, resolve=True)

        feature_extractor = load_feature_extractor(loader_config)
        logger.info(f"Loaded {model_config.model_name} feature extractor")
        return feature_extractor

    def extract_features_for_layer(
        self,
        feature_extractor: FeatureExtractor,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        layer: int,
        feature_type: str,
        task_type: str,
    ) -> Tuple[ProbingDataset, ProbingDataset, ProbingDataset]:
        """Create the probing datasets for a single layer.

        A new ``FeatureExtractorPipeline`` is built on every call, pointed at
        ``<cache_dir>/features`` and using ``extraction_batch_size`` from the
        config (32 when unset). It is asked for datasets restricted to
        ``layer``.

        Args:
            feature_extractor: Extractor handed to the pipeline.
            train_loader: Loader for the training split.
            val_loader: Loader for the validation split.
            test_loader: Loader for the test split.
            layer: The one layer to extract (passed as ``layers=[layer]``).
            feature_type: Forwarded unchanged to the pipeline.
            task_type: Forwarded unchanged to the pipeline.

        Returns:
            The result of ``pipeline.create_probing_datasets``; annotated as a
            (train, val, test) dataset triple.
        """
        features_cache = self.cache_dir / "features"
        extraction_pipeline = FeatureExtractorPipeline(
            feature_extractor=feature_extractor,
            device=self.device,
            batch_size=self.config.get("extraction_batch_size", 32),
            cache_dir=str(features_cache),
        )

        # The name passed along combines model, experiment and layer
        experiment_name = f"{self.config.models.model_name}_{self.config.experiment.name}_layer_{layer}"

        datasets = extraction_pipeline.create_probing_datasets(
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            layers=[layer],
            feature_type=feature_type,
            task_type=task_type,
            experiment_name=experiment_name,
        )
        return datasets

    def run_probe_experiment(
        self,
        probe_type: str,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        feature_dim: int,
        layer: int,
    ) -> Dict:
        """Train, checkpoint and evaluate one probe on one layer's features.

        Args:
            probe_type: Key under ``config.probing`` that holds this probe's
                settings; also used in the checkpoint filename.
            train_loader: Loader used for training.
            val_loader: Loader used for validation during training.
            test_loader: Loader used for the final evaluation.
            feature_dim: Written into the probe config as ``input_dim``.
            layer: Layer index, used for logging and the checkpoint filename.

        Returns:
            Dict with the keys ``train_history``, ``val_history``,
            ``test_metrics``, ``detailed_metrics``, ``best_epoch`` and
            ``total_epochs``.
        """
        logger.info(
            f"Running {probe_type} probe on layer {layer} (feature_dim: {feature_dim})"
        )

        # Work on a plain, mutable dict copy of the probe settings. When the
        # probe type has no config entry the `{}` default is a regular dict,
        # which OmegaConf.to_container would reject, hence the is_config check.
        probe_config = self.config.probing.get(probe_type, {})
        if OmegaConf.is_config(probe_config):
            probe_config = OmegaConf.to_container(probe_config, resolve=True)
        else:
            probe_config = dict(probe_config or {})

        probe_config["input_dim"] = feature_dim
        probe_config["output_dim"] = self.config.probing.get("output_dim", 2)

        # Experiment-level task names are mapped onto the two generic probe
        # task types; any other value is passed through unchanged.
        main_task_type = self.config.probing.get("task_type", "regression")
        task_aliases = {
            "viewpoint_regression": "regression",
            "view_classification": "classification",
        }
        probe_config["task_type"] = task_aliases.get(main_task_type, main_task_type)

        probe = create_probe(probe_config)

        # Optimizer and (optional) scheduler come from the probe's own
        # `training` section.
        training_config = probe_config.get("training", {}) or {}
        optimizer = self.create_optimizer(probe, training_config.get("optimizer", {}))
        scheduler = self.create_scheduler(
            optimizer, training_config.get("scheduler", {})
        )

        epochs = training_config.get("epochs", 30)
        early_stopping_patience = training_config.get("early_stopping_patience", 15)

        # A single trainer, wired to the tracker that records the histories
        metrics_tracker = MetricsTracker()
        trainer = ProbeTrainer(
            probe, device=self.device, MetricsTracker=metrics_tracker
        )

        wandb_enabled = self.config.get("wandb", {}).get("enabled", False)

        best_model, best_val_loss = trainer.train(
            epochs,
            optimizer,
            scheduler,
            early_stopping_patience,
            train_loader,
            val_loader,
            probe_type=probe_type,
            layer=layer,
            wandb_enabled=wandb_enabled,
        )

        # `best_model` is a state_dict. Load it so the metrics reported below
        # describe the same weights that are written to the checkpoint.
        # NOTE(review): a no-op if ProbeTrainer.train already restores the
        # best weights itself -- confirm against the trainer.
        if best_model is not None:
            probe.load_state_dict(best_model)

        # Save the trained probe next to the other probes of this experiment
        self.probe_save_dir.mkdir(parents=True, exist_ok=True)
        probe_filename = f"{probe_type}_layer_{layer}_probe.pth"
        probe_save_path = self.probe_save_dir / probe_filename

        torch.save({
            'model_state_dict': best_model,
            'probe_config': probe_config,
            'layer': layer,
            'probe_type': probe_type,
            'experiment_name': self.config.experiment.name,
            'model_name': self.config.models.model_name,
            'best_val_loss': best_val_loss,
            'feature_dim': feature_dim
        }, probe_save_path)

        logger.info(f"Saved {probe_type} probe for layer {layer} to {probe_save_path}")

        test_metrics = trainer.evaluate(test_loader)
        detailed_metrics = self.compute_detailed_metrics(probe, test_loader)

        train_history = metrics_tracker.get_history("train")

        return {
            "train_history": train_history,
            "val_history": metrics_tracker.get_history("val"),
            "test_metrics": test_metrics,
            "detailed_metrics": detailed_metrics,
            "best_epoch": metrics_tracker.best_epoch,
            "total_epochs": len(train_history),
        }

    def save_probe(self, probe: nn.Module, probe_type: str, layer: int, probe_config: Dict):
        """Write a probe checkpoint plus a JSON copy of its metadata.

        Both files go to ``self.probe_save_dir`` and are named after the model
        name, probe type and layer.

        Args:
            probe: Module whose ``state_dict()`` is stored.
            probe_type: Probe type label, used in the filenames and metadata.
            layer: Layer index, used in the filenames and metadata.
            probe_config: Probe configuration stored in both files.
        """
        import json

        model_name = self.config.models.model_name
        probe_path = self.probe_save_dir / f"{model_name}_{probe_type}_layer_{layer}.pth"
        config_path = self.probe_save_dir / f"{model_name}_{probe_type}_layer_{layer}_config.json"

        # Metadata shared by the checkpoint and the JSON side file
        metadata = {
            'probe_config': probe_config,
            'model_name': model_name,
            'probe_type': probe_type,
            'layer': layer,
            'experiment_name': self.config.experiment.name
        }

        # Checkpoint = weights followed by the metadata entries
        checkpoint = {'model_state_dict': probe.state_dict()}
        checkpoint.update(metadata)
        torch.save(checkpoint, probe_path)

        # The same metadata, without the weights, as JSON
        with open(config_path, 'w') as handle:
            json.dump(metadata, handle, indent=2)

        logger.info(f"Probe saved to {probe_path}")
        logger.info(f"Probe config saved to {config_path}")

    def load_probe(self, probe_type: str, layer: int, device: Optional[str] = None) -> nn.Module:
        """Load a previously saved probe"""
        if device is None:
            device = self.device
            
        model_name = self.config.models.model_name
        filename = f"{model_name}_{probe_type}_layer_{layer}.pth"
        probe_path = self.probe_save_dir / filename
        
        if not probe_path.exists():
            raise FileNotFoundError(f"Probe not found at {probe_path}")
        
        # Load the saved data
        saved_data = torch.load(probe_path, map_location=device)
        
        # Recreate the probe using the saved config
        probe_config = saved_data['probe_config']
        probe = create_probe(probe_config)
        
        # Load the state dict
        probe.load_state_dict(saved_data['model_state_dict'])
        probe.to(device)
        
        logger.info(f"Probe loaded from {probe_path}")
        return probe

    def create_optimizer(
        self, model: nn.Module, optimizer_config: Dict
    ) -> torch.optim.Optimizer:
        """Create optimizer from config using Hydra instantiate"""
        from hydra.utils import instantiate

        # Create a copy of config and add model parameters
        optimizer_config = optimizer_config.copy()
        optimizer_config["params"] = model.parameters()

        return instantiate(optimizer_config)

    def create_scheduler(
        self, optimizer: torch.optim.Optimizer, scheduler_config: Dict
    ):
        """Create learning rate scheduler from config using Hydra instantiate"""
        if not scheduler_config:
            return None

        from hydra.utils import instantiate

        scheduler_config = scheduler_config.copy()
        scheduler_config["optimizer"] = optimizer

        return instantiate(scheduler_config)

    def compute_detailed_metrics(
        self, probe: nn.Module, test_loader: DataLoader
    ) -> Dict:
        """Compute alles metrics"""
        probe.eval()

        all_predictions = []
        all_targets = []
        all_categories = []

        with torch.no_grad():
            for batch in test_loader:
                features = batch["features"].to(self.device)
                targets = batch["targets"]

                outputs = probe(features)

                all_predictions.append(outputs.cpu())
                all_targets.append(targets)

                # Get categories if available
                if "categories" in batch:
                    all_categories.extend(batch["categories"])

        predictions = torch.cat(all_predictions, dim=0)
        targets = torch.cat(all_targets, dim=0)

        # Basic regression metrics
        metrics = compute_regression_metrics(predictions, targets, return_per_dim=True)

        # Viewpoint-specific metrics
        if predictions.shape[1] == 2:
            viewpoint_metrics = compute_viewpoint_specific_metrics(
                azimuth_pred=predictions[:, 0],
                elevation_pred=predictions[:, 1],
                azimuth_target=targets[:, 0],
                elevation_target=targets[:, 1],
            )
            metrics.update(viewpoint_metrics)

        return metrics

    def save_results(self, results: Dict) -> Path:
        """Write the resolved config and the results to ``results.json``.

        The file is placed in ``<results_dir>/<experiment name>/`` and holds a
        JSON object with the two keys ``config`` and ``results``.

        Args:
            results: Results to store; converted with
                ``make_json_serializable`` before being written.

        Returns:
            Path of the written ``results.json`` file.
        """
        import json

        # Create experiment directory
        exp_dir = self.results_dir / self.config.experiment.name
        exp_dir.mkdir(parents=True, exist_ok=True)
        results_file = exp_dir / "results.json"

        combined_results = {
            "config": OmegaConf.to_container(self.config, resolve=True),
            "results": self.make_json_serializable(results),
        }

        # Serialize completely before touching the file: if some value is not
        # JSON-compatible, json.dumps raises here and an existing results.json
        # is not replaced by a half-written one.
        payload = json.dumps(combined_results, indent=2)
        results_file.write_text(payload, encoding="utf-8")

        logger.info(f"Results saved to {results_file}")
        return results_file

    def make_json_serializable(self, obj):
        """Convert object to JSON-serializable format"""
        if isinstance(obj, dict):
            return {k: self.make_json_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self.make_json_serializable(v) for v in obj]
        elif isinstance(obj, (torch.Tensor, np.ndarray)):
            return obj.tolist() if hasattr(obj, "tolist") else float(obj)
        elif isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        else:
            return obj



### Hydra Configuration Loading / Setup

In [3]:
from hydra import initialize, compose
from hydra.core.global_hydra import GlobalHydra
import os
from pathlib import Path

# Config directory handed to Hydra's `initialize`, and the name of the root
# config to compose from it.
CONFIG_PATH = "../configs"
CONFIG_NAME = "experiment_config"

# Stays None when loading fails, so later cells can detect the failure.
cfg: Optional[DictConfig] = None

# Clear Hydra's global state left over from an earlier run of this cell
# before initializing again.
if GlobalHydra.instance().is_initialized():
    logger.info("Clearing existing Hydra global state.")
    GlobalHydra.instance().clear()

try:
    # The project root is taken to be the parent of the current working
    # directory; <project root>/data is exported as DATA_DIR.
    # NOTE(review): presumably read by the configs through an environment
    # interpolation -- confirm against the config files.
    project_root = Path(os.getcwd()).parent
    data_dir_abs = project_root / "data"

    os.environ["DATA_DIR"] = str(data_dir_abs)

    logger.info(f"Initializing Hydra with config_path: '{CONFIG_PATH}'")

    initialize(version_base=None, config_path=CONFIG_PATH)

    logger.info(f"Composing configuration with config_name: '{CONFIG_NAME}'")

    cfg = compose(config_name=CONFIG_NAME)

except Exception as e:
    # Log with traceback but keep the cell alive; `cfg` stays None.
    logger.error(f"Error initializing Hydra or loading configuration: {e}", exc_info=True)
else:
    # Reported whenever composing succeeded, independent of whether the
    # resulting config object happens to be empty (and therefore falsy).
    logger.info("Hydra configuration loaded successfully.")


2025-06-02 17:23:59,665 - __main__ - INFO - Initializing Hydra with config_path: '../configs'
2025-06-02 17:24:00,127 - __main__ - INFO - Composing configuration with config_name: 'experiment_config'
2025-06-02 17:24:00,227 - __main__ - INFO - Hydra configuration loaded successfully.


## Running the Experiment
The following code uses the above configurations and utility functions to run the actual experiment.

In [4]:
results = None

# The configuration cell logs load errors instead of raising, so `cfg` may
# still be None here. Stop with a clear message rather than failing with an
# attribute error inside ProbingExperiment.__init__.
if cfg is None:
    raise RuntimeError(
        "Hydra configuration was not loaded; re-run the configuration cell "
        "and check its error log."
    )

logger.info("Starting experiment execution")
experiment = ProbingExperiment(cfg)
    

2025-06-02 17:24:00,233 - __main__ - INFO - Starting experiment execution
2025-06-02 17:24:00,234 - __main__ - INFO - Using device: cuda
2025-06-02 17:24:00,564 - wandb.jupyter - ERROR - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: rsen0811 (cse493g1_drn). Use `wandb login --relogin` to force relogin


### Load the Feature Extractor & Dataset

In [5]:
# Probing settings, each with a fallback for when the config omits it:
# which layers to probe, which feature type to extract, and the task type.
extraction_config = cfg.models.get("feature_extraction", {})
layers = extraction_config.get("layers", [11])
feature_type = extraction_config.get("feature_type", "cls_token")
task_type = cfg.probing.get("task_type", "viewpoint_regression")

# Load the frozen feature extractor described by cfg.models
feature_extractor = experiment.load_feature_extractor()

Downloading config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

2025-06-02 17:24:18,801 - src.models.feature_extractor - INFO - Loaded supervised_vit model on cuda
2025-06-02 17:24:18,802 - __main__ - INFO - Loaded supervised_vit feature extractor


In [6]:
# Build the train/val/test loaders from cfg.datasets (see
# ProbingExperiment.load_dataset).
train_loader, val_loader, test_loader = experiment.load_dataset()

100%|██████████| 30648/30648 [01:13<00:00, 418.75it/s]


Using 5.00% of train data: 36777 samples.


100%|██████████| 6567/6567 [00:18<00:00, 363.69it/s]


Using 5.00% of val data: 7880 samples.


100%|██████████| 6569/6569 [00:17<00:00, 372.89it/s]


Using 5.00% of test data: 7882 samples.


### Train the Probes

In [7]:
# results["layer_<n>"][<probe_type>] -> dict returned by run_probe_experiment
results = {}

# Loader settings do not depend on the layer, so read them once up front
probe_batch_size = cfg.probing.get("training", {}).get("batch_size", 64)
probe_num_workers = cfg.get("num_workers", 4)

for layer in tqdm(layers):
    logger.info(f"Processing layer {layer}...")

    # Step 1: extract this layer's features for all three splits
    train_dataset, val_dataset, test_dataset = experiment.extract_features_for_layer(
        feature_extractor,
        train_loader,
        val_loader,
        test_loader,
        layer,
        feature_type,
        task_type,
    )

    # Step 2: wrap the extracted features in dataloaders for probe training
    probe_train_loader, probe_val_loader, probe_test_loader = create_probing_dataloaders(
        train_dataset,
        val_dataset,
        test_dataset,
        batch_size=probe_batch_size,
        num_workers=probe_num_workers,
    )

    # Step 3: train and evaluate every configured probe type on this layer.
    # The probe input size is the second dimension of the training features.
    layer_results = {}
    for probe_type in cfg.probing.probe_types:
        logger.info(f"Running {probe_type} probe on layer {layer}...")
        probe_results = experiment.run_probe_experiment(
            probe_type,
            probe_train_loader,
            probe_val_loader,
            probe_test_loader,
            train_dataset.features.shape[1],
            layer,
        )
        layer_results[probe_type] = probe_results

    results[f"layer_{layer}"] = layer_results

  0%|          | 0/6 [00:00<?, ?it/s]2025-06-02 17:26:07,863 - __main__ - INFO - Processing layer 2...
2025-06-02 17:26:07,864 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:08<2:34:48,  8.09s/it]
Extracting features:   0%|          | 2/1149 [00:08<1:06:48,  3.49s/it]
Extracting features:   0%|          | 3/1149 [00:08<38:40,  2.02s/it]  
Extracting features:   0%|          | 4/1149 [00:08<25:24,  1.33s/it]
Extracting features:   0%|          | 5/1149 [00:09<18:07,  1.05it/s]
Extracting features:   1%|          | 6/1149 [00:09<13:47,  1.38it/s]
Extracting features:   1%|          | 7/1149 [00:09<11:09,  1.71it/s]
Extracting features:   1%|          | 8/1149 [00:10<09:16,  2.05it/s]
Extracting features:   1%|          | 9/1149 [00:10<07:58,  2.38it/s]
Extracting features:   1%|          | 10/1149 [00:10<07:10,  2.64it/s]
Extracting features:

Epoch 0: train_loss=0.2053, val_loss=0.1545



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 2/30:  10%|▉         | 14/143 [00:09<01:04,  2.01it/s]
Training 2/30:  24%|██▍       | 35/143 [00:09<00:17,  6.28it/s]
Training 2/30:  45%|████▌     | 65/143 [00:09<00:05, 14.64it/s]
Training 2/30:  64%|██████▍   | 92/143 [00:09<00:02, 24.61it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 1: train_loss=0.1487, val_loss=0.1431



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 3/30:  10%|▉         | 14/143 [00:09<01:05,  1.97it/s]
Training 3/30:  28%|██▊       | 40/143 [00:09<00:14,  7.16it/s]
Training 3/30:  45%|████▍     | 64/143 [00:10<00:05, 13.66it/s]
Training 3/30:  64%|██████▎   | 91/143 [00:10<00:02, 23.48it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.32it/s]


Epoch 2: train_loss=0.1394, val_loss=0.1399



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:44,  9.61s/it]
Training 4/30:  13%|█▎        | 19/143 [00:09<00:45,  2.74it/s]
Training 4/30:  24%|██▍       | 35/143 [00:09<00:18,  5.96it/s]
Training 4/30:  43%|████▎     | 62/143 [00:09<00:06, 13.46it/s]
Training 4/30:  66%|██████▋   | 95/143 [00:10<00:01, 25.78it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 3: train_loss=0.1337, val_loss=0.1325



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 5/30:  10%|▉         | 14/143 [00:09<01:04,  2.01it/s]
Training 5/30:  22%|██▏       | 32/143 [00:09<00:19,  5.67it/s]
Training 5/30:  34%|███▍      | 49/143 [00:09<00:09, 10.36it/s]
Training 5/30:  56%|█████▌    | 80/143 [00:09<00:02, 22.05it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.52it/s]


Epoch 4: train_loss=0.1306, val_loss=0.1289



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 6/30:  11%|█         | 16/143 [00:09<00:54,  2.32it/s]
Training 6/30:  25%|██▌       | 36/143 [00:09<00:16,  6.41it/s]
Training 6/30:  45%|████▌     | 65/143 [00:09<00:05, 14.55it/s]
Training 6/30:  66%|██████▋   | 95/143 [00:09<00:01, 25.77it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 5: train_loss=0.1284, val_loss=0.1267



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:08<21:00,  8.88s/it]
Training 7/30:  14%|█▍        | 20/143 [00:08<00:39,  3.12it/s]
Training 7/30:  33%|███▎      | 47/143 [00:09<00:10,  9.01it/s]
Training 7/30:  50%|████▉     | 71/143 [00:09<00:04, 16.07it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 6: train_loss=0.1266, val_loss=0.1252



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:41,  9.17s/it]
Training 8/30:  11%|█         | 16/143 [00:09<00:52,  2.41it/s]
Training 8/30:  23%|██▎       | 33/143 [00:09<00:18,  5.99it/s]
Training 8/30:  39%|███▉      | 56/143 [00:09<00:06, 12.64it/s]
Training 8/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.07it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 7: train_loss=0.1255, val_loss=0.1254



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:42,  9.60s/it]
Training 9/30:  11%|█         | 16/143 [00:09<00:55,  2.30it/s]
Training 9/30:  24%|██▍       | 35/143 [00:09<00:17,  6.15it/s]
Training 9/30:  43%|████▎     | 61/143 [00:09<00:06, 13.38it/s]
Training 9/30:  60%|██████    | 86/143 [00:10<00:02, 22.62it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 8: train_loss=0.1256, val_loss=0.1242



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:00,  9.30s/it]
Training 10/30:  10%|█         | 15/143 [00:09<00:57,  2.22it/s]
Training 10/30:  26%|██▌       | 37/143 [00:09<00:15,  6.83it/s]
Training 10/30:  41%|████▏     | 59/143 [00:09<00:06, 13.04it/s]
Training 10/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.11it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 9: train_loss=0.1238, val_loss=0.1225



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 11/30:  12%|█▏        | 17/143 [00:09<00:49,  2.55it/s]
Training 11/30:  26%|██▌       | 37/143 [00:09<00:15,  6.76it/s]
Training 11/30:  48%|████▊     | 68/143 [00:09<00:04, 15.72it/s]
Training 11/30:  66%|██████▋   | 95/143 [00:09<00:01, 26.02it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 10: train_loss=0.1227, val_loss=0.1221



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 12/30:  10%|█         | 15/143 [00:09<00:56,  2.27it/s]
Training 12/30:  27%|██▋       | 39/143 [00:09<00:14,  7.41it/s]
Training 12/30:  42%|████▏     | 60/143 [00:09<00:06, 13.47it/s]
Training 12/30:  62%|██████▏   | 89/143 [00:09<00:02, 24.77it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 11: train_loss=0.1221, val_loss=0.1215



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 13/30:  17%|█▋        | 24/143 [00:09<00:32,  3.70it/s]
Training 13/30:  28%|██▊       | 40/143 [00:09<00:14,  7.10it/s]
Training 13/30:  41%|████      | 58/143 [00:09<00:06, 12.31it/s]
Training 13/30:  57%|█████▋    | 81/143 [00:09<00:02, 21.31it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 12: train_loss=0.1220, val_loss=0.1221



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 14/30:   8%|▊         | 12/143 [00:09<01:11,  1.84it/s]
Training 14/30:  24%|██▍       | 34/143 [00:09<00:16,  6.62it/s]
Training 14/30:  40%|███▉      | 57/143 [00:09<00:06, 13.40it/s]
Training 14/30:  62%|██████▏   | 89/143 [00:09<00:02, 26.12it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 13: train_loss=0.1212, val_loss=0.1229



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 15/30:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 15/30:  22%|██▏       | 31/143 [00:09<00:19,  5.76it/s]
Training 15/30:  40%|███▉      | 57/143 [00:09<00:06, 13.31it/s]
Training 15/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.93it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 14: train_loss=0.1219, val_loss=0.1246



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:09,  9.36s/it]
Training 16/30:  10%|█         | 15/143 [00:09<00:57,  2.21it/s]
Training 16/30:  27%|██▋       | 39/143 [00:09<00:14,  7.20it/s]
Training 16/30:  43%|████▎     | 61/143 [00:09<00:06, 13.39it/s]
Training 16/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.19it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.86it/s]


Epoch 15: train_loss=0.1219, val_loss=0.1200



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 17/30:  13%|█▎        | 19/143 [00:09<00:42,  2.92it/s]
Training 17/30:  27%|██▋       | 39/143 [00:09<00:14,  7.21it/s]
Training 17/30:  45%|████▍     | 64/143 [00:09<00:05, 14.53it/s]
Training 17/30:  61%|██████    | 87/143 [00:09<00:02, 23.41it/s]
Training 17/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 16: train_loss=0.1211, val_loss=0.1224



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:24,  9.05s/it]
Training 18/30:  12%|█▏        | 17/143 [00:09<00:48,  2.60it/s]
Training 18/30:  24%|██▍       | 34/143 [00:09<00:17,  6.22it/s]
Training 18/30:  37%|███▋      | 53/143 [00:09<00:07, 11.74it/s]
Training 18/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.38it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 17: train_loss=0.1201, val_loss=0.1262



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<22:09,  9.37s/it]
Training 19/30:  10%|█         | 15/143 [00:09<00:57,  2.21it/s]
Training 19/30:  21%|██        | 30/143 [00:09<00:21,  5.31it/s]
Training 19/30:  38%|███▊      | 55/143 [00:09<00:07, 12.42it/s]
Training 19/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.07it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 18: train_loss=0.1199, val_loss=0.1206



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:41,  9.17s/it]
Training 20/30:  10%|█         | 15/143 [00:09<00:56,  2.25it/s]
Training 20/30:  27%|██▋       | 38/143 [00:09<00:14,  7.13it/s]
Training 20/30:  43%|████▎     | 62/143 [00:09<00:05, 14.05it/s]
Training 20/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.48it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 19: train_loss=0.1204, val_loss=0.1188



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 21/30:  14%|█▍        | 20/143 [00:09<00:40,  3.06it/s]
Training 21/30:  25%|██▌       | 36/143 [00:09<00:16,  6.45it/s]
Training 21/30:  37%|███▋      | 53/143 [00:09<00:07, 11.36it/s]
Training 21/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.39it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 20: train_loss=0.1199, val_loss=0.1219



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 22/30:  10%|█         | 15/143 [00:09<00:55,  2.29it/s]
Training 22/30:  23%|██▎       | 33/143 [00:09<00:17,  6.14it/s]
Training 22/30:  32%|███▏      | 46/143 [00:09<00:09,  9.85it/s]
Training 22/30:  52%|█████▏    | 74/143 [00:09<00:03, 20.95it/s]
Training 22/30:  72%|███████▏  | 103/143 [00:09<00:01, 35.85it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 21: train_loss=0.1196, val_loss=0.1194



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 23/30:   9%|▉         | 13/143 [00:09<01:06,  1.94it/s]
Training 23/30:  23%|██▎       | 33/143 [00:09<00:17,  6.17it/s]
Training 23/30:  40%|███▉      | 57/143 [00:09<00:06, 13.07it/s]
Training 23/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.88it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 22: train_loss=0.1195, val_loss=0.1180



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 24/30:  11%|█         | 16/143 [00:09<00:52,  2.43it/s]
Training 24/30:  22%|██▏       | 32/143 [00:09<00:19,  5.84it/s]
Training 24/30:  41%|████      | 58/143 [00:09<00:06, 13.45it/s]
Training 24/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.57it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 23: train_loss=0.1193, val_loss=0.1183



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 25/30:  12%|█▏        | 17/143 [00:09<00:49,  2.55it/s]
Training 25/30:  22%|██▏       | 32/143 [00:09<00:19,  5.70it/s]
Training 25/30:  36%|███▋      | 52/143 [00:09<00:07, 11.44it/s]
Training 25/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.35it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 24: train_loss=0.1185, val_loss=0.1185



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 26/30:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 26/30:  22%|██▏       | 32/143 [00:09<00:18,  6.15it/s]
Training 26/30:  37%|███▋      | 53/143 [00:09<00:07, 12.32it/s]
Training 26/30:  60%|██████    | 86/143 [00:09<00:02, 25.45it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 25: train_loss=0.1197, val_loss=0.1176



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 27/30:  13%|█▎        | 18/143 [00:09<00:47,  2.63it/s]
Training 27/30:  29%|██▉       | 42/143 [00:09<00:13,  7.55it/s]
Training 27/30:  44%|████▍     | 63/143 [00:09<00:05, 13.37it/s]
Training 27/30:  63%|██████▎   | 90/143 [00:09<00:02, 23.49it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 26: train_loss=0.1180, val_loss=0.1190



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<21:11,  8.96s/it]
Training 28/30:  15%|█▌        | 22/143 [00:09<00:35,  3.41it/s]
Training 28/30:  28%|██▊       | 40/143 [00:09<00:14,  7.28it/s]
Training 28/30:  41%|████      | 58/143 [00:09<00:06, 12.51it/s]
Training 28/30:  59%|█████▊    | 84/143 [00:09<00:02, 22.77it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 27: train_loss=0.1180, val_loss=0.1175



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 29/30:  11%|█         | 16/143 [00:09<00:51,  2.46it/s]
Training 29/30:  25%|██▌       | 36/143 [00:09<00:15,  6.77it/s]
Training 29/30:  38%|███▊      | 55/143 [00:09<00:07, 12.31it/s]
Training 29/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.53it/s]
Training 29/30:  75%|███████▍  | 107/143 [00:09<00:01, 35.75it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 28: train_loss=0.1185, val_loss=0.1204



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 30/30:   8%|▊         | 12/143 [00:09<01:10,  1.85it/s]
Training 30/30:  24%|██▍       | 34/143 [00:09<00:16,  6.65it/s]
Training 30/30:  41%|████▏     | 59/143 [00:09<00:05, 14.06it/s]
Training 30/30:  60%|██████    | 86/143 [00:09<00:02, 24.73it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 29: train_loss=0.1179, val_loss=0.1176


2025-06-02 17:44:08,342 - __main__ - INFO - Saved linear probe for layer 2 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_2_probe.pth
2025-06-02 17:44:27,631 - __main__ - INFO - Running mlp probe on layer 2...
2025-06-02 17:44:27,631 - __main__ - INFO - Running mlp probe on layer 2 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 1/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 1/40:  13%|█▎        | 19/143 [00:09<00:35,  3.51it/s]
Training 1/40:  44%|████▍     | 63/143 [00:09<00:04, 16.48it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 0: train_loss=0.3104, val_loss=0.1532



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 2/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 2/40:  20%|██        | 29/143 [00:09<00:20,  5.53it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 14.45it/s]


Epoch 1: train_loss=0.1427, val_loss=0.1354



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 3/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 3/40:  13%|█▎        | 18/143 [00:09<00:36,  3.38it/s]
Training 3/40:  24%|██▍       | 35/143 [00:09<00:12,  8.45it/s]
Training 3/40:  36%|███▌      | 51/143 [00:09<00:06, 14.74it/s]
Training 3/40:  49%|████▉     | 70/143 [00:09<00:02, 24.56it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.34it/s]


Epoch 2: train_loss=0.1295, val_loss=0.1229



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 4/40:   8%|▊         | 12/143 [00:09<01:11,  1.84it/s]
Training 4/40:  15%|█▌        | 22/143 [00:09<00:30,  4.00it/s]
Training 4/40:  24%|██▍       | 35/143 [00:09<00:13,  7.82it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.67it/s]


Epoch 3: train_loss=0.1220, val_loss=0.1278



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 5/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 5/40:  13%|█▎        | 19/143 [00:09<00:37,  3.32it/s]
Training 5/40: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 4: train_loss=0.1175, val_loss=0.1152



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<21:04,  8.91s/it]
Training 6/40:   5%|▍         | 7/143 [00:09<02:08,  1.06it/s]
Training 6/40:  14%|█▍        | 20/143 [00:09<00:31,  3.90it/s]
Training 6/40:  22%|██▏       | 32/143 [00:09<00:14,  7.42it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 5: train_loss=0.1149, val_loss=0.1133



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 7/40:   6%|▌         | 8/143 [00:09<01:51,  1.21it/s]
Training 7/40:  14%|█▍        | 20/143 [00:09<00:32,  3.82it/s]
Training 7/40:  22%|██▏       | 31/143 [00:09<00:15,  7.05it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 6: train_loss=0.1114, val_loss=0.1081



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 8/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 8/40:  14%|█▍        | 20/143 [00:09<00:32,  3.75it/s]
Training 8/40:  21%|██        | 30/143 [00:09<00:17,  6.63it/s]
Training 8/40:  31%|███▏      | 45/143 [00:09<00:07, 12.48it/s]
Training 8/40:  43%|████▎     | 62/143 [00:09<00:03, 21.11it/s]
Training 8/40:  58%|█████▊    | 83/143 [00:09<00:01, 34.80it/s]
Training 8/40:  72%|███████▏  | 103/143 [00:09<00:00, 50.34it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 7: train_loss=0.1100, val_loss=0.1084



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:28,  1.49it/s]
Training 9/40:  13%|█▎        | 19/143 [00:09<00:36,  3.40it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 8: train_loss=0.1085, val_loss=0.1114



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:17,  9.00s/it]
Training 10/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 10/40:  13%|█▎        | 18/143 [00:09<00:37,  3.36it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.67it/s]


Epoch 9: train_loss=0.1061, val_loss=0.1092



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 11/40:   5%|▍         | 7/143 [00:09<02:16,  1.00s/it]
Training 11/40:  11%|█         | 16/143 [00:09<00:44,  2.85it/s]
Training 11/40:  35%|███▍      | 50/143 [00:09<00:07, 12.60it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 10: train_loss=0.1063, val_loss=0.1103



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 12/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 12/40:  13%|█▎        | 19/143 [00:09<00:36,  3.42it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 11: train_loss=0.1065, val_loss=0.1049



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<22:08,  9.35s/it]
Training 13/40:   8%|▊         | 11/143 [00:09<01:22,  1.61it/s]
Training 13/40:  13%|█▎        | 19/143 [00:09<00:38,  3.25it/s]
Training 13/40:  20%|█▉        | 28/143 [00:09<00:19,  5.77it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 12: train_loss=0.1029, val_loss=0.1023



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:10<24:47, 10.48s/it]
Training 14/40:   8%|▊         | 11/143 [00:10<01:31,  1.44it/s]
Training 14/40:  15%|█▍        | 21/143 [00:10<00:37,  3.28it/s]
Training 14/40:  23%|██▎       | 33/143 [00:10<00:17,  6.30it/s]
Training 14/40:  35%|███▍      | 50/143 [00:10<00:07, 12.16it/s]
Training 14/40:  48%|████▊     | 68/143 [00:11<00:03, 20.33it/s]
Training 14/40:  63%|██████▎   | 90/143 [00:11<00:01, 33.25it/s]
Training 14/40: 100%|██████████| 143/143 [00:11<00:00, 12.25it/s]


Epoch 13: train_loss=0.1012, val_loss=0.1083



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<22:43,  9.60s/it]
Training 15/40:   6%|▌         | 8/143 [00:09<01:59,  1.13it/s]
Training 15/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 15/40:  23%|██▎       | 33/143 [00:09<00:15,  7.07it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 14: train_loss=0.1010, val_loss=0.1005



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 16/40:   5%|▍         | 7/143 [00:09<02:15,  1.00it/s]
Training 16/40:  13%|█▎        | 19/143 [00:09<00:35,  3.48it/s]
Training 16/40:  22%|██▏       | 31/143 [00:09<00:16,  6.86it/s]
Training 16/40:  34%|███▍      | 49/143 [00:09<00:06, 13.71it/s]
Training 16/40:  48%|████▊     | 69/143 [00:09<00:03, 23.65it/s]
Training 16/40:  64%|██████▎   | 91/143 [00:10<00:01, 37.54it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 15: train_loss=0.1005, val_loss=0.0998



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<22:55,  9.69s/it]
Training 17/40:   4%|▍         | 6/143 [00:09<02:45,  1.21s/it]
Training 17/40:  13%|█▎        | 18/143 [00:09<00:38,  3.24it/s]
Training 17/40:  21%|██        | 30/143 [00:10<00:17,  6.52it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.58it/s]


Epoch 16: train_loss=0.1004, val_loss=0.1009



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<23:23,  9.89s/it]
Training 18/40:   6%|▌         | 8/143 [00:09<02:03,  1.10it/s]
Training 18/40:  11%|█         | 16/143 [00:10<00:47,  2.67it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 13.42it/s]


Epoch 17: train_loss=0.0987, val_loss=0.0989



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:10<23:40, 10.00s/it]
Training 19/40:   5%|▍         | 7/143 [00:10<02:24,  1.06s/it]
Training 19/40:  10%|█         | 15/143 [00:10<00:51,  2.49it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.27it/s]


Epoch 18: train_loss=0.0966, val_loss=0.1019



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:10<24:03, 10.16s/it]
Training 20/40:   8%|▊         | 12/143 [00:10<01:20,  1.62it/s]
Training 20/40:  16%|█▌        | 23/143 [00:10<00:32,  3.69it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.07it/s]


Epoch 19: train_loss=0.0964, val_loss=0.0993



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<23:32,  9.95s/it]
Training 21/40:   6%|▌         | 8/143 [00:10<02:03,  1.09it/s]
Training 21/40:  12%|█▏        | 17/143 [00:10<00:44,  2.85it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.32it/s]


Epoch 20: train_loss=0.0963, val_loss=0.1004



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<20:47,  8.78s/it]
Training 22/40:   6%|▌         | 8/143 [00:08<01:49,  1.23it/s]
Training 22/40:  13%|█▎        | 19/143 [00:08<00:33,  3.65it/s]
Training 22/40:  26%|██▌       | 37/143 [00:09<00:11,  9.09it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 14.87it/s]


Epoch 21: train_loss=0.0961, val_loss=0.0987



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 23/40:   6%|▋         | 9/143 [00:09<01:41,  1.33it/s]
Training 23/40:  13%|█▎        | 19/143 [00:09<00:36,  3.42it/s]
Training 23/40:  22%|██▏       | 32/143 [00:09<00:15,  7.15it/s]
Training 23/40:  34%|███▍      | 49/143 [00:09<00:06, 13.72it/s]
Training 23/40:  49%|████▉     | 70/143 [00:09<00:02, 24.38it/s]
Training 23/40:  66%|██████▌   | 94/143 [00:09<00:01, 39.86it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 22: train_loss=0.0947, val_loss=0.0975



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:28,  9.50s/it]
Training 24/40:   6%|▋         | 9/143 [00:09<01:44,  1.29it/s]
Training 24/40:  13%|█▎        | 18/143 [00:09<00:40,  3.12it/s]
Training 24/40:  22%|██▏       | 31/143 [00:09<00:16,  6.76it/s]
Training 24/40:  33%|███▎      | 47/143 [00:09<00:07, 12.78it/s]
Training 24/40:  45%|████▌     | 65/143 [00:10<00:03, 21.63it/s]
Training 24/40:  66%|██████▋   | 95/143 [00:10<00:01, 41.17it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 23: train_loss=0.0963, val_loss=0.0980



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 25/40:  13%|█▎        | 19/143 [00:09<00:38,  3.22it/s]
Training 25/40:  22%|██▏       | 32/143 [00:09<00:16,  6.87it/s]
Training 25/40:  33%|███▎      | 47/143 [00:09<00:07, 12.52it/s]
Training 25/40:  47%|████▋     | 67/143 [00:09<00:03, 22.51it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 24: train_loss=0.0955, val_loss=0.0971



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<22:59,  9.71s/it]
Training 26/40:   6%|▌         | 8/143 [00:09<02:01,  1.11it/s]
Training 26/40:  11%|█         | 16/143 [00:09<00:46,  2.71it/s]
Training 26/40:  36%|███▋      | 52/143 [00:10<00:07, 12.74it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 25: train_loss=0.0947, val_loss=0.0962



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<23:05,  9.76s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:35,  1.40it/s]
Training 27/40:  14%|█▍        | 20/143 [00:09<00:36,  3.39it/s]
Training 27/40:  22%|██▏       | 31/143 [00:10<00:17,  6.37it/s]
Training 27/40:  35%|███▍      | 50/143 [00:10<00:06, 13.43it/s]
Training 27/40:  48%|████▊     | 69/143 [00:10<00:03, 22.58it/s]
Training 27/40:  63%|██████▎   | 90/143 [00:10<00:01, 35.41it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.08it/s]


Epoch 26: train_loss=0.0932, val_loss=0.0958



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:02,  9.31s/it]
Training 28/40:   5%|▍         | 7/143 [00:09<02:14,  1.01it/s]
Training 28/40:  12%|█▏        | 17/143 [00:09<00:40,  3.10it/s]
Training 28/40:  20%|██        | 29/143 [00:09<00:17,  6.52it/s]
Training 28/40:  32%|███▏      | 46/143 [00:09<00:07, 13.07it/s]
Training 28/40:  46%|████▌     | 66/143 [00:09<00:03, 23.15it/s]
Training 28/40:  61%|██████    | 87/143 [00:09<00:01, 36.40it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 27: train_loss=0.0951, val_loss=0.0979



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:24,  9.05s/it]
Training 29/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 29/40:  12%|█▏        | 17/143 [00:09<00:42,  2.98it/s]
Training 29/40:  20%|██        | 29/143 [00:09<00:17,  6.48it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 28: train_loss=0.0948, val_loss=0.0987



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 30/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 30/40:  14%|█▍        | 20/143 [00:09<00:34,  3.52it/s]
Training 30/40:  22%|██▏       | 31/143 [00:09<00:16,  6.67it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 29: train_loss=0.0922, val_loss=0.0985



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<22:17,  9.42s/it]
Training 31/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 31/40:  13%|█▎        | 18/143 [00:09<00:40,  3.08it/s]
Training 31/40:  20%|█▉        | 28/143 [00:09<00:19,  5.88it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 30: train_loss=0.0935, val_loss=0.0945



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<23:20,  9.87s/it]
Training 32/40:   6%|▋         | 9/143 [00:09<01:48,  1.24it/s]
Training 32/40:  12%|█▏        | 17/143 [00:10<00:44,  2.81it/s]
Training 32/40:  21%|██        | 30/143 [00:10<00:17,  6.32it/s]
Training 32/40:  31%|███▏      | 45/143 [00:10<00:08, 11.77it/s]
Training 32/40:  48%|████▊     | 68/143 [00:10<00:03, 22.95it/s]
Training 32/40:  63%|██████▎   | 90/143 [00:10<00:01, 36.39it/s]
Training 32/40: 100%|██████████| 143/143 [00:11<00:00, 12.95it/s]


Epoch 31: train_loss=0.0914, val_loss=0.1082



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 33/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 33/40:  12%|█▏        | 17/143 [00:09<00:40,  3.12it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.59it/s]


Epoch 32: train_loss=0.0927, val_loss=0.0972



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<22:15,  9.41s/it]
Training 34/40:   5%|▍         | 7/143 [00:09<02:15,  1.00it/s]
Training 34/40:  10%|▉         | 14/143 [00:09<00:52,  2.44it/s]
Training 34/40:  38%|███▊      | 55/143 [00:09<00:06, 14.23it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 33: train_loss=0.0913, val_loss=0.1030



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:12,  9.38s/it]
Training 35/40:   5%|▍         | 7/143 [00:09<02:15,  1.00it/s]
Training 35/40:  12%|█▏        | 17/143 [00:09<00:40,  3.08it/s]
Training 35/40:  20%|██        | 29/143 [00:09<00:17,  6.48it/s]
Training 35/40:  32%|███▏      | 46/143 [00:09<00:07, 12.99it/s]
Training 35/40:  48%|████▊     | 69/143 [00:09<00:02, 24.68it/s]
Training 35/40:  63%|██████▎   | 90/143 [00:09<00:01, 37.79it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.58it/s]


Epoch 34: train_loss=0.0934, val_loss=0.0963



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 36/40:   5%|▍         | 7/143 [00:09<02:11,  1.03it/s]
Training 36/40:  12%|█▏        | 17/143 [00:09<00:39,  3.17it/s]
Training 36/40:  34%|███▍      | 49/143 [00:09<00:07, 12.63it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 35: train_loss=0.0908, val_loss=0.0964



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 37/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 37/40:  11%|█         | 16/143 [00:09<00:43,  2.94it/s]
Training 37/40:  43%|████▎     | 62/143 [00:09<00:04, 16.53it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 36: train_loss=0.0898, val_loss=0.0956



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 38/40:   5%|▍         | 7/143 [00:09<02:10,  1.04it/s]
Training 38/40:  12%|█▏        | 17/143 [00:09<00:39,  3.19it/s]
Training 38/40:  20%|█▉        | 28/143 [00:09<00:17,  6.41it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 37: train_loss=0.0904, val_loss=0.0963



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:41,  9.16s/it]
Training 39/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 39/40:  10%|█         | 15/143 [00:09<00:47,  2.71it/s]
Training 39/40:  34%|███▍      | 49/143 [00:09<00:07, 12.71it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 38: train_loss=0.0902, val_loss=0.0978



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:02,  9.32s/it]
Training 40/40:   6%|▌         | 8/143 [00:09<01:56,  1.16it/s]
Training 40/40:  13%|█▎        | 19/143 [00:09<00:35,  3.46it/s]
Training 40/40:  22%|██▏       | 31/143 [00:09<00:16,  6.85it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 39: train_loss=0.0901, val_loss=0.0967


2025-06-02 17:57:51,118 - __main__ - INFO - Saved mlp probe for layer 2 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_2_probe.pth
 17%|█▋        | 1/6 [32:02<2:40:13, 1922.64s/it]2025-06-02 17:58:10,509 - __main__ - INFO - Processing layer 4...
2025-06-02 17:58:10,510 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:18:16,  7.23s/it]
Extracting features:   0%|          | 2/1149 [00:07<59:54,  3.13s/it]  
Extracting features:   0%|          | 3/1149 [00:07<34:51,  1.83s/it]
Extracting features:   0%|          | 4/1149 [00:08<23:02,  1.21s/it]
Extracting features:   0%|          | 5/1149 [00:08<16:36,  1.15it/s]
Extracting features:   1%|          | 6/1149 [00:08<12:41,  1.50it/s]
Extracting features:   1%|          | 7/1149 [00:08<10:07,  1.88it/s]
Extracting features:   1%|          | 8/1149 [00:09<08:25,  2.26it/s]
Ex

Epoch 0: train_loss=0.8198, val_loss=0.2239



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:10<24:04, 10.17s/it]
Training 2/30:   8%|▊         | 12/143 [00:10<01:20,  1.62it/s]
Training 2/30:  23%|██▎       | 33/143 [00:10<00:19,  5.66it/s]
Training 2/30:  37%|███▋      | 53/143 [00:10<00:08, 10.87it/s]
Training 2/30:  60%|██████    | 86/143 [00:10<00:02, 22.67it/s]
Training 2/30: 100%|██████████| 143/143 [00:11<00:00, 12.84it/s]


Epoch 1: train_loss=0.2031, val_loss=0.1844



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:10<23:48, 10.06s/it]
Training 3/30:  13%|█▎        | 18/143 [00:10<00:50,  2.48it/s]
Training 3/30:  24%|██▍       | 35/143 [00:10<00:18,  5.75it/s]
Training 3/30:  41%|████▏     | 59/143 [00:10<00:06, 12.12it/s]
Training 3/30:  62%|██████▏   | 88/143 [00:10<00:02, 22.49it/s]
Training 3/30: 100%|██████████| 143/143 [00:11<00:00, 12.98it/s]


Epoch 2: train_loss=0.1758, val_loss=0.1662



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:10<24:02, 10.16s/it]
Training 4/30:   7%|▋         | 10/143 [00:10<01:38,  1.34it/s]
Training 4/30:  20%|█▉        | 28/143 [00:10<00:23,  4.81it/s]
Training 4/30:  34%|███▍      | 49/143 [00:10<00:09, 10.35it/s]
Training 4/30:  49%|████▉     | 70/143 [00:10<00:04, 17.75it/s]
Training 4/30:  69%|██████▉   | 99/143 [00:10<00:01, 31.44it/s]
Training 4/30: 100%|██████████| 143/143 [00:11<00:00, 12.81it/s]


Epoch 3: train_loss=0.1616, val_loss=0.1570



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<22:59,  9.71s/it]
Training 5/30:   9%|▉         | 13/143 [00:09<01:10,  1.84it/s]
Training 5/30:  23%|██▎       | 33/143 [00:09<00:18,  5.86it/s]
Training 5/30:  43%|████▎     | 62/143 [00:10<00:05, 13.85it/s]
Training 5/30:  66%|██████▌   | 94/143 [00:10<00:01, 25.65it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 4: train_loss=0.1523, val_loss=0.1497



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:10<25:17, 10.69s/it]
Training 6/30:   9%|▉         | 13/143 [00:10<01:17,  1.67it/s]
Training 6/30:  20%|██        | 29/143 [00:10<00:24,  4.60it/s]
Training 6/30:  37%|███▋      | 53/143 [00:10<00:08, 10.66it/s]
Training 6/30:  55%|█████▌    | 79/143 [00:11<00:03, 19.48it/s]
Training 6/30: 100%|██████████| 143/143 [00:11<00:00, 12.26it/s]


Epoch 5: train_loss=0.1445, val_loss=0.1437



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:08<20:58,  8.87s/it]
Training 7/30:   8%|▊         | 12/143 [00:08<01:10,  1.85it/s]
Training 7/30:  22%|██▏       | 32/143 [00:09<00:17,  6.24it/s]
Training 7/30:  43%|████▎     | 62/143 [00:09<00:05, 15.23it/s]
Training 7/30:  64%|██████▎   | 91/143 [00:09<00:01, 26.71it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 6: train_loss=0.1400, val_loss=0.1369



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<22:22,  9.45s/it]
Training 8/30:  14%|█▍        | 20/143 [00:09<00:41,  2.93it/s]
Training 8/30:  28%|██▊       | 40/143 [00:09<00:14,  7.03it/s]
Training 8/30:  41%|████▏     | 59/143 [00:09<00:06, 12.31it/s]
Training 8/30:  57%|█████▋    | 82/143 [00:09<00:02, 20.92it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 13.69it/s]


Epoch 7: train_loss=0.1358, val_loss=0.1341



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:23,  9.46s/it]
Training 9/30:   9%|▉         | 13/143 [00:09<01:08,  1.89it/s]
Training 9/30:  22%|██▏       | 32/143 [00:09<00:19,  5.80it/s]
Training 9/30:  41%|████      | 58/143 [00:09<00:06, 13.12it/s]
Training 9/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.47it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 8: train_loss=0.1329, val_loss=0.1311



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 10/30:   9%|▉         | 13/143 [00:09<01:08,  1.89it/s]
Training 10/30:  22%|██▏       | 31/143 [00:09<00:19,  5.61it/s]
Training 10/30:  33%|███▎      | 47/143 [00:09<00:09, 10.07it/s]
Training 10/30:  51%|█████     | 73/143 [00:09<00:03, 19.99it/s]
Training 10/30:  67%|██████▋   | 96/143 [00:09<00:01, 31.24it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 9: train_loss=0.1313, val_loss=0.1305



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<22:23,  9.46s/it]
Training 11/30:  14%|█▍        | 20/143 [00:09<00:41,  2.93it/s]
Training 11/30:  27%|██▋       | 38/143 [00:09<00:15,  6.61it/s]
Training 11/30:  43%|████▎     | 61/143 [00:09<00:06, 13.04it/s]
Training 11/30:  61%|██████    | 87/143 [00:09<00:02, 22.78it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 10: train_loss=0.1267, val_loss=0.1266



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 12/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 12/30:  22%|██▏       | 32/143 [00:09<00:18,  5.95it/s]
Training 12/30:  43%|████▎     | 61/143 [00:09<00:05, 14.43it/s]
Training 12/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.87it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 11: train_loss=0.1255, val_loss=0.1243



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<21:59,  9.29s/it]
Training 13/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 13/30:  23%|██▎       | 33/143 [00:09<00:18,  5.99it/s]
Training 13/30:  41%|████      | 58/143 [00:09<00:06, 13.14it/s]
Training 13/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.66it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 12: train_loss=0.1244, val_loss=0.1302



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 14/30:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 14/30:  24%|██▍       | 34/143 [00:09<00:16,  6.45it/s]
Training 14/30:  40%|███▉      | 57/143 [00:09<00:06, 13.13it/s]
Training 14/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.40it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 13: train_loss=0.1250, val_loss=0.1361



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 15/30:  13%|█▎        | 19/143 [00:09<00:43,  2.82it/s]
Training 15/30:  27%|██▋       | 38/143 [00:09<00:15,  6.77it/s]
Training 15/30:  41%|████▏     | 59/143 [00:09<00:06, 12.71it/s]
Training 15/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.02it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 14: train_loss=0.1208, val_loss=0.1231



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 16/30:  13%|█▎        | 18/143 [00:09<00:46,  2.69it/s]
Training 16/30:  27%|██▋       | 39/143 [00:09<00:14,  7.08it/s]
Training 16/30:  41%|████▏     | 59/143 [00:09<00:06, 12.74it/s]
Training 16/30:  62%|██████▏   | 89/143 [00:09<00:02, 24.27it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 15: train_loss=0.1205, val_loss=0.1234



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 17/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 17/30:  27%|██▋       | 38/143 [00:09<00:14,  7.15it/s]
Training 17/30:  46%|████▌     | 66/143 [00:09<00:05, 15.26it/s]
Training 17/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.63it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 16: train_loss=0.1189, val_loss=0.1246



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:10<24:13, 10.24s/it]
Training 18/30:  10%|█         | 15/143 [00:10<01:03,  2.02it/s]
Training 18/30:  26%|██▌       | 37/143 [00:10<00:17,  6.22it/s]
Training 18/30:  39%|███▉      | 56/143 [00:10<00:07, 11.13it/s]
Training 18/30:  63%|██████▎   | 90/143 [00:10<00:02, 23.22it/s]
Training 18/30: 100%|██████████| 143/143 [00:11<00:00, 12.76it/s]


Epoch 17: train_loss=0.1186, val_loss=0.1230



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 19/30:  14%|█▍        | 20/143 [00:09<00:40,  3.03it/s]
Training 19/30:  27%|██▋       | 38/143 [00:09<00:15,  6.84it/s]
Training 19/30:  43%|████▎     | 61/143 [00:09<00:06, 13.49it/s]
Training 19/30:  60%|██████    | 86/143 [00:09<00:02, 23.16it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 18: train_loss=0.1182, val_loss=0.1196



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<22:07,  9.35s/it]
Training 20/30:  10%|█         | 15/143 [00:09<00:57,  2.21it/s]
Training 20/30:  24%|██▍       | 35/143 [00:09<00:16,  6.37it/s]
Training 20/30:  43%|████▎     | 62/143 [00:09<00:05, 14.04it/s]
Training 20/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.07it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 19: train_loss=0.1186, val_loss=0.1191



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<23:14,  9.82s/it]
Training 21/30:  12%|█▏        | 17/143 [00:09<00:52,  2.39it/s]
Training 21/30:  23%|██▎       | 33/143 [00:10<00:19,  5.55it/s]
Training 21/30:  38%|███▊      | 55/143 [00:10<00:07, 11.52it/s]
Training 21/30:  58%|█████▊    | 83/143 [00:10<00:02, 21.78it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.25it/s]


Epoch 20: train_loss=0.1193, val_loss=0.1175



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 22/30:  13%|█▎        | 18/143 [00:09<00:46,  2.69it/s]
Training 22/30:  28%|██▊       | 40/143 [00:09<00:14,  7.31it/s]
Training 22/30:  45%|████▌     | 65/143 [00:09<00:05, 14.45it/s]
Training 22/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.16it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 21: train_loss=0.1157, val_loss=0.1182



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 23/30:  15%|█▌        | 22/143 [00:09<00:36,  3.33it/s]
Training 23/30:  29%|██▊       | 41/143 [00:09<00:13,  7.33it/s]
Training 23/30:  45%|████▍     | 64/143 [00:09<00:05, 13.94it/s]
Training 23/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.14it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 22: train_loss=0.1155, val_loss=0.1184



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 24/30:  10%|█         | 15/143 [00:09<00:56,  2.28it/s]
Training 24/30:  22%|██▏       | 31/143 [00:09<00:19,  5.68it/s]
Training 24/30:  36%|███▋      | 52/143 [00:09<00:07, 11.80it/s]
Training 24/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.21it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 23: train_loss=0.1146, val_loss=0.1230



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 25/30:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 25/30:  19%|█▉        | 27/143 [00:09<00:22,  5.06it/s]
Training 25/30:  34%|███▍      | 49/143 [00:09<00:08, 11.55it/s]
Training 25/30:  52%|█████▏    | 75/143 [00:09<00:03, 21.81it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 24: train_loss=0.1156, val_loss=0.1152



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 26/30:  12%|█▏        | 17/143 [00:09<00:48,  2.61it/s]
Training 26/30:  27%|██▋       | 38/143 [00:09<00:14,  7.14it/s]
Training 26/30:  45%|████▍     | 64/143 [00:09<00:05, 14.79it/s]
Training 26/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.60it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 25: train_loss=0.1138, val_loss=0.1174



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 27/30:  15%|█▌        | 22/143 [00:09<00:35,  3.39it/s]
Training 27/30:  29%|██▊       | 41/143 [00:09<00:13,  7.45it/s]
Training 27/30:  45%|████▍     | 64/143 [00:09<00:05, 14.15it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 26: train_loss=0.1141, val_loss=0.1239



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 28/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 28/30:  24%|██▍       | 34/143 [00:09<00:17,  6.20it/s]
Training 28/30:  40%|███▉      | 57/143 [00:09<00:06, 12.76it/s]
Training 28/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.50it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 27: train_loss=0.1130, val_loss=0.1131



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<21:04,  8.91s/it]
Training 29/30:   9%|▉         | 13/143 [00:09<01:04,  2.00it/s]
Training 29/30:  20%|██        | 29/143 [00:09<00:20,  5.48it/s]
Training 29/30:  38%|███▊      | 54/143 [00:09<00:06, 12.94it/s]
Training 29/30:  57%|█████▋    | 81/143 [00:09<00:02, 23.65it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 28: train_loss=0.1119, val_loss=0.1144



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:19,  9.44s/it]
Training 30/30:  13%|█▎        | 18/143 [00:09<00:47,  2.64it/s]
Training 30/30:  26%|██▌       | 37/143 [00:09<00:16,  6.55it/s]
Training 30/30:  48%|████▊     | 68/143 [00:09<00:04, 15.32it/s]
Training 30/30:  67%|██████▋   | 96/143 [00:09<00:01, 25.78it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 29: train_loss=0.1125, val_loss=0.1127


2025-06-02 18:16:07,430 - __main__ - INFO - Saved linear probe for layer 4 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_4_probe.pth
2025-06-02 18:16:26,381 - __main__ - INFO - Running mlp probe on layer 4...
2025-06-02 18:16:26,381 - __main__ - INFO - Running mlp probe on layer 4 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 1/40:   6%|▌         | 8/143 [00:09<01:51,  1.22it/s]
Training 1/40:  13%|█▎        | 18/143 [00:09<00:36,  3.39it/s]
Training 1/40:  22%|██▏       | 32/143 [00:09<00:14,  7.56it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 0: train_loss=0.3998, val_loss=0.1506



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 2/40:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 2/40:  13%|█▎        | 18/143 [00:09<00:38,  3.23it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 1: train_loss=0.1351, val_loss=0.1266



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 3/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 3/40:  13%|█▎        | 18/143 [00:09<00:38,  3.22it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 2: train_loss=0.1234, val_loss=0.1197



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 4/40:   9%|▉         | 13/143 [00:09<01:05,  1.99it/s]
Training 4/40:  14%|█▍        | 20/143 [00:09<00:35,  3.47it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 3: train_loss=0.1175, val_loss=0.1152



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 5/40:   5%|▍         | 7/143 [00:09<02:13,  1.02it/s]
Training 5/40:  13%|█▎        | 19/143 [00:09<00:35,  3.54it/s]
Training 5/40:  30%|███       | 43/143 [00:09<00:09, 10.48it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 4: train_loss=0.1127, val_loss=0.1114



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:31,  9.09s/it]
Training 6/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 6/40:  16%|█▌        | 23/143 [00:09<00:27,  4.40it/s]
Training 6/40: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 5: train_loss=0.1085, val_loss=0.1096



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 7/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 7/40:  13%|█▎        | 19/143 [00:09<00:35,  3.48it/s]
Training 7/40:  22%|██▏       | 32/143 [00:09<00:15,  7.26it/s]
Training 7/40:  37%|███▋      | 53/143 [00:09<00:05, 15.56it/s]
Training 7/40:  50%|█████     | 72/143 [00:09<00:02, 25.18it/s]
Training 7/40:  66%|██████▌   | 94/143 [00:09<00:01, 39.38it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 6: train_loss=0.1062, val_loss=0.1086



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<22:56,  9.70s/it]
Training 8/40:   5%|▍         | 7/143 [00:09<02:19,  1.03s/it]
Training 8/40:  13%|█▎        | 18/143 [00:09<00:39,  3.18it/s]
Training 8/40:  19%|█▉        | 27/143 [00:10<00:20,  5.63it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 7: train_loss=0.1010, val_loss=0.1052



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 9/40:  15%|█▍        | 21/143 [00:09<00:31,  3.82it/s]
Training 9/40:  22%|██▏       | 32/143 [00:09<00:15,  6.98it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 8: train_loss=0.0985, val_loss=0.1031



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 10/40:  13%|█▎        | 19/143 [00:09<00:35,  3.50it/s]
Training 10/40:  22%|██▏       | 31/143 [00:09<00:16,  7.00it/s]
Training 10/40:  37%|███▋      | 53/143 [00:09<00:05, 15.73it/s]
Training 10/40:  50%|█████     | 72/143 [00:09<00:02, 25.35it/s]
Training 10/40:  65%|██████▌   | 93/143 [00:09<00:01, 38.86it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 9: train_loss=0.0972, val_loss=0.1005



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 11/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 11/40:  13%|█▎        | 18/143 [00:09<00:40,  3.08it/s]
Training 11/40:  21%|██        | 30/143 [00:09<00:17,  6.46it/s]
Training 11/40:  31%|███       | 44/143 [00:09<00:08, 11.74it/s]
Training 11/40:  49%|████▉     | 70/143 [00:09<00:02, 24.99it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 10: train_loss=0.0932, val_loss=0.0993



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:25,  9.06s/it]
Training 12/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 12/40:  14%|█▍        | 20/143 [00:09<00:33,  3.70it/s]
Training 12/40:  24%|██▍       | 35/143 [00:09<00:13,  8.08it/s]
Training 12/40:  37%|███▋      | 53/143 [00:09<00:05, 15.10it/s]
Training 12/40:  52%|█████▏    | 75/143 [00:09<00:02, 26.40it/s]
Training 12/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.97it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 11: train_loss=0.0915, val_loss=0.0962



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:18,  9.01s/it]
Training 13/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 13/40:  15%|█▍        | 21/143 [00:09<00:30,  3.94it/s]
Training 13/40:  24%|██▍       | 34/143 [00:09<00:14,  7.74it/s]
Training 13/40: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 12: train_loss=0.0887, val_loss=0.0942



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:28,  9.49s/it]
Training 14/40:   6%|▋         | 9/143 [00:09<01:43,  1.29it/s]
Training 14/40:  15%|█▌        | 22/143 [00:09<00:30,  3.95it/s]
Training 14/40:  24%|██▍       | 35/143 [00:09<00:14,  7.56it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 13: train_loss=0.0868, val_loss=0.0955



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 15/40:   6%|▋         | 9/143 [00:09<01:44,  1.28it/s]
Training 15/40:  13%|█▎        | 19/143 [00:09<00:37,  3.31it/s]
Training 15/40:  23%|██▎       | 33/143 [00:09<00:15,  7.20it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.73it/s]


Epoch 14: train_loss=0.0852, val_loss=0.0948



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<23:22,  9.87s/it]
Training 16/40:   7%|▋         | 10/143 [00:09<01:36,  1.38it/s]
Training 16/40:  15%|█▍        | 21/143 [00:10<00:34,  3.55it/s]
Training 16/40:  24%|██▍       | 34/143 [00:10<00:15,  7.05it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.37it/s]


Epoch 15: train_loss=0.0846, val_loss=0.0928



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<22:35,  9.55s/it]
Training 17/40:   6%|▋         | 9/143 [00:09<01:44,  1.28it/s]
Training 17/40:  13%|█▎        | 18/143 [00:09<00:40,  3.11it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 16: train_loss=0.0825, val_loss=0.0918



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:10<24:45, 10.46s/it]
Training 18/40:   8%|▊         | 11/143 [00:10<01:31,  1.44it/s]
Training 18/40:  15%|█▍        | 21/143 [00:10<00:37,  3.29it/s]
Training 18/40:  22%|██▏       | 31/143 [00:10<00:19,  5.82it/s]
Training 18/40: 100%|██████████| 143/143 [00:11<00:00, 12.66it/s]


Epoch 17: train_loss=0.0806, val_loss=0.0913



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<23:34,  9.96s/it]
Training 19/40:   8%|▊         | 11/143 [00:10<01:27,  1.51it/s]
Training 19/40:  13%|█▎        | 19/143 [00:10<00:40,  3.06it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.34it/s]


Epoch 18: train_loss=0.0799, val_loss=0.0938



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:10<24:03, 10.16s/it]
Training 20/40:   6%|▌         | 8/143 [00:10<02:06,  1.07it/s]
Training 20/40:  15%|█▍        | 21/143 [00:10<00:34,  3.56it/s]
Training 20/40:  22%|██▏       | 32/143 [00:10<00:17,  6.43it/s]
Training 20/40:  34%|███▎      | 48/143 [00:10<00:07, 12.11it/s]
Training 20/40:  46%|████▌     | 66/143 [00:10<00:03, 20.48it/s]
Training 20/40:  60%|██████    | 86/143 [00:10<00:01, 32.41it/s]
Training 20/40: 100%|██████████| 143/143 [00:11<00:00, 12.62it/s]


Epoch 19: train_loss=0.0790, val_loss=0.0901



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:11<27:06, 11.45s/it]
Training 21/40:   4%|▍         | 6/143 [00:11<03:15,  1.42s/it]
Training 21/40:  10%|█         | 15/143 [00:11<00:57,  2.24it/s]
Training 21/40:  22%|██▏       | 32/143 [00:11<00:17,  6.28it/s]
Training 21/40: 100%|██████████| 143/143 [00:12<00:00, 11.64it/s]


Epoch 20: train_loss=0.0783, val_loss=0.0888



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:10<23:54, 10.10s/it]
Training 22/40:   8%|▊         | 11/143 [00:10<01:28,  1.49it/s]
Training 22/40:  15%|█▍        | 21/143 [00:10<00:35,  3.39it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.13it/s]


Epoch 21: train_loss=0.0770, val_loss=0.0955



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 23/40:   6%|▌         | 8/143 [00:09<02:01,  1.11it/s]
Training 23/40:  11%|█         | 16/143 [00:09<00:47,  2.70it/s]
Training 23/40:  36%|███▋      | 52/143 [00:10<00:07, 12.69it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 22: train_loss=0.0770, val_loss=0.0862



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 24/40:   6%|▋         | 9/143 [00:09<01:46,  1.26it/s]
Training 24/40:  14%|█▍        | 20/143 [00:09<00:35,  3.46it/s]
Training 24/40:  23%|██▎       | 33/143 [00:10<00:15,  7.01it/s]
Training 24/40:  35%|███▍      | 50/143 [00:10<00:06, 13.29it/s]
Training 24/40:  50%|████▉     | 71/143 [00:10<00:03, 23.52it/s]
Training 24/40:  65%|██████▌   | 93/143 [00:10<00:01, 37.05it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.14it/s]


Epoch 23: train_loss=0.0761, val_loss=0.0886



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:11<26:57, 11.39s/it]
Training 25/40:   8%|▊         | 11/143 [00:11<01:39,  1.33it/s]
Training 25/40:  15%|█▍        | 21/143 [00:11<00:40,  3.03it/s]
Training 25/40:  33%|███▎      | 47/143 [00:11<00:10,  9.24it/s]
Training 25/40: 100%|██████████| 143/143 [00:12<00:00, 11.71it/s]


Epoch 24: train_loss=0.0755, val_loss=0.0983



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<22:42,  9.60s/it]
Training 26/40:   9%|▉         | 13/143 [00:09<01:09,  1.86it/s]
Training 26/40:  16%|█▌        | 23/143 [00:09<00:30,  3.87it/s]
Training 26/40:  24%|██▍       | 35/143 [00:09<00:15,  7.17it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 25: train_loss=0.0752, val_loss=0.0891



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:15,  9.41s/it]
Training 27/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 27/40:  14%|█▍        | 20/143 [00:09<00:36,  3.38it/s]
Training 27/40:  22%|██▏       | 31/143 [00:09<00:17,  6.46it/s]
Training 27/40:  31%|███▏      | 45/143 [00:09<00:08, 11.76it/s]
Training 27/40:  48%|████▊     | 68/143 [00:09<00:03, 23.47it/s]
Training 27/40:  62%|██████▏   | 88/143 [00:10<00:01, 35.95it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.46it/s]


Epoch 26: train_loss=0.0742, val_loss=0.0932



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 28/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 28/40:  14%|█▍        | 20/143 [00:09<00:35,  3.48it/s]
Training 28/40:  22%|██▏       | 32/143 [00:09<00:16,  6.81it/s]
Training 28/40:  33%|███▎      | 47/143 [00:09<00:07, 12.44it/s]
Training 28/40:  46%|████▌     | 66/143 [00:10<00:03, 21.87it/s]
Training 28/40:  62%|██████▏   | 88/143 [00:10<00:01, 35.86it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.46it/s]


Epoch 27: train_loss=0.0720, val_loss=0.0878



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:46,  9.62s/it]
Training 29/40:   6%|▋         | 9/143 [00:09<01:45,  1.27it/s]
Training 29/40:  13%|█▎        | 18/143 [00:09<00:40,  3.09it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 28: train_loss=0.0728, val_loss=0.0904



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 30/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 30/40:  13%|█▎        | 19/143 [00:09<00:35,  3.45it/s]
Training 30/40:  22%|██▏       | 32/143 [00:09<00:15,  7.18it/s]
Training 30/40:  35%|███▍      | 50/143 [00:09<00:06, 14.19it/s]
Training 30/40:  48%|████▊     | 69/143 [00:09<00:03, 23.81it/s]
Training 30/40:  62%|██████▏   | 88/143 [00:09<00:01, 35.80it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 29: train_loss=0.0716, val_loss=0.0852



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 31/40:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 31/40:  17%|█▋        | 24/143 [00:09<00:27,  4.38it/s]
Training 31/40:  24%|██▍       | 35/143 [00:09<00:14,  7.54it/s]
Training 31/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 30: train_loss=0.0719, val_loss=0.0886



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:37,  9.13s/it]
Training 32/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 32/40:  14%|█▍        | 20/143 [00:09<00:33,  3.67it/s]
Training 32/40:  22%|██▏       | 31/143 [00:09<00:16,  6.84it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 31: train_loss=0.0700, val_loss=0.0883



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 33/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 33/40:  13%|█▎        | 18/143 [00:09<00:38,  3.26it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 32: train_loss=0.0689, val_loss=0.0886



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 34/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 34/40:  15%|█▍        | 21/143 [00:09<00:31,  3.91it/s]
Training 34/40:  23%|██▎       | 33/143 [00:09<00:14,  7.38it/s]
Training 34/40:  36%|███▌      | 51/143 [00:09<00:06, 14.45it/s]
Training 34/40:  48%|████▊     | 69/143 [00:09<00:03, 23.59it/s]
Training 34/40:  64%|██████▍   | 92/143 [00:09<00:01, 38.64it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 33: train_loss=0.0694, val_loss=0.0867



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 35/40:   6%|▌         | 8/143 [00:09<01:59,  1.13it/s]
Training 35/40:  11%|█         | 16/143 [00:09<00:46,  2.75it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 34: train_loss=0.0691, val_loss=0.0915



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 36/40:   5%|▍         | 7/143 [00:09<02:10,  1.04it/s]
Training 36/40:  13%|█▎        | 18/143 [00:09<00:36,  3.40it/s]
Training 36/40:  20%|█▉        | 28/143 [00:09<00:18,  6.30it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 35: train_loss=0.0685, val_loss=0.0878



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 37/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 37/40:  11%|█         | 16/143 [00:09<00:44,  2.88it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 36: train_loss=0.0666, val_loss=0.0891



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 38/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 38/40:  14%|█▍        | 20/143 [00:09<00:35,  3.50it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.30it/s]


Epoch 37: train_loss=0.0689, val_loss=0.0912



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:34,  9.12s/it]
Training 39/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 39/40:  13%|█▎        | 18/143 [00:09<00:37,  3.31it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 38: train_loss=0.0688, val_loss=0.0842



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 40/40:   6%|▌         | 8/143 [00:09<01:54,  1.17it/s]
Training 40/40:  12%|█▏        | 17/143 [00:09<00:41,  3.07it/s]
Training 40/40:  38%|███▊      | 54/143 [00:09<00:06, 13.90it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 39: train_loss=0.0656, val_loss=0.0904


2025-06-02 18:29:57,147 - __main__ - INFO - Saved mlp probe for layer 4 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_4_probe.pth
 33%|███▎      | 2/6 [1:04:09<2:08:19, 1924.88s/it]2025-06-02 18:30:16,953 - __main__ - INFO - Processing layer 6...
2025-06-02 18:30:16,954 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:16:04,  7.11s/it]
Extracting features:   0%|          | 2/1149 [00:07<59:08,  3.09s/it]  
Extracting features:   0%|          | 3/1149 [00:07<34:26,  1.80s/it]
Extracting features:   0%|          | 4/1149 [00:07<22:46,  1.19s/it]
Extracting features:   0%|          | 5/1149 [00:08<16:18,  1.17it/s]
Extracting features:   1%|          | 6/1149 [00:08<12:25,  1.53it/s]
Extracting features:   1%|          | 7/1149 [00:08<09:57,  1.91it/s]
Extracting features:   1%|          | 8/1149 [00:08<08:21,  2.28it/s]


Epoch 0: train_loss=0.8233, val_loss=0.3731



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 2/30:  10%|▉         | 14/143 [00:09<01:00,  2.14it/s]
Training 2/30:  24%|██▍       | 35/143 [00:09<00:16,  6.68it/s]
Training 2/30:  43%|████▎     | 61/143 [00:09<00:05, 14.34it/s]
Training 2/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.91it/s]
Training 2/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 1: train_loss=0.3145, val_loss=0.2770



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 3/30:  12%|█▏        | 17/143 [00:09<00:48,  2.62it/s]
Training 3/30:  26%|██▌       | 37/143 [00:09<00:15,  6.95it/s]
Training 3/30:  41%|████      | 58/143 [00:09<00:06, 13.12it/s]
Training 3/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.35it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 2: train_loss=0.2475, val_loss=0.2264



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 4/30:  11%|█         | 16/143 [00:09<00:53,  2.37it/s]
Training 4/30:  25%|██▌       | 36/143 [00:09<00:16,  6.53it/s]
Training 4/30:  41%|████▏     | 59/143 [00:09<00:06, 13.04it/s]
Training 4/30:  59%|█████▉    | 85/143 [00:09<00:02, 22.91it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 3: train_loss=0.2138, val_loss=0.2004



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<23:19,  9.85s/it]
Training 5/30:  11%|█         | 16/143 [00:09<00:56,  2.24it/s]
Training 5/30:  24%|██▍       | 34/143 [00:10<00:18,  5.79it/s]
Training 5/30:  37%|███▋      | 53/143 [00:10<00:08, 10.89it/s]
Training 5/30:  58%|█████▊    | 83/143 [00:10<00:02, 21.88it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.19it/s]


Epoch 4: train_loss=0.1913, val_loss=0.1872



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<22:12,  9.39s/it]
Training 6/30:  10%|█         | 15/143 [00:09<00:58,  2.20it/s]
Training 6/30:  20%|██        | 29/143 [00:09<00:22,  5.09it/s]
Training 6/30:  37%|███▋      | 53/143 [00:09<00:07, 11.91it/s]
Training 6/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.53it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 5: train_loss=0.1772, val_loss=0.1681



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:11<27:15, 11.52s/it]
Training 7/30:  10%|▉         | 14/143 [00:11<01:16,  1.68it/s]
Training 7/30:  25%|██▌       | 36/143 [00:11<00:19,  5.43it/s]
Training 7/30:  40%|███▉      | 57/143 [00:11<00:08, 10.32it/s]
Training 7/30:  62%|██████▏   | 88/143 [00:11<00:02, 20.20it/s]
Training 7/30: 100%|██████████| 143/143 [00:12<00:00, 11.46it/s]


Epoch 6: train_loss=0.1660, val_loss=0.1589



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:10<24:33, 10.38s/it]
Training 8/30:  12%|█▏        | 17/143 [00:10<00:55,  2.27it/s]
Training 8/30:  24%|██▍       | 35/143 [00:10<00:19,  5.65it/s]
Training 8/30:  43%|████▎     | 61/143 [00:10<00:06, 12.37it/s]
Training 8/30:  61%|██████    | 87/143 [00:10<00:02, 21.39it/s]
Training 8/30: 100%|██████████| 143/143 [00:11<00:00, 12.64it/s]


Epoch 7: train_loss=0.1562, val_loss=0.1508



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:10<24:26, 10.32s/it]
Training 9/30:  10%|▉         | 14/143 [00:10<01:09,  1.87it/s]
Training 9/30:  23%|██▎       | 33/143 [00:10<00:20,  5.46it/s]
Training 9/30: 100%|██████████| 143/143 [00:11<00:00, 12.94it/s]


Epoch 8: train_loss=0.1481, val_loss=0.1500



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:11<26:10, 11.06s/it]
Training 10/30:  13%|█▎        | 19/143 [00:11<00:52,  2.38it/s]
Training 10/30:  29%|██▊       | 41/143 [00:11<00:16,  6.27it/s]
Training 10/30:  43%|████▎     | 61/143 [00:11<00:07, 11.08it/s]
Training 10/30:  65%|██████▌   | 93/143 [00:11<00:02, 21.66it/s]
Training 10/30: 100%|██████████| 143/143 [00:11<00:00, 11.93it/s]


Epoch 9: train_loss=0.1455, val_loss=0.1493



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:11<26:19, 11.13s/it]
Training 11/30:   8%|▊         | 12/143 [00:11<01:28,  1.48it/s]
Training 11/30:  20%|█▉        | 28/143 [00:11<00:26,  4.30it/s]
Training 11/30:  33%|███▎      | 47/143 [00:11<00:10,  8.90it/s]
Training 11/30:  57%|█████▋    | 82/143 [00:11<00:02, 20.55it/s]
Training 11/30: 100%|██████████| 143/143 [00:12<00:00, 11.82it/s]


Epoch 10: train_loss=0.1411, val_loss=0.1446



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:10<25:16, 10.68s/it]
Training 12/30:  10%|▉         | 14/143 [00:10<01:11,  1.81it/s]
Training 12/30:  28%|██▊       | 40/143 [00:10<00:15,  6.58it/s]
Training 12/30:  46%|████▌     | 66/143 [00:10<00:05, 13.10it/s]
Training 12/30: 100%|██████████| 143/143 [00:11<00:00, 12.36it/s]


Epoch 11: train_loss=0.1356, val_loss=0.1357



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<23:10,  9.79s/it]
Training 13/30:  10%|█         | 15/143 [00:09<01:00,  2.11it/s]
Training 13/30:  23%|██▎       | 33/143 [00:10<00:19,  5.69it/s]
Training 13/30:  44%|████▍     | 63/143 [00:10<00:05, 13.89it/s]
Training 13/30:  62%|██████▏   | 88/143 [00:10<00:02, 22.93it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.33it/s]


Epoch 12: train_loss=0.1339, val_loss=0.1330



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:10<24:31, 10.36s/it]
Training 14/30:  10%|█         | 15/143 [00:10<01:04,  2.00it/s]
Training 14/30:  27%|██▋       | 38/143 [00:10<00:16,  6.33it/s]
Training 14/30:  43%|████▎     | 62/143 [00:10<00:06, 12.50it/s]
Training 14/30:  62%|██████▏   | 88/143 [00:10<00:02, 21.48it/s]
Training 14/30: 100%|██████████| 143/143 [00:11<00:00, 12.62it/s]


Epoch 13: train_loss=0.1298, val_loss=0.1270



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:55,  9.27s/it]
Training 15/30:  16%|█▌        | 23/143 [00:09<00:34,  3.45it/s]
Training 15/30:  26%|██▌       | 37/143 [00:09<00:16,  6.34it/s]
Training 15/30:  41%|████▏     | 59/143 [00:09<00:06, 12.63it/s]
Training 15/30:  61%|██████    | 87/143 [00:09<00:02, 23.38it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 14: train_loss=0.1274, val_loss=0.1319



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:31,  9.51s/it]
Training 16/30:  11%|█         | 16/143 [00:09<00:54,  2.32it/s]
Training 16/30:  27%|██▋       | 39/143 [00:09<00:14,  7.03it/s]
Training 16/30:  43%|████▎     | 62/143 [00:09<00:06, 13.43it/s]
Training 16/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.08it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 15: train_loss=0.1265, val_loss=0.1241



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<22:31,  9.52s/it]
Training 17/30:   9%|▉         | 13/143 [00:09<01:09,  1.88it/s]
Training 17/30:  22%|██▏       | 31/143 [00:09<00:20,  5.56it/s]
Training 17/30:  43%|████▎     | 62/143 [00:09<00:05, 14.28it/s]
Training 17/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.89it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.67it/s]


Epoch 16: train_loss=0.1238, val_loss=0.1221



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 18/30:  10%|▉         | 14/143 [00:09<01:02,  2.05it/s]
Training 18/30:  29%|██▉       | 42/143 [00:09<00:12,  7.87it/s]
Training 18/30:  47%|████▋     | 67/143 [00:09<00:05, 14.89it/s]
Training 18/30:  68%|██████▊   | 97/143 [00:09<00:01, 26.23it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 17: train_loss=0.1228, val_loss=0.1253



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 19/30:  12%|█▏        | 17/143 [00:09<00:49,  2.54it/s]
Training 19/30:  26%|██▌       | 37/143 [00:09<00:15,  6.74it/s]
Training 19/30:  43%|████▎     | 62/143 [00:09<00:05, 13.90it/s]
Training 19/30:  62%|██████▏   | 88/143 [00:09<00:02, 23.84it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 18: train_loss=0.1213, val_loss=0.1240



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<22:22,  9.46s/it]
Training 20/30:  13%|█▎        | 18/143 [00:09<00:47,  2.63it/s]
Training 20/30:  27%|██▋       | 38/143 [00:09<00:15,  6.74it/s]
Training 20/30:  44%|████▍     | 63/143 [00:09<00:05, 13.74it/s]
Training 20/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.26it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 19: train_loss=0.1209, val_loss=0.1336



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<22:38,  9.56s/it]
Training 21/30:  10%|█         | 15/143 [00:09<00:59,  2.16it/s]
Training 21/30:  20%|█▉        | 28/143 [00:09<00:23,  4.79it/s]
Training 21/30:  36%|███▋      | 52/143 [00:09<00:07, 11.50it/s]
Training 21/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.80it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 20: train_loss=0.1191, val_loss=0.1209



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 22/30:  13%|█▎        | 18/143 [00:09<00:45,  2.73it/s]
Training 22/30:  27%|██▋       | 39/143 [00:09<00:14,  7.20it/s]
Training 22/30:  43%|████▎     | 61/143 [00:09<00:06, 13.55it/s]
Training 22/30:  61%|██████    | 87/143 [00:09<00:02, 23.60it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 21: train_loss=0.1221, val_loss=0.1430



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 23/30:  12%|█▏        | 17/143 [00:09<00:48,  2.59it/s]
Training 23/30:  24%|██▍       | 35/143 [00:09<00:16,  6.42it/s]
Training 23/30:  38%|███▊      | 55/143 [00:09<00:07, 12.21it/s]
Training 23/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.17it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 22: train_loss=0.1191, val_loss=0.1269



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 24/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 24/30:  22%|██▏       | 32/143 [00:09<00:18,  5.97it/s]
Training 24/30:  36%|███▌      | 51/143 [00:09<00:08, 11.48it/s]
Training 24/30:  59%|█████▊    | 84/143 [00:09<00:02, 24.53it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 23: train_loss=0.1178, val_loss=0.1194



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 25/30:   9%|▉         | 13/143 [00:09<01:06,  1.97it/s]
Training 25/30:  25%|██▌       | 36/143 [00:09<00:15,  6.90it/s]
Training 25/30:  42%|████▏     | 60/143 [00:09<00:05, 13.89it/s]
Training 25/30:  64%|██████▍   | 92/143 [00:09<00:01, 26.45it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 24: train_loss=0.1179, val_loss=0.1298



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 26/30:  10%|▉         | 14/143 [00:09<01:01,  2.08it/s]
Training 26/30:  25%|██▌       | 36/143 [00:09<00:15,  6.71it/s]
Training 26/30:  38%|███▊      | 54/143 [00:09<00:07, 11.80it/s]
Training 26/30:  55%|█████▍    | 78/143 [00:09<00:03, 20.98it/s]
Training 26/30:  73%|███████▎  | 104/143 [00:09<00:01, 33.96it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 25: train_loss=0.1156, val_loss=0.1199



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 27/30:  13%|█▎        | 19/143 [00:09<00:42,  2.89it/s]
Training 27/30:  26%|██▌       | 37/143 [00:09<00:15,  6.71it/s]
Training 27/30:  43%|████▎     | 61/143 [00:09<00:05, 13.68it/s]
Training 27/30:  63%|██████▎   | 90/143 [00:09<00:02, 24.97it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 26: train_loss=0.1155, val_loss=0.1143



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:36,  9.56s/it]
Training 28/30:  11%|█         | 16/143 [00:09<00:54,  2.31it/s]
Training 28/30:  22%|██▏       | 32/143 [00:09<00:19,  5.55it/s]
Training 28/30:  40%|███▉      | 57/143 [00:09<00:06, 12.53it/s]
Training 28/30:  59%|█████▊    | 84/143 [00:09<00:02, 22.60it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 27: train_loss=0.1139, val_loss=0.1141



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 29/30:  13%|█▎        | 19/143 [00:09<00:43,  2.88it/s]
Training 29/30:  27%|██▋       | 39/143 [00:09<00:14,  7.12it/s]
Training 29/30:  44%|████▍     | 63/143 [00:09<00:05, 14.05it/s]
Training 29/30:  61%|██████    | 87/143 [00:09<00:02, 23.25it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 28: train_loss=0.1134, val_loss=0.1128



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:36,  9.55s/it]
Training 30/30:  12%|█▏        | 17/143 [00:09<00:51,  2.46it/s]
Training 30/30:  24%|██▍       | 35/143 [00:09<00:17,  6.12it/s]
Training 30/30:  37%|███▋      | 53/143 [00:09<00:08, 11.08it/s]
Training 30/30:  54%|█████▍    | 77/143 [00:09<00:03, 20.05it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.57it/s]


Epoch 29: train_loss=0.1144, val_loss=0.1197


2025-06-02 18:48:11,930 - __main__ - INFO - Saved linear probe for layer 6 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_6_probe.pth
2025-06-02 18:48:31,203 - __main__ - INFO - Running mlp probe on layer 6...
2025-06-02 18:48:31,203 - __main__ - INFO - Running mlp probe on layer 6 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 1/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 1/40:  15%|█▌        | 22/143 [00:09<00:30,  3.93it/s]
Training 1/40:  21%|██        | 30/143 [00:09<00:18,  6.21it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 0: train_loss=0.8564, val_loss=0.1549



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:10<23:41, 10.01s/it]
Training 2/40:   8%|▊         | 12/143 [00:10<01:19,  1.64it/s]
Training 2/40:  15%|█▌        | 22/143 [00:10<00:33,  3.57it/s]
Training 2/40:  24%|██▍       | 34/143 [00:10<00:16,  6.73it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.13it/s]


Epoch 1: train_loss=0.1341, val_loss=0.1251



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:05,  9.34s/it]
Training 3/40:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 3/40:  13%|█▎        | 19/143 [00:09<00:38,  3.19it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 2: train_loss=0.1219, val_loss=0.1180



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 4/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 4/40:  14%|█▍        | 20/143 [00:09<00:33,  3.72it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 3: train_loss=0.1174, val_loss=0.1150



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 5/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 5/40:  12%|█▏        | 17/143 [00:09<00:41,  3.00it/s]
Training 5/40:  24%|██▍       | 34/143 [00:09<00:13,  7.84it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 4: train_loss=0.1123, val_loss=0.1149



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 6/40:   5%|▍         | 7/143 [00:09<02:16,  1.01s/it]
Training 6/40:   9%|▉         | 13/143 [00:09<00:58,  2.22it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 5: train_loss=0.1090, val_loss=0.1091



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 7/40:   5%|▍         | 7/143 [00:09<02:13,  1.02it/s]
Training 7/40:  12%|█▏        | 17/143 [00:09<00:40,  3.11it/s]
Training 7/40:  22%|██▏       | 31/143 [00:09<00:15,  7.12it/s]
Training 7/40:  34%|███▎      | 48/143 [00:09<00:06, 13.64it/s]
Training 7/40:  48%|████▊     | 68/143 [00:09<00:03, 23.74it/s]
Training 7/40:  62%|██████▏   | 89/143 [00:09<00:01, 36.97it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 6: train_loss=0.1053, val_loss=0.1074



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<22:28,  9.50s/it]
Training 8/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 8/40:  15%|█▍        | 21/143 [00:09<00:33,  3.68it/s]
Training 8/40:  22%|██▏       | 32/143 [00:09<00:16,  6.73it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 7: train_loss=0.1020, val_loss=0.1049



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 9/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 9/40:  13%|█▎        | 19/143 [00:09<00:37,  3.30it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 8: train_loss=0.0979, val_loss=0.1034



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:41,  9.16s/it]
Training 10/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 10/40:  16%|█▌        | 23/143 [00:09<00:28,  4.17it/s]
Training 10/40:  25%|██▌       | 36/143 [00:09<00:13,  7.88it/s]
Training 10/40:  39%|███▉      | 56/143 [00:09<00:05, 15.66it/s]
Training 10/40:  52%|█████▏    | 75/143 [00:09<00:02, 25.13it/s]
Training 10/40:  72%|███████▏  | 103/143 [00:09<00:00, 43.45it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 9: train_loss=0.0949, val_loss=0.1029



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 11/40:   5%|▍         | 7/143 [00:09<02:13,  1.02it/s]
Training 11/40:  10%|█         | 15/143 [00:09<00:47,  2.70it/s]
Training 11/40:  33%|███▎      | 47/143 [00:09<00:07, 12.05it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 10: train_loss=0.0921, val_loss=0.0989



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:24,  9.04s/it]
Training 12/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 12/40:  13%|█▎        | 18/143 [00:09<00:37,  3.34it/s]
Training 12/40:  22%|██▏       | 31/143 [00:09<00:15,  7.14it/s]
Training 12/40:  34%|███▍      | 49/143 [00:09<00:06, 14.24it/s]
Training 12/40:  49%|████▉     | 70/143 [00:09<00:02, 25.08it/s]
Training 12/40:  67%|██████▋   | 96/143 [00:09<00:01, 42.14it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 11: train_loss=0.0894, val_loss=0.0967



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<22:17,  9.42s/it]
Training 13/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 13/40:  14%|█▍        | 20/143 [00:09<00:35,  3.51it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 12: train_loss=0.0878, val_loss=0.0949



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:33,  9.53s/it]
Training 14/40:   6%|▋         | 9/143 [00:09<01:44,  1.28it/s]
Training 14/40:  13%|█▎        | 18/143 [00:09<00:40,  3.11it/s]
Training 14/40:  20%|█▉        | 28/143 [00:09<00:19,  5.88it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 13: train_loss=0.0854, val_loss=0.0953



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<22:00,  9.30s/it]
Training 15/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 15/40:  14%|█▍        | 20/143 [00:09<00:35,  3.48it/s]
Training 15/40:  27%|██▋       | 39/143 [00:09<00:11,  8.94it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 14: train_loss=0.0825, val_loss=0.0943



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 16/40:   7%|▋         | 10/143 [00:09<01:30,  1.46it/s]
Training 16/40:  13%|█▎        | 19/143 [00:09<00:37,  3.33it/s]
Training 16/40:  22%|██▏       | 32/143 [00:09<00:15,  7.03it/s]
Training 16/40:  31%|███▏      | 45/143 [00:09<00:08, 11.94it/s]
Training 16/40:  48%|████▊     | 69/143 [00:09<00:03, 24.26it/s]
Training 16/40:  66%|██████▌   | 94/143 [00:09<00:01, 40.40it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 15: train_loss=0.0811, val_loss=0.0916



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:11<26:09, 11.05s/it]
Training 17/40:   5%|▍         | 7/143 [00:11<02:39,  1.17s/it]
Training 17/40:  13%|█▎        | 19/143 [00:11<00:41,  2.99it/s]
Training 17/40:  21%|██        | 30/143 [00:11<00:20,  5.64it/s]
Training 17/40:  33%|███▎      | 47/143 [00:11<00:08, 11.27it/s]
Training 17/40:  46%|████▌     | 66/143 [00:11<00:03, 19.61it/s]
Training 17/40:  64%|██████▍   | 92/143 [00:11<00:01, 34.56it/s]
Training 17/40: 100%|██████████| 143/143 [00:12<00:00, 11.71it/s]


Epoch 16: train_loss=0.0787, val_loss=0.0927



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<22:24,  9.47s/it]
Training 18/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 18/40:  15%|█▍        | 21/143 [00:09<00:33,  3.69it/s]
Training 18/40:  24%|██▍       | 34/143 [00:09<00:14,  7.32it/s]
Training 18/40:  41%|████      | 58/143 [00:09<00:05, 16.49it/s]
Training 18/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.68it/s]
Training 18/40:  71%|███████▏  | 102/143 [00:10<00:00, 41.49it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 13.49it/s]


Epoch 17: train_loss=0.0787, val_loss=0.0925



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 19/40:   6%|▌         | 8/143 [00:09<02:00,  1.12it/s]
Training 19/40:  13%|█▎        | 18/143 [00:09<00:40,  3.12it/s]
Training 19/40:  21%|██        | 30/143 [00:10<00:17,  6.41it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 18: train_loss=0.0765, val_loss=0.0904



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 20/40:   5%|▍         | 7/143 [00:09<02:10,  1.04it/s]
Training 20/40:  10%|█         | 15/143 [00:09<00:46,  2.75it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 14.57it/s]


Epoch 19: train_loss=0.0767, val_loss=0.0899



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 21/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 21/40:  14%|█▍        | 20/143 [00:09<00:34,  3.54it/s]
Training 21/40:  23%|██▎       | 33/143 [00:09<00:15,  7.30it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 20: train_loss=0.0742, val_loss=0.0908



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 22/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 22/40:  13%|█▎        | 18/143 [00:09<00:40,  3.09it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 21: train_loss=0.0743, val_loss=0.0908



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 23/40:   6%|▋         | 9/143 [00:09<01:46,  1.26it/s]
Training 23/40:  14%|█▍        | 20/143 [00:09<00:35,  3.47it/s]
Training 23/40:  21%|██        | 30/143 [00:10<00:18,  6.18it/s]
Training 23/40:  34%|███▎      | 48/143 [00:10<00:07, 12.86it/s]
Training 23/40:  49%|████▉     | 70/143 [00:10<00:03, 23.62it/s]
Training 23/40:  64%|██████▎   | 91/143 [00:10<00:01, 36.44it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.17it/s]


Epoch 22: train_loss=0.0718, val_loss=0.0896



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 24/40:   7%|▋         | 10/143 [00:09<01:33,  1.42it/s]
Training 24/40:  13%|█▎        | 18/143 [00:09<00:41,  3.03it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 23: train_loss=0.0705, val_loss=0.0911



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:02,  9.32s/it]
Training 25/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 25/40:  13%|█▎        | 18/143 [00:09<00:39,  3.18it/s]
Training 25/40:  20%|██        | 29/143 [00:09<00:18,  6.30it/s]
Training 25/40:  32%|███▏      | 46/143 [00:09<00:07, 12.83it/s]
Training 25/40:  47%|████▋     | 67/143 [00:09<00:03, 23.47it/s]
Training 25/40:  61%|██████    | 87/143 [00:09<00:01, 36.09it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.63it/s]


Epoch 24: train_loss=0.0691, val_loss=0.0898



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 26/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 26/40:  15%|█▌        | 22/143 [00:09<00:31,  3.89it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 25: train_loss=0.0679, val_loss=0.0891



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 27/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 27/40:  15%|█▍        | 21/143 [00:09<00:31,  3.90it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 26: train_loss=0.0672, val_loss=0.0894



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 28/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 28/40:  15%|█▌        | 22/143 [00:09<00:31,  3.83it/s]
Training 28/40:  26%|██▌       | 37/143 [00:09<00:13,  8.02it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 27: train_loss=0.0671, val_loss=0.0913



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:08,  9.35s/it]
Training 29/40:   6%|▌         | 8/143 [00:09<01:56,  1.16it/s]
Training 29/40:  13%|█▎        | 19/143 [00:09<00:36,  3.44it/s]
Training 29/40:  22%|██▏       | 32/143 [00:09<00:15,  7.12it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 28: train_loss=0.0664, val_loss=0.0906



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 30/40:   6%|▌         | 8/143 [00:09<01:55,  1.17it/s]
Training 30/40:  20%|█▉        | 28/143 [00:09<00:21,  5.39it/s]
Training 30/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 29: train_loss=0.0653, val_loss=0.0933



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 31/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 31/40:  15%|█▍        | 21/143 [00:09<00:31,  3.89it/s]
Training 31/40:  21%|██        | 30/143 [00:09<00:17,  6.45it/s]
Training 31/40: 100%|██████████| 143/143 [00:09<00:00, 14.33it/s]


Epoch 30: train_loss=0.0631, val_loss=0.0895



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 32/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 32/40:  13%|█▎        | 19/143 [00:09<00:35,  3.54it/s]
Training 32/40:  22%|██▏       | 32/143 [00:09<00:15,  7.30it/s]
Training 32/40:  36%|███▌      | 51/143 [00:09<00:06, 14.76it/s]
Training 32/40:  49%|████▉     | 70/143 [00:09<00:02, 24.42it/s]
Training 32/40:  64%|██████▍   | 92/143 [00:09<00:01, 38.68it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 31: train_loss=0.0637, val_loss=0.0941



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 33/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 33/40:  13%|█▎        | 18/143 [00:09<00:38,  3.21it/s]
Training 33/40:  45%|████▌     | 65/143 [00:09<00:04, 17.26it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 32: train_loss=0.0631, val_loss=0.0897



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 34/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 34/40:  13%|█▎        | 19/143 [00:09<00:38,  3.25it/s]
Training 34/40:  36%|███▌      | 51/143 [00:09<00:07, 12.63it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 33: train_loss=0.0617, val_loss=0.0875



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 35/40:  14%|█▍        | 20/143 [00:09<00:35,  3.49it/s]
Training 35/40:  31%|███       | 44/143 [00:09<00:09, 10.54it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 34: train_loss=0.0614, val_loss=0.1006



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 36/40:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 36/40:  15%|█▍        | 21/143 [00:09<00:31,  3.88it/s]
Training 36/40:  20%|██        | 29/143 [00:09<00:18,  6.18it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 35: train_loss=0.0595, val_loss=0.0900



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 37/40:  13%|█▎        | 18/143 [00:09<00:39,  3.16it/s]
Training 37/40:  21%|██        | 30/143 [00:09<00:17,  6.61it/s]
Training 37/40:  30%|███       | 43/143 [00:09<00:08, 11.61it/s]
Training 37/40:  44%|████▍     | 63/143 [00:09<00:03, 21.90it/s]
Training 37/40:  62%|██████▏   | 88/143 [00:09<00:01, 38.38it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 36: train_loss=0.0602, val_loss=0.0905



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 38/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 38/40:  15%|█▍        | 21/143 [00:09<00:33,  3.69it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 37: train_loss=0.0580, val_loss=0.0916



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 39/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 39/40:  12%|█▏        | 17/143 [00:09<00:40,  3.12it/s]
Training 39/40:  24%|██▍       | 34/143 [00:09<00:13,  8.12it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 38: train_loss=0.0604, val_loss=0.0881



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 40/40:  14%|█▍        | 20/143 [00:09<00:35,  3.51it/s]
Training 40/40:  43%|████▎     | 62/143 [00:09<00:05, 15.57it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 39: train_loss=0.0570, val_loss=0.0890


2025-06-02 19:01:53,811 - __main__ - INFO - Saved mlp probe for layer 6 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_6_probe.pth
 50%|█████     | 3/6 [1:36:05<1:36:03, 1921.09s/it]2025-06-02 19:02:13,528 - __main__ - INFO - Processing layer 8...
2025-06-02 19:02:13,529 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:16:53,  7.15s/it]
Extracting features:   0%|          | 2/1149 [00:07<59:10,  3.10s/it]  
Extracting features:   0%|          | 3/1149 [00:07<34:26,  1.80s/it]
Extracting features:   0%|          | 4/1149 [00:07<22:43,  1.19s/it]
Extracting features:   0%|          | 5/1149 [00:08<16:18,  1.17it/s]
Extracting features:   1%|          | 6/1149 [00:08<12:26,  1.53it/s]
Extracting features:   1%|          | 7/1149 [00:08<09:55,  1.92it/s]
Extracting features:   1%|          | 8/1149 [00:08<08:18,  2.29it/s]


Epoch 0: train_loss=1.5421, val_loss=0.6302



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 2/30:  10%|▉         | 14/143 [00:09<01:01,  2.10it/s]
Training 2/30:  23%|██▎       | 33/143 [00:09<00:17,  6.13it/s]
Training 2/30:  43%|████▎     | 61/143 [00:09<00:05, 14.26it/s]
Training 2/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.47it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 14.13it/s]


Epoch 1: train_loss=0.5196, val_loss=0.4253



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 3/30:  13%|█▎        | 19/143 [00:09<00:44,  2.80it/s]
Training 3/30:  28%|██▊       | 40/143 [00:09<00:14,  7.12it/s]
Training 3/30:  45%|████▌     | 65/143 [00:09<00:05, 14.15it/s]
Training 3/30:  64%|██████▍   | 92/143 [00:09<00:02, 24.28it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 2: train_loss=0.3776, val_loss=0.3301



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<23:35,  9.97s/it]
Training 4/30:  11%|█         | 16/143 [00:10<00:57,  2.22it/s]
Training 4/30:  23%|██▎       | 33/143 [00:10<00:19,  5.53it/s]
Training 4/30:  41%|████▏     | 59/143 [00:10<00:06, 12.52it/s]
Training 4/30:  61%|██████    | 87/143 [00:10<00:02, 22.62it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.09it/s]


Epoch 3: train_loss=0.3080, val_loss=0.2788



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<23:04,  9.75s/it]
Training 5/30:  10%|█         | 15/143 [00:09<01:00,  2.12it/s]
Training 5/30:  22%|██▏       | 31/143 [00:09<00:21,  5.30it/s]
Training 5/30:  34%|███▎      | 48/143 [00:10<00:09,  9.91it/s]
Training 5/30:  51%|█████     | 73/143 [00:10<00:03, 19.14it/s]
Training 5/30:  71%|███████   | 101/143 [00:10<00:01, 32.69it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.28it/s]


Epoch 4: train_loss=0.2628, val_loss=0.2514



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:10<25:20, 10.71s/it]
Training 6/30:  10%|█         | 15/143 [00:10<01:06,  1.93it/s]
Training 6/30:  27%|██▋       | 38/143 [00:10<00:17,  6.14it/s]
Training 6/30: 100%|██████████| 143/143 [00:11<00:00, 12.50it/s]


Epoch 5: train_loss=0.2310, val_loss=0.2228



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:10<25:54, 10.95s/it]
Training 7/30:   8%|▊         | 11/143 [00:11<01:35,  1.38it/s]
Training 7/30:  22%|██▏       | 32/143 [00:11<00:21,  5.15it/s]
Training 7/30:  41%|████▏     | 59/143 [00:11<00:07, 11.80it/s]
Training 7/30:  68%|██████▊   | 97/143 [00:11<00:01, 24.55it/s]
Training 7/30: 100%|██████████| 143/143 [00:11<00:00, 12.06it/s]


Epoch 6: train_loss=0.2099, val_loss=0.1978



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:10<24:21, 10.29s/it]
Training 8/30:  10%|▉         | 14/143 [00:10<01:08,  1.88it/s]
Training 8/30:  27%|██▋       | 38/143 [00:10<00:16,  6.44it/s]
Training 8/30:  42%|████▏     | 60/143 [00:10<00:06, 12.13it/s]
Training 8/30:  61%|██████    | 87/143 [00:10<00:02, 21.59it/s]
Training 8/30: 100%|██████████| 143/143 [00:11<00:00, 12.69it/s]


Epoch 7: train_loss=0.1898, val_loss=0.1823



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:10<25:46, 10.89s/it]
Training 9/30:  13%|█▎        | 18/143 [00:10<00:54,  2.29it/s]
Training 9/30:  25%|██▌       | 36/143 [00:11<00:19,  5.52it/s]
Training 9/30:  43%|████▎     | 62/143 [00:11<00:06, 11.94it/s]
Training 9/30:  62%|██████▏   | 89/143 [00:11<00:02, 20.93it/s]
Training 9/30: 100%|██████████| 143/143 [00:11<00:00, 12.11it/s]


Epoch 8: train_loss=0.1778, val_loss=0.1768



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:10<25:23, 10.73s/it]
Training 10/30:   8%|▊         | 12/143 [00:10<01:25,  1.54it/s]
Training 10/30:  20%|██        | 29/143 [00:10<00:24,  4.63it/s]
Training 10/30:  35%|███▍      | 50/143 [00:11<00:09,  9.88it/s]
Training 10/30:  60%|██████    | 86/143 [00:11<00:02, 22.20it/s]
Training 10/30: 100%|██████████| 143/143 [00:11<00:00, 12.25it/s]


Epoch 9: train_loss=0.1659, val_loss=0.1593



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:10<23:52, 10.09s/it]
Training 11/30:  10%|▉         | 14/143 [00:10<01:07,  1.91it/s]
Training 11/30:  24%|██▍       | 35/143 [00:10<00:18,  5.97it/s]
Training 11/30:  43%|████▎     | 61/143 [00:10<00:06, 12.87it/s]
Training 11/30:  63%|██████▎   | 90/143 [00:10<00:02, 23.20it/s]
Training 11/30: 100%|██████████| 143/143 [00:11<00:00, 12.97it/s]


Epoch 10: train_loss=0.1593, val_loss=0.1700



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<23:11,  9.80s/it]
Training 12/30:   8%|▊         | 12/143 [00:09<01:17,  1.68it/s]
Training 12/30:  25%|██▌       | 36/143 [00:10<00:16,  6.47it/s]
Training 12/30:  43%|████▎     | 61/143 [00:10<00:06, 13.26it/s]
Training 12/30:  62%|██████▏   | 89/143 [00:10<00:02, 23.46it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.34it/s]


Epoch 11: train_loss=0.1563, val_loss=0.1469



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<22:46,  9.62s/it]
Training 13/30:  12%|█▏        | 17/143 [00:09<00:51,  2.44it/s]
Training 13/30:  24%|██▍       | 35/143 [00:09<00:17,  6.07it/s]
Training 13/30:  43%|████▎     | 61/143 [00:09<00:06, 13.28it/s]
Training 13/30:  64%|██████▍   | 92/143 [00:10<00:02, 24.78it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.55it/s]


Epoch 12: train_loss=0.1448, val_loss=0.1408



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 14/30:   9%|▉         | 13/143 [00:09<01:04,  2.00it/s]
Training 14/30:  23%|██▎       | 33/143 [00:09<00:17,  6.37it/s]
Training 14/30:  39%|███▉      | 56/143 [00:09<00:06, 13.19it/s]
Training 14/30:  64%|██████▍   | 92/143 [00:09<00:01, 27.66it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 13: train_loss=0.1411, val_loss=0.1369



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 15/30:  10%|█         | 15/143 [00:09<01:00,  2.13it/s]
Training 15/30:  22%|██▏       | 31/143 [00:09<00:20,  5.33it/s]
Training 15/30:  42%|████▏     | 60/143 [00:10<00:06, 13.34it/s]
Training 15/30:  61%|██████    | 87/143 [00:10<00:02, 23.23it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.47it/s]


Epoch 14: train_loss=0.1380, val_loss=0.1353



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 16/30:   8%|▊         | 12/143 [00:09<01:15,  1.73it/s]
Training 16/30:  23%|██▎       | 33/143 [00:09<00:18,  6.04it/s]
Training 16/30:  37%|███▋      | 53/143 [00:09<00:07, 11.61it/s]
Training 16/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.56it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 15: train_loss=0.1339, val_loss=0.1310



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<22:09,  9.36s/it]
Training 17/30:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 17/30:  19%|█▉        | 27/143 [00:09<00:23,  4.86it/s]
Training 17/30:  33%|███▎      | 47/143 [00:09<00:09, 10.54it/s]
Training 17/30:  47%|████▋     | 67/143 [00:09<00:04, 18.09it/s]
Training 17/30:  67%|██████▋   | 96/143 [00:09<00:01, 32.73it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 16: train_loss=0.1344, val_loss=0.1320



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 18/30:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 18/30:  25%|██▌       | 36/143 [00:09<00:15,  6.85it/s]
Training 18/30:  42%|████▏     | 60/143 [00:09<00:06, 13.78it/s]
Training 18/30:  71%|███████▏  | 102/143 [00:09<00:01, 30.33it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 17: train_loss=0.1314, val_loss=0.1346



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 19/30:  13%|█▎        | 18/143 [00:09<00:45,  2.72it/s]
Training 19/30:  30%|███       | 43/143 [00:09<00:12,  8.03it/s]
Training 19/30:  43%|████▎     | 62/143 [00:09<00:06, 13.44it/s]
Training 19/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.11it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 18: train_loss=0.1273, val_loss=0.1260



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<22:20,  9.44s/it]
Training 20/30:  13%|█▎        | 19/143 [00:09<00:44,  2.79it/s]
Training 20/30:  24%|██▍       | 35/143 [00:09<00:17,  6.06it/s]
Training 20/30:  39%|███▉      | 56/143 [00:09<00:07, 11.95it/s]
Training 20/30:  59%|█████▉    | 85/143 [00:09<00:02, 22.97it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 19: train_loss=0.1262, val_loss=0.1271



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 21/30:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 21/30:  24%|██▍       | 34/143 [00:09<00:16,  6.44it/s]
Training 21/30:  39%|███▉      | 56/143 [00:09<00:06, 12.80it/s]
Training 21/30:  59%|█████▉    | 85/143 [00:09<00:02, 24.12it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 20: train_loss=0.1248, val_loss=0.1222



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 22/30:  11%|█         | 16/143 [00:09<00:54,  2.33it/s]
Training 22/30:  22%|██▏       | 31/143 [00:09<00:20,  5.39it/s]
Training 22/30:  38%|███▊      | 55/143 [00:09<00:07, 12.13it/s]
Training 22/30:  54%|█████▍    | 77/143 [00:09<00:03, 20.30it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 21: train_loss=0.1255, val_loss=0.1261



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<22:48,  9.64s/it]
Training 23/30:  10%|█         | 15/143 [00:09<00:59,  2.15it/s]
Training 23/30:  21%|██        | 30/143 [00:09<00:21,  5.16it/s]
Training 23/30:  37%|███▋      | 53/143 [00:09<00:07, 11.53it/s]
Training 23/30:  57%|█████▋    | 81/143 [00:10<00:02, 21.94it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 13.50it/s]


Epoch 22: train_loss=0.1243, val_loss=0.1228



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<22:56,  9.70s/it]
Training 24/30:  17%|█▋        | 24/143 [00:09<00:34,  3.44it/s]
Training 24/30:  29%|██▊       | 41/143 [00:09<00:14,  6.82it/s]
Training 24/30:  43%|████▎     | 61/143 [00:10<00:06, 12.26it/s]
Training 24/30:  62%|██████▏   | 88/143 [00:10<00:02, 22.19it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 23: train_loss=0.1201, val_loss=0.1206



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 25/30:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 25/30:  20%|██        | 29/143 [00:09<00:21,  5.39it/s]
Training 25/30:  36%|███▋      | 52/143 [00:09<00:07, 12.04it/s]
Training 25/30:  60%|██████    | 86/143 [00:09<00:02, 25.33it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 24: train_loss=0.1204, val_loss=0.1320



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 26/30:  10%|█         | 15/143 [00:09<00:56,  2.25it/s]
Training 26/30:  21%|██        | 30/143 [00:09<00:20,  5.41it/s]
Training 26/30: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 25: train_loss=0.1219, val_loss=0.1205



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:10<23:42, 10.02s/it]
Training 27/30:  14%|█▍        | 20/143 [00:10<00:44,  2.77it/s]
Training 27/30:  27%|██▋       | 39/143 [00:10<00:16,  6.43it/s]
Training 27/30:  41%|████▏     | 59/143 [00:10<00:07, 11.70it/s]
Training 27/30:  66%|██████▌   | 94/143 [00:10<00:02, 24.37it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.03it/s]


Epoch 26: train_loss=0.1209, val_loss=0.1272



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:31,  9.52s/it]
Training 28/30:  13%|█▎        | 18/143 [00:09<00:47,  2.62it/s]
Training 28/30:  29%|██▊       | 41/143 [00:09<00:13,  7.31it/s]
Training 28/30:  45%|████▍     | 64/143 [00:09<00:05, 13.69it/s]
Training 28/30:  71%|███████   | 101/143 [00:09<00:01, 27.70it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 27: train_loss=0.1190, val_loss=0.1221



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<22:05,  9.33s/it]
Training 29/30:  11%|█         | 16/143 [00:09<00:53,  2.37it/s]
Training 29/30:  27%|██▋       | 38/143 [00:09<00:15,  6.95it/s]
Training 29/30:  45%|████▍     | 64/143 [00:09<00:05, 14.35it/s]
Training 29/30:  63%|██████▎   | 90/143 [00:09<00:02, 24.16it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 28: train_loss=0.1168, val_loss=0.1176



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:09,  9.36s/it]
Training 30/30:  15%|█▍        | 21/143 [00:09<00:39,  3.11it/s]
Training 30/30:  26%|██▌       | 37/143 [00:09<00:16,  6.40it/s]
Training 30/30:  41%|████      | 58/143 [00:09<00:06, 12.33it/s]
Training 30/30:  61%|██████    | 87/143 [00:09<00:02, 23.42it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 29: train_loss=0.1182, val_loss=0.1270


2025-06-02 19:20:08,014 - __main__ - INFO - Saved linear probe for layer 8 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_8_probe.pth
2025-06-02 19:20:27,852 - __main__ - INFO - Running mlp probe on layer 8...
2025-06-02 19:20:27,852 - __main__ - INFO - Running mlp probe on layer 8 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:34,  9.12s/it]
Training 1/40:   8%|▊         | 11/143 [00:09<01:19,  1.65it/s]
Training 1/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 0: train_loss=1.3162, val_loss=0.1576



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<23:04,  9.75s/it]
Training 2/40:   6%|▋         | 9/143 [00:09<01:46,  1.26it/s]
Training 2/40:  13%|█▎        | 18/143 [00:09<00:40,  3.05it/s]
Training 2/40:  34%|███▎      | 48/143 [00:10<00:08, 11.37it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 1: train_loss=0.1488, val_loss=0.1380



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 3/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 3/40:  15%|█▍        | 21/143 [00:09<00:30,  3.94it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 2: train_loss=0.1336, val_loss=0.1263



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<22:38,  9.57s/it]
Training 4/40:   5%|▍         | 7/143 [00:09<02:18,  1.02s/it]
Training 4/40:  13%|█▎        | 19/143 [00:09<00:36,  3.43it/s]
Training 4/40:  22%|██▏       | 32/143 [00:09<00:15,  7.02it/s]
Training 4/40:  34%|███▎      | 48/143 [00:09<00:07, 12.97it/s]
Training 4/40:  49%|████▉     | 70/143 [00:10<00:03, 23.85it/s]
Training 4/40:  69%|██████▉   | 99/143 [00:10<00:01, 42.39it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 13.28it/s]


Epoch 3: train_loss=0.1247, val_loss=0.1204



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 5/40:   6%|▌         | 8/143 [00:09<01:55,  1.17it/s]
Training 5/40:  14%|█▍        | 20/143 [00:09<00:33,  3.70it/s]
Training 5/40:  22%|██▏       | 32/143 [00:09<00:15,  7.12it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 4: train_loss=0.1209, val_loss=0.1188



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<22:15,  9.41s/it]
Training 6/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 6/40:  10%|█         | 15/143 [00:09<00:49,  2.59it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 5: train_loss=0.1180, val_loss=0.1150



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 7/40:   8%|▊         | 12/143 [00:09<01:15,  1.73it/s]
Training 7/40:  14%|█▍        | 20/143 [00:09<00:36,  3.35it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 13.89it/s]


Epoch 6: train_loss=0.1154, val_loss=0.1138



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<22:10,  9.37s/it]
Training 8/40:   6%|▌         | 8/143 [00:09<01:56,  1.15it/s]
Training 8/40:  14%|█▍        | 20/143 [00:09<00:33,  3.64it/s]
Training 8/40:  22%|██▏       | 31/143 [00:09<00:16,  6.73it/s]
Training 8/40:  33%|███▎      | 47/143 [00:09<00:07, 12.82it/s]
Training 8/40:  45%|████▌     | 65/143 [00:09<00:03, 21.81it/s]
Training 8/40:  69%|██████▊   | 98/143 [00:09<00:01, 43.62it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.58it/s]


Epoch 7: train_loss=0.1125, val_loss=0.1139



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 9/40:  13%|█▎        | 18/143 [00:09<00:39,  3.13it/s]
Training 9/40:  24%|██▍       | 34/143 [00:09<00:14,  7.72it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 8: train_loss=0.1114, val_loss=0.1106



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<22:24,  9.47s/it]
Training 10/40:   6%|▌         | 8/143 [00:09<01:57,  1.14it/s]
Training 10/40:  11%|█         | 16/143 [00:09<00:45,  2.77it/s]
Training 10/40:  20%|██        | 29/143 [00:09<00:17,  6.43it/s]
Training 10/40:  29%|██▉       | 42/143 [00:09<00:08, 11.30it/s]
Training 10/40:  45%|████▍     | 64/143 [00:09<00:03, 22.38it/s]
Training 10/40:  61%|██████    | 87/143 [00:10<00:01, 36.97it/s]
Training 10/40:  78%|███████▊  | 111/143 [00:10<00:00, 55.52it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.35it/s]


Epoch 9: train_loss=0.1086, val_loss=0.1114



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 11/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 11/40:  17%|█▋        | 25/143 [00:09<00:25,  4.62it/s]
Training 11/40:  26%|██▌       | 37/143 [00:09<00:13,  8.09it/s]
Training 11/40:  41%|████▏     | 59/143 [00:09<00:04, 16.84it/s]
Training 11/40:  56%|█████▌    | 80/143 [00:09<00:02, 27.57it/s]
Training 11/40:  71%|███████   | 101/143 [00:09<00:01, 40.95it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 10: train_loss=0.1069, val_loss=0.1081



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 12/40:   8%|▊         | 11/143 [00:09<01:19,  1.65it/s]
Training 12/40:  15%|█▌        | 22/143 [00:09<00:30,  3.99it/s]
Training 12/40:  24%|██▍       | 34/143 [00:09<00:14,  7.44it/s]
Training 12/40:  35%|███▍      | 50/143 [00:09<00:06, 13.67it/s]
Training 12/40:  48%|████▊     | 68/143 [00:09<00:03, 22.83it/s]
Training 12/40:  63%|██████▎   | 90/143 [00:09<00:01, 37.14it/s]
Training 12/40:  78%|███████▊  | 112/143 [00:09<00:00, 54.28it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 11: train_loss=0.1071, val_loss=0.1086



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 13/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 13/40:  17%|█▋        | 24/143 [00:09<00:26,  4.41it/s]
Training 13/40:  26%|██▌       | 37/143 [00:09<00:13,  8.15it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 12: train_loss=0.1031, val_loss=0.1066



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:52,  9.67s/it]
Training 14/40:   9%|▉         | 13/143 [00:09<01:10,  1.85it/s]
Training 14/40:  17%|█▋        | 25/143 [00:09<00:27,  4.25it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.67it/s]


Epoch 13: train_loss=0.1010, val_loss=0.1077



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 15/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 15/40:  17%|█▋        | 25/143 [00:09<00:25,  4.55it/s]
Training 15/40:  28%|██▊       | 40/143 [00:09<00:11,  8.79it/s]
Training 15/40:  41%|████      | 58/143 [00:09<00:05, 15.66it/s]
Training 15/40:  55%|█████▌    | 79/143 [00:09<00:02, 26.16it/s]
Training 15/40:  73%|███████▎  | 104/143 [00:09<00:00, 42.20it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 14: train_loss=0.1005, val_loss=0.1038



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:10<23:42, 10.02s/it]
Training 16/40:   7%|▋         | 10/143 [00:10<01:37,  1.36it/s]
Training 16/40:  13%|█▎        | 19/143 [00:10<00:39,  3.11it/s]
Training 16/40:  24%|██▍       | 34/143 [00:10<00:15,  7.11it/s]
Training 16/40:  35%|███▍      | 50/143 [00:10<00:07, 12.85it/s]
Training 16/40:  50%|█████     | 72/143 [00:10<00:03, 23.37it/s]
Training 16/40:  67%|██████▋   | 96/143 [00:10<00:01, 37.92it/s]
Training 16/40: 100%|██████████| 143/143 [00:11<00:00, 12.67it/s]


Epoch 15: train_loss=0.0973, val_loss=0.1042



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 17/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 17/40:  15%|█▌        | 22/143 [00:09<00:30,  3.91it/s]
Training 17/40:  24%|██▍       | 35/143 [00:09<00:14,  7.60it/s]
Training 17/40:  34%|███▍      | 49/143 [00:09<00:07, 12.91it/s]
Training 17/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.77it/s]
Training 17/40:  66%|██████▋   | 95/143 [00:09<00:01, 39.01it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 16: train_loss=0.0959, val_loss=0.1072



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 18/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 18/40:  17%|█▋        | 24/143 [00:09<00:27,  4.36it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 17: train_loss=0.0950, val_loss=0.1047



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 19/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 19/40:  16%|█▌        | 23/143 [00:09<00:28,  4.18it/s]
Training 19/40:  27%|██▋       | 39/143 [00:09<00:11,  8.87it/s]
Training 19/40:  39%|███▉      | 56/143 [00:09<00:05, 15.49it/s]
Training 19/40:  55%|█████▌    | 79/143 [00:09<00:02, 27.38it/s]
Training 19/40:  70%|██████▉   | 100/143 [00:09<00:01, 40.72it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 18: train_loss=0.0920, val_loss=0.1034



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 20/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 20/40:  17%|█▋        | 24/143 [00:09<00:27,  4.31it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 19: train_loss=0.0917, val_loss=0.1051



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 21/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 21/40:  16%|█▌        | 23/143 [00:09<00:28,  4.17it/s]
Training 21/40:  36%|███▌      | 51/143 [00:09<00:07, 12.47it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 20: train_loss=0.0895, val_loss=0.0981



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 22/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 22/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 21: train_loss=0.0879, val_loss=0.1008



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 23/40:   7%|▋         | 10/143 [00:09<01:27,  1.51it/s]
Training 23/40:  15%|█▍        | 21/143 [00:09<00:31,  3.87it/s]
Training 23/40:  22%|██▏       | 31/143 [00:09<00:16,  6.77it/s]
Training 23/40:  31%|███▏      | 45/143 [00:09<00:08, 12.25it/s]
Training 23/40:  42%|████▏     | 60/143 [00:09<00:04, 19.88it/s]
Training 23/40:  56%|█████▌    | 80/143 [00:09<00:01, 33.01it/s]
Training 23/40:  76%|███████▌  | 108/143 [00:09<00:00, 56.33it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.89it/s]


Epoch 22: train_loss=0.0864, val_loss=0.0975



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 24/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 24/40:  16%|█▌        | 23/143 [00:09<00:28,  4.16it/s]
Training 24/40:  37%|███▋      | 53/143 [00:09<00:06, 13.04it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 23: train_loss=0.0842, val_loss=0.1025



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:22,  1.61it/s]
Training 25/40:  17%|█▋        | 25/143 [00:09<00:26,  4.51it/s]
Training 25/40:  27%|██▋       | 38/143 [00:09<00:12,  8.15it/s]
Training 25/40:  38%|███▊      | 55/143 [00:09<00:06, 14.60it/s]
Training 25/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.54it/s]
Training 25/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.33it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 24: train_loss=0.0828, val_loss=0.0980



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 26/40:   9%|▉         | 13/143 [00:09<01:06,  1.94it/s]
Training 26/40:  17%|█▋        | 25/143 [00:09<00:26,  4.46it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 25: train_loss=0.0798, val_loss=0.0999



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 27/40:   9%|▉         | 13/143 [00:09<01:07,  1.93it/s]
Training 27/40:  17%|█▋        | 25/143 [00:09<00:26,  4.44it/s]
Training 27/40:  27%|██▋       | 39/143 [00:09<00:12,  8.42it/s]
Training 27/40:  42%|████▏     | 60/143 [00:09<00:05, 16.56it/s]
Training 27/40:  58%|█████▊    | 83/143 [00:09<00:02, 28.07it/s]
Training 27/40:  74%|███████▍  | 106/143 [00:09<00:00, 42.55it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.76it/s]


Epoch 26: train_loss=0.0794, val_loss=0.1028



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:29,  9.51s/it]
Training 28/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 28/40:  15%|█▌        | 22/143 [00:09<00:31,  3.82it/s]
Training 28/40:  24%|██▍       | 35/143 [00:09<00:14,  7.44it/s]
Training 28/40:  34%|███▎      | 48/143 [00:09<00:07, 12.26it/s]
Training 28/40:  47%|████▋     | 67/143 [00:10<00:03, 21.69it/s]
Training 28/40:  61%|██████    | 87/143 [00:10<00:01, 34.26it/s]
Training 28/40:  77%|███████▋  | 110/143 [00:10<00:00, 52.05it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 27: train_loss=0.0781, val_loss=0.0965



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<23:21,  9.87s/it]
Training 29/40:   9%|▉         | 13/143 [00:09<01:11,  1.81it/s]
Training 29/40:  17%|█▋        | 25/143 [00:10<00:28,  4.17it/s]
Training 29/40:  27%|██▋       | 38/143 [00:10<00:13,  7.64it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.35it/s]


Epoch 28: train_loss=0.0751, val_loss=0.0965



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 30/40:   7%|▋         | 10/143 [00:09<01:28,  1.49it/s]
Training 30/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 30/40:  27%|██▋       | 39/143 [00:09<00:11,  8.98it/s]
Training 30/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 29: train_loss=0.0747, val_loss=0.0935



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 31/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 31/40:  18%|█▊        | 26/143 [00:09<00:24,  4.72it/s]
Training 31/40:  27%|██▋       | 38/143 [00:09<00:12,  8.11it/s]
Training 31/40:  41%|████      | 58/143 [00:09<00:05, 15.86it/s]
Training 31/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.34it/s]
Training 31/40:  70%|██████▉   | 100/143 [00:09<00:01, 40.11it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 30: train_loss=0.0726, val_loss=0.0995



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 32/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 32/40:  16%|█▌        | 23/143 [00:09<00:29,  4.08it/s]
Training 32/40:  26%|██▌       | 37/143 [00:09<00:13,  8.01it/s]
Training 32/40:  41%|████▏     | 59/143 [00:09<00:05, 16.40it/s]
Training 32/40:  55%|█████▌    | 79/143 [00:09<00:02, 26.19it/s]
Training 32/40:  71%|███████▏  | 102/143 [00:10<00:01, 40.59it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 31: train_loss=0.0723, val_loss=0.0959



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 33/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 33/40:  15%|█▌        | 22/143 [00:09<00:31,  3.90it/s]
Training 33/40:  27%|██▋       | 38/143 [00:09<00:12,  8.45it/s]
Training 33/40:  38%|███▊      | 54/143 [00:09<00:06, 14.51it/s]
Training 33/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.56it/s]
Training 33/40:  68%|██████▊   | 97/143 [00:09<00:01, 38.74it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 32: train_loss=0.0705, val_loss=0.0985



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:41,  9.17s/it]
Training 34/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 34/40:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 34/40:  26%|██▌       | 37/143 [00:09<00:13,  8.12it/s]
Training 34/40:  38%|███▊      | 54/143 [00:09<00:06, 14.67it/s]
Training 34/40:  51%|█████     | 73/143 [00:09<00:02, 24.26it/s]
Training 34/40:  64%|██████▍   | 92/143 [00:09<00:01, 36.25it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 33: train_loss=0.0705, val_loss=0.0961



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:22,  9.45s/it]
Training 35/40:   8%|▊         | 11/143 [00:09<01:22,  1.59it/s]
Training 35/40:  16%|█▌        | 23/143 [00:09<00:29,  4.05it/s]
Training 35/40:  38%|███▊      | 55/143 [00:09<00:06, 13.17it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.94it/s]


Epoch 34: train_loss=0.0699, val_loss=0.0929



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 36/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 36/40:  17%|█▋        | 25/143 [00:09<00:25,  4.54it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 35: train_loss=0.0681, val_loss=0.0933



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 37/40:   9%|▉         | 13/143 [00:09<01:05,  1.97it/s]
Training 37/40:  19%|█▉        | 27/143 [00:09<00:23,  4.97it/s]
Training 37/40:  30%|███       | 43/143 [00:09<00:10,  9.62it/s]
Training 37/40:  41%|████▏     | 59/143 [00:09<00:05, 15.80it/s]
Training 37/40:  57%|█████▋    | 82/143 [00:09<00:02, 27.64it/s]
Training 37/40:  72%|███████▏  | 103/143 [00:09<00:00, 40.97it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 36: train_loss=0.0658, val_loss=0.0958



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 38/40:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 38/40:  17%|█▋        | 24/143 [00:09<00:27,  4.26it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 37: train_loss=0.0647, val_loss=0.0944



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 39/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 39/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 39/40:  26%|██▌       | 37/143 [00:09<00:12,  8.42it/s]
Training 39/40:  38%|███▊      | 54/143 [00:09<00:05, 15.02it/s]
Training 39/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.27it/s]
Training 39/40:  70%|██████▉   | 100/143 [00:09<00:01, 41.81it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 38: train_loss=0.0639, val_loss=0.0997



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:25,  9.06s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 40/40:  16%|█▌        | 23/143 [00:09<00:27,  4.29it/s]
Training 40/40:  25%|██▌       | 36/143 [00:09<00:13,  8.05it/s]
Training 40/40:  38%|███▊      | 54/143 [00:09<00:05, 15.10it/s]
Training 40/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.29it/s]
Training 40/40:  71%|███████   | 101/143 [00:09<00:00, 43.08it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 39: train_loss=0.0638, val_loss=0.0918


2025-06-02 19:33:51,637 - __main__ - INFO - Saved mlp probe for layer 8 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_8_probe.pth
 67%|██████▋   | 4/6 [2:08:02<1:03:59, 1919.53s/it]2025-06-02 19:34:10,679 - __main__ - INFO - Processing layer 10...
2025-06-02 19:34:10,679 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:21:32,  7.40s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:01:08,  3.20s/it]
Extracting features:   0%|          | 3/1149 [00:07<35:20,  1.85s/it]  
Extracting features:   0%|          | 4/1149 [00:08<23:09,  1.21s/it]
Extracting features:   0%|          | 5/1149 [00:08<16:30,  1.15it/s]
Extracting features:   1%|          | 6/1149 [00:08<12:28,  1.53it/s]
Extracting features:   1%|          | 7/1149 [00:08<09:59,  1.90it/s]
Extracting features:   1%|          | 8/1149 [00:09<08:30,  2.23it/

Epoch 0: train_loss=2.4778, val_loss=1.1162



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<22:36,  9.55s/it]
Training 2/30:  13%|█▎        | 19/143 [00:09<00:44,  2.76it/s]
Training 2/30:  36%|███▋      | 52/143 [00:09<00:09,  9.50it/s]
Training 2/30:  65%|██████▌   | 93/143 [00:09<00:02, 20.96it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 1: train_loss=0.9161, val_loss=0.7280



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<23:11,  9.80s/it]
Training 3/30:  13%|█▎        | 19/143 [00:09<00:46,  2.69it/s]
Training 3/30:  34%|███▎      | 48/143 [00:10<00:11,  8.46it/s]
Training 3/30:  61%|██████    | 87/143 [00:10<00:02, 19.12it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 2: train_loss=0.6428, val_loss=0.5568



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<23:09,  9.79s/it]
Training 4/30:  14%|█▍        | 20/143 [00:09<00:43,  2.83it/s]
Training 4/30:  32%|███▏      | 46/143 [00:09<00:12,  8.01it/s]
Training 4/30:  57%|█████▋    | 81/143 [00:10<00:03, 17.56it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.42it/s]


Epoch 3: train_loss=0.4926, val_loss=0.4308



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<23:17,  9.84s/it]
Training 5/30:  15%|█▍        | 21/143 [00:09<00:41,  2.96it/s]
Training 5/30:  34%|███▍      | 49/143 [00:10<00:11,  8.51it/s]
Training 5/30:  60%|██████    | 86/143 [00:10<00:03, 18.55it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.35it/s]


Epoch 4: train_loss=0.3933, val_loss=0.3513



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<23:09,  9.78s/it]
Training 6/30:  14%|█▍        | 20/143 [00:09<00:43,  2.83it/s]
Training 6/30:  29%|██▉       | 42/143 [00:09<00:14,  7.21it/s]
Training 6/30:  60%|██████    | 86/143 [00:10<00:02, 19.30it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.42it/s]


Epoch 5: train_loss=0.3285, val_loss=0.2964



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:10<24:06, 10.19s/it]
Training 7/30:  13%|█▎        | 19/143 [00:10<00:47,  2.59it/s]
Training 7/30:  27%|██▋       | 39/143 [00:10<00:16,  6.41it/s]
Training 7/30:  48%|████▊     | 69/143 [00:10<00:05, 14.31it/s]
Training 7/30: 100%|██████████| 143/143 [00:11<00:00, 12.90it/s]


Epoch 6: train_loss=0.2816, val_loss=0.2691



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:10<25:32, 10.79s/it]
Training 8/30:  19%|█▉        | 27/143 [00:10<00:33,  3.49it/s]
Training 8/30:  43%|████▎     | 62/143 [00:10<00:08,  9.83it/s]
Training 8/30:  69%|██████▊   | 98/143 [00:11<00:02, 18.75it/s]
Training 8/30: 100%|██████████| 143/143 [00:11<00:00, 12.32it/s]


Epoch 7: train_loss=0.2462, val_loss=0.2314



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<23:29,  9.93s/it]
Training 9/30:  16%|█▌        | 23/143 [00:10<00:37,  3.22it/s]
Training 9/30:  41%|████      | 58/143 [00:10<00:08, 10.10it/s]
Training 9/30:  66%|██████▋   | 95/143 [00:10<00:02, 20.02it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.29it/s]


Epoch 8: train_loss=0.2201, val_loss=0.2059



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<23:26,  9.90s/it]
Training 10/30:  10%|▉         | 14/143 [00:10<01:06,  1.95it/s]
Training 10/30:  25%|██▌       | 36/143 [00:10<00:17,  6.28it/s]
Training 10/30:  50%|█████     | 72/143 [00:10<00:04, 16.05it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.25it/s]


Epoch 9: train_loss=0.2019, val_loss=0.1918



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:10<23:56, 10.12s/it]
Training 11/30:  13%|█▎        | 18/143 [00:10<00:50,  2.46it/s]
Training 11/30:  26%|██▌       | 37/143 [00:10<00:17,  6.12it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.19it/s]


Epoch 10: train_loss=0.1875, val_loss=0.1761



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:10<23:50, 10.07s/it]
Training 12/30:  20%|█▉        | 28/143 [00:10<00:29,  3.87it/s]
Training 12/30:  43%|████▎     | 61/143 [00:10<00:07, 10.25it/s]
Training 12/30:  65%|██████▌   | 93/143 [00:10<00:02, 18.68it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.09it/s]


Epoch 11: train_loss=0.1737, val_loss=0.1753



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<23:25,  9.90s/it]
Training 13/30:  11%|█         | 16/143 [00:10<00:56,  2.23it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 12: train_loss=0.1678, val_loss=0.1592



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<22:46,  9.62s/it]
Training 14/30:  15%|█▍        | 21/143 [00:09<00:40,  3.03it/s]
Training 14/30:  34%|███▍      | 49/143 [00:09<00:10,  8.69it/s]
Training 14/30:  62%|██████▏   | 88/143 [00:09<00:02, 19.51it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 13: train_loss=0.1575, val_loss=0.1520



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<22:43,  9.60s/it]
Training 15/30:  13%|█▎        | 18/143 [00:09<00:48,  2.59it/s]
Training 15/30:  27%|██▋       | 39/143 [00:09<00:15,  6.84it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 14: train_loss=0.1528, val_loss=0.1487



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:46,  9.63s/it]
Training 16/30:  13%|█▎        | 19/143 [00:09<00:45,  2.73it/s]
Training 16/30:  36%|███▋      | 52/143 [00:09<00:09,  9.43it/s]
Training 16/30:  64%|██████▍   | 92/143 [00:09<00:02, 20.51it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 15: train_loss=0.1464, val_loss=0.1419



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<23:09,  9.78s/it]
Training 17/30:  15%|█▌        | 22/143 [00:09<00:38,  3.12it/s]
Training 17/30:  39%|███▉      | 56/143 [00:09<00:08,  9.91it/s]
Training 17/30:  66%|██████▋   | 95/143 [00:10<00:02, 20.53it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 16: train_loss=0.1425, val_loss=0.1462



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 18/30:  13%|█▎        | 19/143 [00:09<00:45,  2.70it/s]
Training 18/30:  35%|███▍      | 50/143 [00:09<00:10,  8.92it/s]
Training 18/30:  62%|██████▏   | 88/143 [00:10<00:02, 19.33it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 13.49it/s]


Epoch 17: train_loss=0.1389, val_loss=0.1358



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 19/30:  13%|█▎        | 19/143 [00:09<00:45,  2.71it/s]
Training 19/30:  31%|███▏      | 45/143 [00:09<00:12,  7.94it/s]
Training 19/30:  64%|██████▎   | 91/143 [00:10<00:02, 20.68it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 18: train_loss=0.1393, val_loss=0.1346



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 20/30:  17%|█▋        | 25/143 [00:09<00:32,  3.66it/s]
Training 20/30:  41%|████▏     | 59/143 [00:09<00:07, 10.61it/s]
Training 20/30:  68%|██████▊   | 97/143 [00:09<00:02, 21.20it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 19: train_loss=0.1375, val_loss=0.1384



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<22:49,  9.64s/it]
Training 21/30:  14%|█▍        | 20/143 [00:09<00:42,  2.87it/s]
Training 21/30:  27%|██▋       | 39/143 [00:09<00:15,  6.69it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.76it/s]


Epoch 20: train_loss=0.1384, val_loss=0.1300



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<23:12,  9.81s/it]
Training 22/30:  13%|█▎        | 19/143 [00:09<00:46,  2.68it/s]
Training 22/30:  32%|███▏      | 46/143 [00:10<00:12,  8.05it/s]
Training 22/30:  57%|█████▋    | 81/143 [00:10<00:03, 17.58it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 13.39it/s]


Epoch 21: train_loss=0.1334, val_loss=0.1305



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 23/30:  13%|█▎        | 18/143 [00:09<00:48,  2.56it/s]
Training 23/30:  33%|███▎      | 47/143 [00:09<00:11,  8.37it/s]
Training 23/30:  57%|█████▋    | 81/143 [00:10<00:03, 17.68it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 13.47it/s]


Epoch 22: train_loss=0.1300, val_loss=0.1290



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<22:41,  9.59s/it]
Training 24/30:  11%|█         | 16/143 [00:09<00:55,  2.30it/s]
Training 24/30:  31%|███▏      | 45/143 [00:09<00:11,  8.21it/s]
Training 24/30:  56%|█████▌    | 80/143 [00:09<00:03, 17.95it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 23: train_loss=0.1307, val_loss=0.1582



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<23:33,  9.96s/it]
Training 25/30:  17%|█▋        | 24/143 [00:10<00:35,  3.35it/s]
Training 25/30:  32%|███▏      | 46/143 [00:10<00:12,  7.64it/s]
Training 25/30:  57%|█████▋    | 81/143 [00:10<00:03, 17.06it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.20it/s]


Epoch 24: train_loss=0.1323, val_loss=0.1285



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<23:15,  9.83s/it]
Training 26/30:  15%|█▌        | 22/143 [00:09<00:38,  3.11it/s]
Training 26/30:  38%|███▊      | 55/143 [00:10<00:09,  9.66it/s]
Training 26/30:  62%|██████▏   | 88/143 [00:10<00:02, 18.58it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 25: train_loss=0.1303, val_loss=0.1264



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:54,  9.68s/it]
Training 27/30:  14%|█▍        | 20/143 [00:09<00:42,  2.86it/s]
Training 27/30:  30%|███       | 43/143 [00:09<00:13,  7.48it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 26: train_loss=0.1310, val_loss=0.1328



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:24,  9.47s/it]
Training 28/30:  15%|█▍        | 21/143 [00:09<00:39,  3.08it/s]
Training 28/30:  30%|███       | 43/143 [00:09<00:13,  7.59it/s]
Training 28/30:  56%|█████▌    | 80/143 [00:09<00:03, 18.04it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 27: train_loss=0.1280, val_loss=0.1372



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<23:16,  9.83s/it]
Training 29/30:  18%|█▊        | 26/143 [00:09<00:31,  3.68it/s]
Training 29/30:  39%|███▉      | 56/143 [00:10<00:09,  9.62it/s]
Training 29/30:  64%|██████▎   | 91/143 [00:10<00:02, 19.08it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 28: train_loss=0.1266, val_loss=0.1269



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<23:02,  9.74s/it]
Training 30/30:  14%|█▍        | 20/143 [00:09<00:43,  2.85it/s]
Training 30/30:  38%|███▊      | 55/143 [00:09<00:08,  9.87it/s]
Training 30/30:  65%|██████▌   | 93/143 [00:10<00:02, 20.26it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 29: train_loss=0.1276, val_loss=0.1247


2025-06-02 19:52:15,952 - __main__ - INFO - Saved linear probe for layer 10 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_10_probe.pth
2025-06-02 19:52:36,784 - __main__ - INFO - Running mlp probe on layer 10...
2025-06-02 19:52:36,785 - __main__ - INFO - Running mlp probe on layer 10 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<23:37,  9.98s/it]
Training 1/40:   8%|▊         | 11/143 [00:10<01:27,  1.51it/s]
Training 1/40:  20%|█▉        | 28/143 [00:10<00:23,  4.84it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 13.27it/s]


Epoch 0: train_loss=1.7343, val_loss=0.1622



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<23:00,  9.72s/it]
Training 2/40:   8%|▊         | 12/143 [00:09<01:17,  1.69it/s]
Training 2/40:  18%|█▊        | 26/143 [00:09<00:26,  4.49it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 1: train_loss=0.1496, val_loss=0.1382



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 3/40:   8%|▊         | 12/143 [00:09<01:16,  1.72it/s]
Training 3/40:  20%|█▉        | 28/143 [00:09<00:23,  4.96it/s]
Training 3/40:  32%|███▏      | 46/143 [00:09<00:09,  9.94it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 13.69it/s]


Epoch 2: train_loss=0.1335, val_loss=0.1306



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:10<24:05, 10.18s/it]
Training 4/40:   8%|▊         | 11/143 [00:10<01:29,  1.48it/s]
Training 4/40:  17%|█▋        | 24/143 [00:10<00:29,  3.97it/s]
Training 4/40:  29%|██▊       | 41/143 [00:10<00:12,  8.44it/s]
Training 4/40: 100%|██████████| 143/143 [00:11<00:00, 12.94it/s]


Epoch 3: train_loss=0.1250, val_loss=0.1250



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<23:30,  9.93s/it]
Training 5/40:   8%|▊         | 12/143 [00:10<01:19,  1.66it/s]
Training 5/40:  18%|█▊        | 26/143 [00:10<00:26,  4.40it/s]
Training 5/40:  33%|███▎      | 47/143 [00:10<00:09, 10.06it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.22it/s]


Epoch 4: train_loss=0.1212, val_loss=0.1198



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<22:46,  9.62s/it]
Training 6/40:   8%|▊         | 12/143 [00:09<01:16,  1.71it/s]
Training 6/40:  21%|██        | 30/143 [00:09<00:21,  5.35it/s]
Training 6/40:  33%|███▎      | 47/143 [00:09<00:09, 10.01it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 5: train_loss=0.1185, val_loss=0.1181



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<22:35,  9.55s/it]
Training 7/40:  11%|█         | 16/143 [00:09<00:54,  2.31it/s]
Training 7/40:  23%|██▎       | 33/143 [00:09<00:19,  5.77it/s]
Training 7/40:  35%|███▍      | 50/143 [00:09<00:08, 10.46it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 6: train_loss=0.1160, val_loss=0.1183



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<23:09,  9.79s/it]
Training 8/40:   8%|▊         | 11/143 [00:09<01:25,  1.54it/s]
Training 8/40:  20%|██        | 29/143 [00:09<00:22,  5.13it/s]
Training 8/40:  31%|███▏      | 45/143 [00:10<00:10,  9.45it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 7: train_loss=0.1144, val_loss=0.1152



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<23:35,  9.97s/it]
Training 9/40:   9%|▉         | 13/143 [00:10<01:12,  1.79it/s]
Training 9/40:  28%|██▊       | 40/143 [00:10<00:14,  7.10it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.36it/s]


Epoch 8: train_loss=0.1129, val_loss=0.1205



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 10/40:   8%|▊         | 12/143 [00:09<01:17,  1.68it/s]
Training 10/40:  20%|█▉        | 28/143 [00:09<00:23,  4.87it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 9: train_loss=0.1111, val_loss=0.1135



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:42,  9.59s/it]
Training 11/40:  10%|█         | 15/143 [00:09<00:59,  2.16it/s]
Training 11/40:  21%|██        | 30/143 [00:09<00:21,  5.19it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 10: train_loss=0.1093, val_loss=0.1128



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<22:42,  9.59s/it]
Training 12/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 12/40:  19%|█▉        | 27/143 [00:09<00:24,  4.82it/s]
Training 12/40:  32%|███▏      | 46/143 [00:09<00:09, 10.09it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 11: train_loss=0.1079, val_loss=0.1114



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:10<23:40, 10.01s/it]
Training 13/40:   8%|▊         | 11/143 [00:10<01:27,  1.50it/s]
Training 13/40:  20%|█▉        | 28/143 [00:10<00:23,  4.82it/s]
Training 13/40:  34%|███▎      | 48/143 [00:10<00:09, 10.15it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.20it/s]


Epoch 12: train_loss=0.1059, val_loss=0.1122



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:52,  9.66s/it]
Training 14/40:   8%|▊         | 11/143 [00:09<01:24,  1.56it/s]
Training 14/40:  19%|█▉        | 27/143 [00:09<00:24,  4.78it/s]
Training 14/40:  31%|███       | 44/143 [00:09<00:10,  9.45it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 13: train_loss=0.1051, val_loss=0.1158



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<23:12,  9.80s/it]
Training 15/40:   8%|▊         | 12/143 [00:09<01:18,  1.68it/s]
Training 15/40:  31%|███       | 44/143 [00:10<00:12,  8.06it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.58it/s]


Epoch 14: train_loss=0.1030, val_loss=0.1096



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<23:18,  9.85s/it]
Training 16/40:   9%|▉         | 13/143 [00:09<01:11,  1.82it/s]
Training 16/40:  20%|██        | 29/143 [00:10<00:22,  4.97it/s]
Training 16/40:  38%|███▊      | 54/143 [00:10<00:07, 11.78it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.39it/s]


Epoch 15: train_loss=0.1017, val_loss=0.1094



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 17/40:   8%|▊         | 12/143 [00:09<01:17,  1.69it/s]
Training 17/40:  19%|█▉        | 27/143 [00:09<00:24,  4.69it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 16: train_loss=0.1008, val_loss=0.1083



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<22:59,  9.72s/it]
Training 18/40:   9%|▉         | 13/143 [00:09<01:10,  1.84it/s]
Training 18/40:  17%|█▋        | 25/143 [00:09<00:27,  4.23it/s]
Training 18/40:  30%|███       | 43/143 [00:10<00:10,  9.16it/s]
Training 18/40:  43%|████▎     | 62/143 [00:10<00:05, 16.11it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 17: train_loss=0.0991, val_loss=0.1081



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<23:23,  9.88s/it]
Training 19/40:   8%|▊         | 12/143 [00:09<01:18,  1.67it/s]
Training 19/40:  20%|██        | 29/143 [00:10<00:22,  5.02it/s]
Training 19/40:  32%|███▏      | 46/143 [00:10<00:10,  9.58it/s]
Training 19/40:  48%|████▊     | 69/143 [00:10<00:04, 17.92it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.14it/s]


Epoch 18: train_loss=0.0981, val_loss=0.1115



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<22:55,  9.68s/it]
Training 20/40:   8%|▊         | 12/143 [00:09<01:17,  1.70it/s]
Training 20/40:  29%|██▊       | 41/143 [00:09<00:13,  7.56it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.73it/s]


Epoch 19: train_loss=0.0950, val_loss=0.1082



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<23:00,  9.72s/it]
Training 21/40:  10%|▉         | 14/143 [00:09<01:05,  1.98it/s]
Training 21/40:  22%|██▏       | 31/143 [00:09<00:20,  5.38it/s]
Training 21/40:  48%|████▊     | 69/143 [00:10<00:04, 15.90it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 20: train_loss=0.0935, val_loss=0.1056



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 22/40:   8%|▊         | 11/143 [00:09<01:25,  1.55it/s]
Training 22/40:  20%|█▉        | 28/143 [00:09<00:23,  4.96it/s]
Training 22/40:  32%|███▏      | 46/143 [00:10<00:09,  9.89it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 21: train_loss=0.0922, val_loss=0.1068



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<22:43,  9.61s/it]
Training 23/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 23/40:  22%|██▏       | 31/143 [00:09<00:19,  5.63it/s]
Training 23/40:  35%|███▍      | 50/143 [00:09<00:08, 10.87it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 22: train_loss=0.0899, val_loss=0.1074



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<23:11,  9.80s/it]
Training 24/40:   7%|▋         | 10/143 [00:09<01:35,  1.39it/s]
Training 24/40:  26%|██▌       | 37/143 [00:10<00:15,  6.79it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 23: train_loss=0.0899, val_loss=0.1104



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:37,  9.56s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 25/40:  15%|█▌        | 22/143 [00:09<00:31,  3.81it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 24: train_loss=0.0885, val_loss=0.1100



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<22:37,  9.56s/it]
Training 26/40:   8%|▊         | 12/143 [00:09<01:16,  1.72it/s]
Training 26/40:  17%|█▋        | 24/143 [00:09<00:28,  4.15it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 25: train_loss=0.0859, val_loss=0.1043



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:57,  9.70s/it]
Training 27/40:   8%|▊         | 12/143 [00:09<01:17,  1.70it/s]
Training 27/40:  19%|█▉        | 27/143 [00:09<00:24,  4.71it/s]
Training 27/40:  30%|███       | 43/143 [00:10<00:11,  9.08it/s]
Training 27/40:  45%|████▍     | 64/143 [00:10<00:04, 16.86it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.37it/s]


Epoch 26: train_loss=0.0841, val_loss=0.1029



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<23:03,  9.74s/it]
Training 28/40:  10%|█         | 15/143 [00:09<01:00,  2.12it/s]
Training 28/40:  23%|██▎       | 33/143 [00:09<00:19,  5.71it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 27: train_loss=0.0822, val_loss=0.1047



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<23:04,  9.75s/it]
Training 29/40:   8%|▊         | 11/143 [00:09<01:25,  1.54it/s]
Training 29/40:  16%|█▌        | 23/143 [00:09<00:30,  3.94it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 28: train_loss=0.0810, val_loss=0.1039



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<22:52,  9.67s/it]
Training 30/40:   7%|▋         | 10/143 [00:09<01:34,  1.41it/s]
Training 30/40:  17%|█▋        | 25/143 [00:09<00:26,  4.44it/s]
Training 30/40:  30%|███       | 43/143 [00:09<00:10,  9.39it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 29: train_loss=0.0791, val_loss=0.1049



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<22:42,  9.60s/it]
Training 31/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 31/40:  19%|█▉        | 27/143 [00:09<00:24,  4.82it/s]
Training 31/40:  31%|███▏      | 45/143 [00:09<00:10,  9.80it/s]
Training 31/40:  46%|████▌     | 66/143 [00:10<00:04, 17.60it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.49it/s]


Epoch 30: train_loss=0.0807, val_loss=0.1033



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<23:09,  9.79s/it]
Training 32/40:   7%|▋         | 10/143 [00:09<01:35,  1.39it/s]
Training 32/40:  25%|██▌       | 36/143 [00:09<00:16,  6.60it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.57it/s]


Epoch 31: train_loss=0.0772, val_loss=0.1064



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:41,  9.59s/it]
Training 33/40:   8%|▊         | 12/143 [00:09<01:16,  1.72it/s]
Training 33/40:  17%|█▋        | 25/143 [00:09<00:27,  4.35it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 32: train_loss=0.0777, val_loss=0.0988



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<22:31,  9.52s/it]
Training 34/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 34/40:  17%|█▋        | 25/143 [00:09<00:26,  4.43it/s]
Training 34/40:  29%|██▉       | 42/143 [00:09<00:11,  9.16it/s]
Training 34/40:  41%|████▏     | 59/143 [00:09<00:05, 15.47it/s]
Training 34/40:  58%|█████▊    | 83/143 [00:10<00:02, 27.30it/s]
Training 34/40:  80%|███████▉  | 114/143 [00:10<00:00, 47.08it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 33: train_loss=0.0747, val_loss=0.1003



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:08<21:00,  8.87s/it]
Training 35/40:   7%|▋         | 10/143 [00:08<01:26,  1.53it/s]
Training 35/40:  15%|█▌        | 22/143 [00:09<00:29,  4.14it/s]
Training 35/40:  24%|██▍       | 34/143 [00:09<00:14,  7.66it/s]
Training 35/40:  35%|███▍      | 50/143 [00:09<00:06, 14.00it/s]
Training 35/40:  48%|████▊     | 69/143 [00:09<00:03, 23.81it/s]
Training 35/40:  64%|██████▎   | 91/143 [00:09<00:01, 38.21it/s]
Training 35/40:  79%|███████▉  | 113/143 [00:09<00:00, 55.41it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 34: train_loss=0.0731, val_loss=0.1003



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<21:16,  8.99s/it]
Training 36/40:   8%|▊         | 11/143 [00:09<01:18,  1.67it/s]
Training 36/40:  16%|█▌        | 23/143 [00:09<00:28,  4.26it/s]
Training 36/40:  25%|██▌       | 36/143 [00:09<00:13,  8.06it/s]
Training 36/40:  38%|███▊      | 55/143 [00:09<00:05, 15.57it/s]
Training 36/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.25it/s]
Training 36/40:  69%|██████▊   | 98/143 [00:09<00:01, 41.06it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 35: train_loss=0.0710, val_loss=0.1055



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 37/40:  15%|█▌        | 22/143 [00:09<00:30,  4.02it/s]
Training 37/40:  27%|██▋       | 38/143 [00:09<00:12,  8.64it/s]
Training 37/40:  40%|███▉      | 57/143 [00:09<00:05, 16.00it/s]
Training 37/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.61it/s]
Training 37/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.84it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 36: train_loss=0.0695, val_loss=0.1083



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:08<20:57,  8.85s/it]
Training 38/40:   8%|▊         | 11/143 [00:08<01:17,  1.70it/s]
Training 38/40:  16%|█▌        | 23/143 [00:09<00:27,  4.32it/s]
Training 38/40:  25%|██▌       | 36/143 [00:09<00:13,  8.16it/s]
Training 38/40:  38%|███▊      | 54/143 [00:09<00:05, 15.34it/s]
Training 38/40:  51%|█████     | 73/143 [00:09<00:02, 25.13it/s]
Training 38/40:  69%|██████▉   | 99/143 [00:09<00:01, 42.53it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 37: train_loss=0.0694, val_loss=0.1035



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 39/40:   6%|▋         | 9/143 [00:09<01:37,  1.37it/s]
Training 39/40:  14%|█▍        | 20/143 [00:09<00:32,  3.75it/s]
Training 39/40:  26%|██▌       | 37/143 [00:09<00:12,  8.80it/s]
Training 39/40:  38%|███▊      | 55/143 [00:09<00:05, 15.90it/s]
Training 39/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.76it/s]
Training 39/40:  69%|██████▊   | 98/143 [00:09<00:01, 40.98it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 38: train_loss=0.0682, val_loss=0.1014



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:27,  1.53it/s]
Training 40/40:  15%|█▍        | 21/143 [00:09<00:31,  3.91it/s]
Training 40/40:  29%|██▉       | 42/143 [00:09<00:09, 10.17it/s]
Training 40/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 39: train_loss=0.0678, val_loss=0.1011


2025-06-02 20:06:19,372 - __main__ - INFO - Saved mlp probe for layer 10 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_10_probe.pth
 83%|████████▎ | 5/6 [2:40:30<32:09, 1929.74s/it]  2025-06-02 20:06:38,523 - __main__ - INFO - Processing layer 11...
2025-06-02 20:06:38,524 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...

Extracting features:   0%|          | 0/1149 [00:00<?, ?it/s]
Extracting features:   0%|          | 1/1149 [00:07<2:19:29,  7.29s/it]
Extracting features:   0%|          | 2/1149 [00:07<1:00:12,  3.15s/it]
Extracting features:   0%|          | 3/1149 [00:07<34:57,  1.83s/it]  
Extracting features:   0%|          | 4/1149 [00:08<23:03,  1.21s/it]
Extracting features:   0%|          | 5/1149 [00:08<16:27,  1.16it/s]
Extracting features:   1%|          | 6/1149 [00:08<12:28,  1.53it/s]
Extracting features:   1%|          | 7/1149 [00:08<10:02,  1.89it/s]
Extracting features:   1%|          | 8/1149 [00:09<08:20,  2.28i

Epoch 0: train_loss=12.3500, val_loss=3.0520



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 2/30:  13%|█▎        | 18/143 [00:09<00:45,  2.73it/s]
Training 2/30:  27%|██▋       | 38/143 [00:09<00:15,  6.99it/s]
Training 2/30:  43%|████▎     | 62/143 [00:09<00:05, 13.95it/s]
Training 2/30:  64%|██████▍   | 92/143 [00:09<00:01, 25.59it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 1: train_loss=2.4127, val_loss=1.8753



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<20:59,  8.87s/it]
Training 3/30:  13%|█▎        | 19/143 [00:08<00:41,  2.96it/s]
Training 3/30:  24%|██▍       | 34/143 [00:09<00:17,  6.22it/s]
Training 3/30:  42%|████▏     | 60/143 [00:09<00:05, 13.97it/s]
Training 3/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.82it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 2: train_loss=1.6242, val_loss=1.3638



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:08<21:00,  8.88s/it]
Training 4/30:  13%|█▎        | 18/143 [00:08<00:44,  2.80it/s]
Training 4/30:  27%|██▋       | 39/143 [00:09<00:14,  7.38it/s]
Training 4/30:  43%|████▎     | 62/143 [00:09<00:05, 14.19it/s]
Training 4/30:  61%|██████    | 87/143 [00:09<00:02, 24.06it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 3: train_loss=1.2246, val_loss=1.0740



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:37,  9.13s/it]
Training 5/30:  11%|█         | 16/143 [00:09<00:52,  2.42it/s]
Training 5/30:  25%|██▌       | 36/143 [00:09<00:16,  6.67it/s]
Training 5/30:  40%|███▉      | 57/143 [00:09<00:06, 12.72it/s]
Training 5/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.62it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 4: train_loss=0.9720, val_loss=0.8665



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 6/30:  10%|█         | 15/143 [00:09<00:55,  2.32it/s]
Training 6/30:  27%|██▋       | 39/143 [00:09<00:13,  7.53it/s]
Training 6/30:  43%|████▎     | 61/143 [00:09<00:05, 13.98it/s]
Training 6/30:  61%|██████    | 87/143 [00:09<00:02, 24.19it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 5: train_loss=0.7931, val_loss=0.7247



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:58,  9.71s/it]
Training 7/30:  12%|█▏        | 17/143 [00:09<00:52,  2.42it/s]
Training 7/30:  26%|██▌       | 37/143 [00:09<00:16,  6.43it/s]
Training 7/30:  43%|████▎     | 61/143 [00:10<00:06, 13.00it/s]
Training 7/30:  60%|██████    | 86/143 [00:10<00:02, 22.15it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.35it/s]


Epoch 6: train_loss=0.6605, val_loss=0.6052



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<22:44,  9.61s/it]
Training 8/30:  15%|█▌        | 22/143 [00:09<00:38,  3.18it/s]
Training 8/30:  31%|███▏      | 45/143 [00:09<00:12,  7.82it/s]
Training 8/30:  48%|████▊     | 69/143 [00:09<00:05, 14.40it/s]
Training 8/30:  65%|██████▌   | 93/143 [00:10<00:02, 23.19it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 13.48it/s]


Epoch 7: train_loss=0.5583, val_loss=0.5160



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 9/30:  10%|█         | 15/143 [00:09<00:58,  2.19it/s]
Training 9/30:  25%|██▌       | 36/143 [00:09<00:16,  6.53it/s]
Training 9/30:  43%|████▎     | 62/143 [00:09<00:05, 13.85it/s]
Training 9/30:  62%|██████▏   | 89/143 [00:09<00:02, 23.99it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.73it/s]


Epoch 8: train_loss=0.4791, val_loss=0.4518



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:50,  9.65s/it]
Training 10/30:  12%|█▏        | 17/143 [00:09<00:51,  2.43it/s]
Training 10/30:  26%|██▌       | 37/143 [00:09<00:16,  6.46it/s]
Training 10/30:  42%|████▏     | 60/143 [00:09<00:06, 12.78it/s]
Training 10/30:  60%|██████    | 86/143 [00:10<00:02, 22.38it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.45it/s]


Epoch 9: train_loss=0.4168, val_loss=0.3996



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<22:05,  9.34s/it]
Training 11/30:   9%|▉         | 13/143 [00:09<01:07,  1.91it/s]
Training 11/30:  25%|██▌       | 36/143 [00:09<00:15,  6.72it/s]
Training 11/30:  42%|████▏     | 60/143 [00:09<00:06, 13.53it/s]
Training 11/30:  60%|██████    | 86/143 [00:09<00:02, 23.37it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.86it/s]


Epoch 10: train_loss=0.3660, val_loss=0.3504



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<22:15,  9.41s/it]
Training 12/30:   9%|▉         | 13/143 [00:09<01:08,  1.90it/s]
Training 12/30:  21%|██        | 30/143 [00:09<00:20,  5.41it/s]
Training 12/30:  41%|████▏     | 59/143 [00:09<00:06, 13.66it/s]
Training 12/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.44it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 11: train_loss=0.3260, val_loss=0.3156



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<22:24,  9.47s/it]
Training 13/30:  10%|█         | 15/143 [00:09<00:58,  2.18it/s]
Training 13/30:  26%|██▌       | 37/143 [00:09<00:15,  6.71it/s]
Training 13/30:  42%|████▏     | 60/143 [00:09<00:06, 13.15it/s]
Training 13/30:  59%|█████▊    | 84/143 [00:09<00:02, 22.09it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 12: train_loss=0.2932, val_loss=0.2839



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<22:38,  9.57s/it]
Training 14/30:  11%|█         | 16/143 [00:09<00:55,  2.31it/s]
Training 14/30:  29%|██▊       | 41/143 [00:09<00:13,  7.40it/s]
Training 14/30:  46%|████▌     | 66/143 [00:09<00:05, 14.33it/s]
Training 14/30:  67%|██████▋   | 96/143 [00:09<00:01, 25.49it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 13: train_loss=0.2655, val_loss=0.2613



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:10<24:44, 10.45s/it]
Training 15/30:  14%|█▍        | 20/143 [00:10<00:46,  2.65it/s]
Training 15/30:  29%|██▉       | 42/143 [00:10<00:14,  6.75it/s]
Training 15/30:  49%|████▉     | 70/143 [00:10<00:05, 13.92it/s]
Training 15/30:  67%|██████▋   | 96/143 [00:10<00:02, 22.79it/s]
Training 15/30: 100%|██████████| 143/143 [00:11<00:00, 12.57it/s]


Epoch 14: train_loss=0.2435, val_loss=0.2418



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 16/30:  12%|█▏        | 17/143 [00:09<00:49,  2.56it/s]
Training 16/30:  27%|██▋       | 38/143 [00:09<00:15,  6.99it/s]
Training 16/30:  42%|████▏     | 60/143 [00:09<00:06, 13.30it/s]
Training 16/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.11it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 15: train_loss=0.2247, val_loss=0.2238



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 17/30:  15%|█▍        | 21/143 [00:09<00:38,  3.21it/s]
Training 17/30:  29%|██▊       | 41/143 [00:09<00:13,  7.47it/s]
Training 17/30:  45%|████▍     | 64/143 [00:09<00:05, 14.15it/s]
Training 17/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.48it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.27it/s]


Epoch 16: train_loss=0.2105, val_loss=0.2086



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 18/30:  11%|█         | 16/143 [00:09<00:52,  2.41it/s]
Training 18/30:  24%|██▍       | 34/143 [00:09<00:17,  6.22it/s]
Training 18/30:  39%|███▉      | 56/143 [00:09<00:06, 12.58it/s]
Training 18/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.24it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 17: train_loss=0.1958, val_loss=0.1960



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<22:18,  9.42s/it]
Training 19/30:  14%|█▍        | 20/143 [00:09<00:41,  2.94it/s]
Training 19/30:  29%|██▊       | 41/143 [00:09<00:14,  7.25it/s]
Training 19/30:  48%|████▊     | 68/143 [00:09<00:05, 14.85it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 18: train_loss=0.1850, val_loss=0.1874



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 20/30:  10%|█         | 15/143 [00:09<00:55,  2.29it/s]
Training 20/30:  25%|██▌       | 36/143 [00:09<00:15,  6.82it/s]
Training 20/30:  42%|████▏     | 60/143 [00:09<00:05, 13.84it/s]
Training 20/30:  64%|██████▎   | 91/143 [00:09<00:01, 26.03it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 19: train_loss=0.1761, val_loss=0.1794



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<21:14,  8.97s/it]
Training 21/30:  12%|█▏        | 17/143 [00:09<00:48,  2.62it/s]
Training 21/30:  27%|██▋       | 38/143 [00:09<00:14,  7.15it/s]
Training 21/30:  46%|████▌     | 66/143 [00:09<00:04, 15.41it/s]
Training 21/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.34it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 20: train_loss=0.1676, val_loss=0.1721



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 22/30:  13%|█▎        | 19/143 [00:09<00:43,  2.88it/s]
Training 22/30:  27%|██▋       | 39/143 [00:09<00:14,  7.12it/s]
Training 22/30:  40%|███▉      | 57/143 [00:09<00:07, 12.27it/s]
Training 22/30:  57%|█████▋    | 81/143 [00:09<00:02, 21.56it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 21: train_loss=0.1622, val_loss=0.1662



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<22:49,  9.64s/it]
Training 23/30:  13%|█▎        | 18/143 [00:09<00:48,  2.58it/s]
Training 23/30:  29%|██▉       | 42/143 [00:09<00:13,  7.42it/s]
Training 23/30:  48%|████▊     | 68/143 [00:09<00:05, 14.57it/s]
Training 23/30:  68%|██████▊   | 97/143 [00:10<00:01, 25.27it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 13.52it/s]


Epoch 22: train_loss=0.1561, val_loss=0.1596



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<22:33,  9.53s/it]
Training 24/30:  12%|█▏        | 17/143 [00:09<00:51,  2.47it/s]
Training 24/30:  28%|██▊       | 40/143 [00:09<00:14,  7.16it/s]
Training 24/30:  46%|████▌     | 66/143 [00:09<00:05, 14.39it/s]
Training 24/30:  66%|██████▋   | 95/143 [00:09<00:01, 25.18it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 23: train_loss=0.1526, val_loss=0.1521



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 25/30:  11%|█         | 16/143 [00:09<00:53,  2.39it/s]
Training 25/30:  24%|██▍       | 35/143 [00:09<00:16,  6.38it/s]
Training 25/30:  39%|███▉      | 56/143 [00:09<00:07, 12.38it/s]
Training 25/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.81it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 24: train_loss=0.1480, val_loss=0.1560



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 26/30:   9%|▉         | 13/143 [00:09<01:05,  1.98it/s]
Training 26/30:  24%|██▍       | 34/143 [00:09<00:16,  6.50it/s]
Training 26/30:  40%|███▉      | 57/143 [00:09<00:06, 13.23it/s]
Training 26/30:  56%|█████▌    | 80/143 [00:09<00:02, 22.16it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 25: train_loss=0.1453, val_loss=0.1457



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:48,  9.63s/it]
Training 27/30:  13%|█▎        | 18/143 [00:09<00:48,  2.59it/s]
Training 27/30:  26%|██▌       | 37/143 [00:09<00:16,  6.41it/s]
Training 27/30:  42%|████▏     | 60/143 [00:09<00:06, 12.74it/s]
Training 27/30:  59%|█████▉    | 85/143 [00:10<00:02, 21.91it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.49it/s]


Epoch 26: train_loss=0.1425, val_loss=0.1442



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 28/30:  15%|█▍        | 21/143 [00:09<00:39,  3.09it/s]
Training 28/30:  29%|██▉       | 42/143 [00:09<00:13,  7.40it/s]
Training 28/30:  46%|████▌     | 66/143 [00:09<00:05, 14.12it/s]
Training 28/30:  65%|██████▌   | 93/143 [00:09<00:02, 24.24it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 27: train_loss=0.1401, val_loss=0.1496



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 29/30:  12%|█▏        | 17/143 [00:09<00:49,  2.56it/s]
Training 29/30:  27%|██▋       | 39/143 [00:09<00:14,  7.20it/s]
Training 29/30:  45%|████▍     | 64/143 [00:09<00:05, 14.40it/s]
Training 29/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.76it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 28: train_loss=0.1382, val_loss=0.1412



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:10<23:52, 10.09s/it]
Training 30/30:  14%|█▍        | 20/143 [00:10<00:44,  2.75it/s]
Training 30/30:  27%|██▋       | 39/143 [00:10<00:16,  6.41it/s]
Training 30/30:  41%|████      | 58/143 [00:10<00:07, 11.38it/s]
Training 30/30:  58%|█████▊    | 83/143 [00:10<00:02, 20.25it/s]
Training 30/30: 100%|██████████| 143/143 [00:11<00:00, 12.78it/s]


Epoch 29: train_loss=0.1378, val_loss=0.1408


2025-06-02 20:24:03,608 - __main__ - INFO - Saved linear probe for layer 11 to cache\probes\phase1_supervisedVIT_viewpoint_probing\linear_layer_11_probe.pth
2025-06-02 20:24:23,128 - __main__ - INFO - Running mlp probe on layer 11...
2025-06-02 20:24:23,128 - __main__ - INFO - Running mlp probe on layer 11 (feature_dim: 768)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:21,  9.03s/it]
Training 1/40:   6%|▋         | 9/143 [00:09<01:38,  1.35it/s]
Training 1/40:  15%|█▍        | 21/143 [00:09<00:31,  3.93it/s]
Training 1/40:  25%|██▌       | 36/143 [00:09<00:12,  8.32it/s]
Training 1/40:  36%|███▋      | 52/143 [00:09<00:06, 14.57it/s]
Training 1/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.97it/s]
Training 1/40:  65%|██████▌   | 93/143 [00:09<00:01, 37.96it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 13.97it/s]


Epoch 0: train_loss=3.0006, val_loss=0.1796



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<23:29,  9.92s/it]
Training 2/40:   7%|▋         | 10/143 [00:10<01:36,  1.38it/s]
Training 2/40:  14%|█▍        | 20/143 [00:10<00:36,  3.33it/s]
Training 2/40:  22%|██▏       | 32/143 [00:10<00:16,  6.54it/s]
Training 2/40:  32%|███▏      | 46/143 [00:10<00:08, 11.58it/s]
Training 2/40:  43%|████▎     | 62/143 [00:10<00:04, 19.17it/s]
Training 2/40:  60%|██████    | 86/143 [00:10<00:01, 34.15it/s]
Training 2/40:  77%|███████▋  | 110/143 [00:10<00:00, 52.22it/s]
Training 2/40: 100%|██████████| 143/143 [00:11<00:00, 12.73it/s]


Epoch 1: train_loss=0.1562, val_loss=0.1460



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 3/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 3/40:  15%|█▌        | 22/143 [00:09<00:31,  3.90it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 2: train_loss=0.1393, val_loss=0.1346



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 4/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 4/40:  15%|█▌        | 22/143 [00:09<00:30,  3.97it/s]
Training 4/40:  25%|██▌       | 36/143 [00:09<00:13,  8.07it/s]
Training 4/40:  35%|███▍      | 50/143 [00:09<00:06, 13.50it/s]
Training 4/40:  48%|████▊     | 69/143 [00:09<00:03, 23.26it/s]
Training 4/40:  63%|██████▎   | 90/143 [00:09<00:01, 36.86it/s]
Training 4/40:  81%|████████  | 116/143 [00:09<00:00, 57.90it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 3: train_loss=0.1311, val_loss=0.1285



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 5/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 5/40:  15%|█▍        | 21/143 [00:09<00:32,  3.75it/s]
Training 5/40:  24%|██▍       | 35/143 [00:09<00:13,  7.79it/s]
Training 5/40:  34%|███▎      | 48/143 [00:09<00:07, 12.74it/s]
Training 5/40:  46%|████▌     | 66/143 [00:09<00:03, 21.87it/s]
Training 5/40:  65%|██████▌   | 93/143 [00:09<00:01, 39.82it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.76it/s]


Epoch 4: train_loss=0.1264, val_loss=0.1263



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 6/40:   6%|▋         | 9/143 [00:09<01:42,  1.30it/s]
Training 6/40:  14%|█▍        | 20/143 [00:09<00:34,  3.57it/s]
Training 6/40:  22%|██▏       | 32/143 [00:09<00:15,  6.94it/s]
Training 6/40:  31%|███▏      | 45/143 [00:09<00:08, 11.82it/s]
Training 6/40:  43%|████▎     | 61/143 [00:09<00:04, 19.77it/s]
Training 6/40:  57%|█████▋    | 81/143 [00:10<00:01, 32.51it/s]
Training 6/40:  71%|███████▏  | 102/143 [00:10<00:00, 48.78it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 13.38it/s]


Epoch 5: train_loss=0.1238, val_loss=0.1236



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 7/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 7/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 7/40:  24%|██▍       | 35/143 [00:09<00:13,  7.85it/s]
Training 7/40:  38%|███▊      | 54/143 [00:09<00:05, 15.36it/s]
Training 7/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.73it/s]
Training 7/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.59it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 6: train_loss=0.1212, val_loss=0.1217



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 8/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 8/40:  17%|█▋        | 25/143 [00:09<00:25,  4.57it/s]
Training 8/40:  26%|██▌       | 37/143 [00:09<00:13,  8.00it/s]
Training 8/40:  38%|███▊      | 54/143 [00:09<00:06, 14.61it/s]
Training 8/40:  51%|█████     | 73/143 [00:09<00:02, 24.23it/s]
Training 8/40:  65%|██████▌   | 93/143 [00:09<00:01, 36.95it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 7: train_loss=0.1187, val_loss=0.1217



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 9/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 9/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 9/40:  26%|██▌       | 37/143 [00:09<00:13,  8.13it/s]
Training 9/40:  38%|███▊      | 54/143 [00:09<00:06, 14.64it/s]
Training 9/40:  50%|█████     | 72/143 [00:09<00:03, 23.60it/s]
Training 9/40:  67%|██████▋   | 96/143 [00:09<00:01, 39.12it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 8: train_loss=0.1167, val_loss=0.1211



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:21,  9.03s/it]
Training 10/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 10/40:  17%|█▋        | 25/143 [00:09<00:25,  4.67it/s]
Training 10/40:  28%|██▊       | 40/143 [00:09<00:11,  9.04it/s]
Training 10/40:  40%|███▉      | 57/143 [00:09<00:05, 15.65it/s]
Training 10/40:  55%|█████▌    | 79/143 [00:09<00:02, 26.93it/s]
Training 10/40:  73%|███████▎  | 105/143 [00:09<00:00, 43.96it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 9: train_loss=0.1150, val_loss=0.1177



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:12,  9.38s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 11/40:  17%|█▋        | 24/143 [00:09<00:27,  4.30it/s]
Training 11/40:  27%|██▋       | 39/143 [00:09<00:12,  8.53it/s]
Training 11/40:  39%|███▉      | 56/143 [00:09<00:05, 14.96it/s]
Training 11/40:  57%|█████▋    | 82/143 [00:09<00:02, 28.14it/s]
Training 11/40:  72%|███████▏  | 103/143 [00:09<00:00, 41.08it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 10: train_loss=0.1134, val_loss=0.1182



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 12/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 12/40:  17%|█▋        | 24/143 [00:09<00:27,  4.40it/s]
Training 12/40:  26%|██▌       | 37/143 [00:09<00:13,  8.13it/s]
Training 12/40:  38%|███▊      | 54/143 [00:09<00:06, 14.68it/s]
Training 12/40:  52%|█████▏    | 74/143 [00:09<00:02, 24.78it/s]
Training 12/40:  69%|██████▊   | 98/143 [00:09<00:01, 40.30it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 11: train_loss=0.1119, val_loss=0.1155



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:55,  9.26s/it]
Training 13/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 13/40:  14%|█▍        | 20/143 [00:09<00:33,  3.63it/s]
Training 13/40:  25%|██▌       | 36/143 [00:09<00:13,  8.21it/s]
Training 13/40:  36%|███▋      | 52/143 [00:09<00:06, 14.31it/s]
Training 13/40:  51%|█████     | 73/143 [00:09<00:02, 24.89it/s]
Training 13/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.75it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.67it/s]


Epoch 12: train_loss=0.1097, val_loss=0.1144



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 14/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 14/40:  15%|█▍        | 21/143 [00:09<00:31,  3.82it/s]
Training 14/40:  22%|██▏       | 32/143 [00:09<00:15,  6.97it/s]
Training 14/40:  32%|███▏      | 46/143 [00:09<00:07, 12.39it/s]
Training 14/40:  45%|████▌     | 65/143 [00:09<00:03, 22.07it/s]
Training 14/40:  60%|██████    | 86/143 [00:09<00:01, 35.67it/s]
Training 14/40:  76%|███████▌  | 108/143 [00:09<00:00, 52.93it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 13: train_loss=0.1084, val_loss=0.1141



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 15/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 15/40:  15%|█▍        | 21/143 [00:09<00:32,  3.80it/s]
Training 15/40:  25%|██▌       | 36/143 [00:09<00:13,  8.20it/s]
Training 15/40:  36%|███▋      | 52/143 [00:09<00:06, 14.44it/s]
Training 15/40:  50%|█████     | 72/143 [00:09<00:02, 24.70it/s]
Training 15/40:  66%|██████▌   | 94/143 [00:09<00:01, 39.01it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 14: train_loss=0.1068, val_loss=0.1135



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 16/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 16/40:  16%|█▌        | 23/143 [00:09<00:28,  4.24it/s]
Training 16/40:  25%|██▌       | 36/143 [00:09<00:13,  7.98it/s]
Training 16/40:  38%|███▊      | 54/143 [00:09<00:05, 14.97it/s]
Training 16/40:  51%|█████     | 73/143 [00:09<00:02, 24.57it/s]
Training 16/40:  68%|██████▊   | 97/143 [00:09<00:01, 40.22it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 15: train_loss=0.1049, val_loss=0.1159



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:44,  9.18s/it]
Training 17/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 17/40:  14%|█▍        | 20/143 [00:09<00:33,  3.65it/s]
Training 17/40:  25%|██▌       | 36/143 [00:09<00:12,  8.26it/s]
Training 17/40:  35%|███▍      | 50/143 [00:09<00:06, 13.57it/s]
Training 17/40:  50%|████▉     | 71/143 [00:09<00:02, 24.29it/s]
Training 17/40:  68%|██████▊   | 97/143 [00:09<00:01, 41.28it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 16: train_loss=0.1043, val_loss=0.1128



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 18/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 18/40:  16%|█▌        | 23/143 [00:09<00:28,  4.27it/s]
Training 18/40:  27%|██▋       | 38/143 [00:09<00:12,  8.69it/s]
Training 18/40:  38%|███▊      | 54/143 [00:09<00:05, 14.96it/s]
Training 18/40:  55%|█████▍    | 78/143 [00:09<00:02, 27.49it/s]
Training 18/40:  73%|███████▎  | 105/143 [00:09<00:00, 45.35it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 17: train_loss=0.1016, val_loss=0.1113



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 19/40:   6%|▋         | 9/143 [00:09<01:39,  1.34it/s]
Training 19/40:  13%|█▎        | 19/143 [00:09<00:35,  3.47it/s]
Training 19/40:  22%|██▏       | 31/143 [00:09<00:16,  6.96it/s]
Training 19/40:  33%|███▎      | 47/143 [00:09<00:07, 13.20it/s]
Training 19/40:  45%|████▌     | 65/143 [00:09<00:03, 22.38it/s]
Training 19/40:  62%|██████▏   | 88/143 [00:09<00:01, 37.43it/s]
Training 19/40:  78%|███████▊  | 111/143 [00:09<00:00, 55.49it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 18: train_loss=0.1000, val_loss=0.1111



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 20/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 20/40:  15%|█▌        | 22/143 [00:09<00:30,  3.91it/s]
Training 20/40:  26%|██▌       | 37/143 [00:09<00:12,  8.18it/s]
Training 20/40:  41%|████▏     | 59/143 [00:09<00:05, 16.64it/s]
Training 20/40:  55%|█████▌    | 79/143 [00:09<00:02, 26.55it/s]
Training 20/40:  69%|██████▊   | 98/143 [00:09<00:01, 38.17it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 19: train_loss=0.0981, val_loss=0.1107



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:45,  9.20s/it]
Training 21/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 21/40:  15%|█▍        | 21/143 [00:09<00:32,  3.80it/s]
Training 21/40:  24%|██▍       | 34/143 [00:09<00:14,  7.53it/s]
Training 21/40:  36%|███▋      | 52/143 [00:09<00:06, 14.51it/s]
Training 21/40:  50%|████▉     | 71/143 [00:09<00:02, 24.09it/s]
Training 21/40:  65%|██████▌   | 93/143 [00:09<00:01, 38.27it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 20: train_loss=0.0968, val_loss=0.1098



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 22/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 22/40:  17%|█▋        | 24/143 [00:09<00:28,  4.21it/s]
Training 22/40:  27%|██▋       | 38/143 [00:09<00:12,  8.14it/s]
Training 22/40:  36%|███▌      | 51/143 [00:09<00:07, 12.98it/s]
Training 22/40:  48%|████▊     | 69/143 [00:09<00:03, 21.91it/s]
Training 22/40:  62%|██████▏   | 88/143 [00:10<00:01, 33.83it/s]
Training 22/40:  79%|███████▉  | 113/143 [00:10<00:00, 53.61it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 21: train_loss=0.0944, val_loss=0.1108



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 23/40:   6%|▋         | 9/143 [00:09<01:42,  1.30it/s]
Training 23/40:  14%|█▍        | 20/143 [00:09<00:34,  3.57it/s]
Training 23/40:  23%|██▎       | 33/143 [00:09<00:15,  7.23it/s]
Training 23/40:  32%|███▏      | 46/143 [00:09<00:08, 12.09it/s]
Training 23/40:  44%|████▍     | 63/143 [00:09<00:03, 20.50it/s]
Training 23/40:  62%|██████▏   | 88/143 [00:10<00:01, 36.64it/s]
Training 23/40:  77%|███████▋  | 110/143 [00:10<00:00, 53.29it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 22: train_loss=0.0931, val_loss=0.1106



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:05,  9.34s/it]
Training 24/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 24/40:  15%|█▍        | 21/143 [00:09<00:33,  3.68it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 23: train_loss=0.0903, val_loss=0.1088



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 25/40:  16%|█▌        | 23/143 [00:09<00:28,  4.28it/s]
Training 25/40:  26%|██▌       | 37/143 [00:09<00:12,  8.40it/s]
Training 25/40:  38%|███▊      | 55/143 [00:09<00:05, 15.54it/s]
Training 25/40:  52%|█████▏    | 75/143 [00:09<00:02, 25.82it/s]
Training 25/40:  66%|██████▋   | 95/143 [00:09<00:01, 38.66it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 24: train_loss=0.0883, val_loss=0.1094



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:55,  9.26s/it]
Training 26/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 26/40:  17%|█▋        | 25/143 [00:09<00:25,  4.55it/s]
Training 26/40:  27%|██▋       | 39/143 [00:09<00:12,  8.53it/s]
Training 26/40:  40%|███▉      | 57/143 [00:09<00:05, 15.41it/s]
Training 26/40:  53%|█████▎    | 76/143 [00:09<00:02, 24.87it/s]
Training 26/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.55it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 25: train_loss=0.0866, val_loss=0.1146



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 27/40:  16%|█▌        | 23/143 [00:09<00:28,  4.21it/s]
Training 27/40:  26%|██▌       | 37/143 [00:09<00:12,  8.21it/s]
Training 27/40:  39%|███▉      | 56/143 [00:09<00:05, 15.55it/s]
Training 27/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.63it/s]
Training 27/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.75it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 26: train_loss=0.0842, val_loss=0.1073



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 28/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 28/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 28/40:  24%|██▍       | 34/143 [00:09<00:14,  7.48it/s]
Training 28/40:  36%|███▋      | 52/143 [00:09<00:06, 14.50it/s]
Training 28/40:  49%|████▉     | 70/143 [00:09<00:03, 23.56it/s]
Training 28/40:  67%|██████▋   | 96/143 [00:09<00:01, 40.69it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 27: train_loss=0.0809, val_loss=0.1086



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 29/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 29/40:  17%|█▋        | 24/143 [00:09<00:28,  4.22it/s]
Training 29/40:  24%|██▍       | 35/143 [00:09<00:14,  7.27it/s]
Training 29/40:  36%|███▋      | 52/143 [00:09<00:06, 13.73it/s]
Training 29/40:  50%|████▉     | 71/143 [00:09<00:03, 23.16it/s]
Training 29/40:  65%|██████▌   | 93/143 [00:10<00:01, 37.16it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 28: train_loss=0.0787, val_loss=0.1094



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<22:22,  9.45s/it]
Training 30/40:   6%|▋         | 9/143 [00:09<01:43,  1.29it/s]
Training 30/40:  14%|█▍        | 20/143 [00:09<00:34,  3.55it/s]
Training 30/40:  24%|██▍       | 35/143 [00:09<00:13,  7.77it/s]
Training 30/40:  34%|███▍      | 49/143 [00:09<00:07, 13.00it/s]
Training 30/40:  50%|████▉     | 71/143 [00:09<00:02, 24.02it/s]
Training 30/40:  64%|██████▎   | 91/143 [00:10<00:01, 36.40it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 29: train_loss=0.0777, val_loss=0.1098



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 31/40:   8%|▊         | 12/143 [00:09<01:11,  1.83it/s]
Training 31/40:  18%|█▊        | 26/143 [00:09<00:24,  4.83it/s]
Training 31/40:  27%|██▋       | 39/143 [00:09<00:12,  8.60it/s]
Training 31/40:  41%|████      | 58/143 [00:09<00:05, 16.05it/s]
Training 31/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.65it/s]
Training 31/40:  70%|██████▉   | 100/143 [00:09<00:01, 40.53it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 30: train_loss=0.0749, val_loss=0.1097



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<21:10,  8.94s/it]
Training 32/40:   6%|▋         | 9/143 [00:09<01:38,  1.37it/s]
Training 32/40:  13%|█▎        | 19/143 [00:09<00:35,  3.53it/s]
Training 32/40:  24%|██▍       | 34/143 [00:09<00:13,  7.96it/s]
Training 32/40:  35%|███▍      | 50/143 [00:09<00:06, 14.25it/s]
Training 32/40:  48%|████▊     | 68/143 [00:09<00:03, 23.51it/s]
Training 32/40:  62%|██████▏   | 88/143 [00:09<00:01, 36.46it/s]
Training 32/40:  84%|████████▍ | 120/143 [00:09<00:00, 63.16it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 31: train_loss=0.0726, val_loss=0.1118



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:34,  9.54s/it]
Training 33/40:   8%|▊         | 12/143 [00:09<01:15,  1.72it/s]
Training 33/40:  17%|█▋        | 24/143 [00:09<00:28,  4.16it/s]
Training 33/40:  27%|██▋       | 38/143 [00:09<00:13,  8.03it/s]
Training 33/40:  39%|███▉      | 56/143 [00:09<00:05, 14.76it/s]
Training 33/40:  54%|█████▍    | 77/143 [00:10<00:02, 25.10it/s]
Training 33/40:  71%|███████   | 101/143 [00:10<00:01, 40.00it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 32: train_loss=0.0700, val_loss=0.1095



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<23:10,  9.79s/it]
Training 34/40:   6%|▋         | 9/143 [00:09<01:47,  1.25it/s]
Training 34/40:  14%|█▍        | 20/143 [00:10<00:35,  3.43it/s]
Training 34/40:  22%|██▏       | 32/143 [00:10<00:16,  6.68it/s]
Training 34/40:  38%|███▊      | 54/143 [00:10<00:06, 14.83it/s]
Training 34/40:  52%|█████▏    | 74/143 [00:10<00:02, 24.40it/s]
Training 34/40:  67%|██████▋   | 96/143 [00:10<00:01, 37.86it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.00it/s]


Epoch 33: train_loss=0.0698, val_loss=0.1094



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:10<24:01, 10.15s/it]
Training 35/40:   8%|▊         | 11/143 [00:10<01:28,  1.48it/s]
Training 35/40:  15%|█▌        | 22/143 [00:10<00:33,  3.59it/s]
Training 35/40:  25%|██▌       | 36/143 [00:10<00:14,  7.27it/s]
Training 35/40:  35%|███▍      | 50/143 [00:10<00:07, 12.18it/s]
Training 35/40:  50%|█████     | 72/143 [00:10<00:03, 22.62it/s]
Training 35/40:  66%|██████▋   | 95/143 [00:10<00:01, 36.45it/s]
Training 35/40: 100%|██████████| 143/143 [00:11<00:00, 12.58it/s]


Epoch 34: train_loss=0.0671, val_loss=0.1119



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<21:04,  8.90s/it]
Training 36/40:   7%|▋         | 10/143 [00:09<01:26,  1.53it/s]
Training 36/40:  14%|█▍        | 20/143 [00:09<00:33,  3.70it/s]
Training 36/40:  22%|██▏       | 31/143 [00:09<00:16,  6.94it/s]
Training 36/40:  36%|███▋      | 52/143 [00:09<00:05, 15.40it/s]
Training 36/40:  50%|████▉     | 71/143 [00:09<00:02, 25.18it/s]
Training 36/40:  67%|██████▋   | 96/143 [00:09<00:01, 41.79it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 35: train_loss=0.0650, val_loss=0.1128



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 37/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 37/40:  15%|█▌        | 22/143 [00:09<00:30,  4.02it/s]
Training 37/40:  24%|██▍       | 35/143 [00:09<00:13,  7.81it/s]
Training 37/40:  34%|███▍      | 49/143 [00:09<00:07, 13.26it/s]
Training 37/40:  48%|████▊     | 68/143 [00:09<00:03, 23.03it/s]
Training 37/40:  62%|██████▏   | 88/143 [00:09<00:01, 35.93it/s]
Training 37/40:  78%|███████▊  | 112/143 [00:09<00:00, 55.07it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 36: train_loss=0.0645, val_loss=0.1122



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<23:16,  9.84s/it]
Training 38/40:   9%|▉         | 13/143 [00:09<01:11,  1.82it/s]
Training 38/40:  17%|█▋        | 24/143 [00:10<00:29,  3.98it/s]
Training 38/40:  26%|██▌       | 37/143 [00:10<00:14,  7.47it/s]
Training 38/40:  40%|███▉      | 57/143 [00:10<00:05, 14.79it/s]
Training 38/40:  52%|█████▏    | 75/143 [00:10<00:02, 23.31it/s]
Training 38/40:  70%|██████▉   | 100/143 [00:10<00:01, 38.77it/s]
Training 38/40: 100%|██████████| 143/143 [00:11<00:00, 12.95it/s]


Epoch 37: train_loss=0.0602, val_loss=0.1086



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:08<21:14,  8.98s/it]
Training 39/40:   8%|▊         | 11/143 [00:09<01:18,  1.67it/s]
Training 39/40:  16%|█▌        | 23/143 [00:09<00:28,  4.26it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 38: train_loss=0.0604, val_loss=0.1139



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:50,  9.23s/it]
Training 40/40:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 40/40:  16%|█▌        | 23/143 [00:09<00:29,  4.08it/s]
Training 40/40:  25%|██▌       | 36/143 [00:09<00:13,  7.80it/s]
Training 40/40:  39%|███▉      | 56/143 [00:09<00:05, 15.54it/s]
Training 40/40:  52%|█████▏    | 74/143 [00:09<00:02, 24.49it/s]
Training 40/40:  66%|██████▋   | 95/143 [00:09<00:01, 37.82it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 39: train_loss=0.0574, val_loss=0.1128


2025-06-02 20:37:52,836 - __main__ - INFO - Saved mlp probe for layer 11 to cache\probes\phase1_supervisedVIT_viewpoint_probing\mlp_layer_11_probe.pth
100%|██████████| 6/6 [3:12:04<00:00, 1920.74s/it]


In [8]:
# Persist the probing results via the experiment object. The returned value is
# kept in `result_path` because the analysis step passes it to
# analyze_experiment_results and reads `result_path.parent` from it.
logger.info("Saving results...")
result_path = experiment.save_results(results)

2025-06-02 20:38:12,343 - __main__ - INFO - Saving results...
2025-06-02 20:38:12,358 - __main__ - INFO - Results saved to results\phase1_supervisedVIT_viewpoint_probing\results.json


In [9]:
# Imported here rather than in the top imports cell, so this cell only needs
# `logger` and `result_path` from earlier cells to run.
from src.analysis.layer_analysis import analyze_experiment_results

# Run the layer-wise analysis on the saved results file; the output directory
# is the directory that contains that results file.
logger.info("Creating analysis and visualizations...")
analyze_experiment_results(result_path, output_dir=result_path.parent)

# NOTE(review): the message below mentions an "analysis_results" folder, but
# output_dir above is the results directory itself - confirm whether
# analyze_experiment_results also writes to a separate analysis_results folder.
logger.info("Results analyzed! Please see the results and analysis_results folders for the outcomes.")

2025-06-02 20:38:12,370 - __main__ - INFO - Creating analysis and visualizations...
2025-06-02 20:38:14,861 - src.analysis.layer_analysis - INFO - Analysis report saved to results\phase1_supervisedVIT_viewpoint_probing\layer_analysis_report.json
2025-06-02 20:38:14,862 - __main__ - INFO - Results analyzed! Please see the results and analysis_results folders for the outcomes.
