# Probing Experiment on SSL Models

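This notebook is a cell-by-cell port of the old experiment runner script. Each stage (configuration, model loading, dataset loading, probe training) lives in its own cell, so a small error in one stage does not discard the state built up by the earlier ones.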

### Imports, Logging Setup

In [1]:
# Set environment variables before imports
import os
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# Imports
import hydra
from omegaconf import DictConfig, OmegaConf
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
from pathlib import Path
import logging
import wandb
from typing import Dict, List, Tuple, Optional
from tqdm import tqdm


from src.models.feature_extractor import FeatureExtractor, load_feature_extractor
from src.datasets.shapenet_3dr2n2 import create_3dr2n2_dataloaders
from src.probing.probes import create_probe, ProbeTrainer
from src.probing.data_preprocessing import (
    FeatureExtractorPipeline,
    create_probing_dataloaders,
    ProbingDataset,
)
from src.probing.metrics import (
    compute_regression_metrics,
    compute_viewpoint_specific_metrics,
    MetricsTracker,
)
from src.analysis.layer_analysis import LayerWiseAnalyzer

# Configure root logging once for the whole notebook: INFO level, with a
# "timestamp - logger name - level - message" prefix on every record.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the class and the cells below.
logger = logging.getLogger(__name__)

### Probing Setup
This class is the overarching "manager" responsible for the entire experiment. It contains all the functionality required to:

- Create & setup dataloaders 
- Extract features from the frozen layers of the ViT models 
- Train MLP & Linear probes on those layers 
- Summarize results

In [2]:
class ProbingExperiment:
    """Orchestrates probing experiments"""

    def __init__(self, config: DictConfig):
        """Set up device, optional wandb run, output directories and analyzer.

        Args:
            config: Experiment configuration. Reads ``models.device`` /
                ``device`` (optional), ``wandb`` (optional), ``results_dir``
                and ``cache_dir`` (optional, default ``./results`` and
                ``./cache``) and ``experiment.name``.
        """
        self.config = config

        # Device priority: models.device, then top-level device, then
        # auto-detection (cuda -> mps -> cpu).
        chosen_device = config.models.get("device", config.get("device"))
        if not chosen_device:
            if torch.cuda.is_available():
                chosen_device = "cuda"
            elif torch.backends.mps.is_available():
                chosen_device = "mps"
            else:
                chosen_device = "cpu"
        self.device = chosen_device
        logger.info(f"Using device: {self.device}")

        # Start a wandb run only when explicitly enabled in the config.
        wandb_section = config.get("wandb", {})
        if wandb_section.get("enabled", False):
            wandb.init(
                project=config.wandb.project,
                entity=config.wandb.get("entity"),
                name=config.experiment.name,
                config=OmegaConf.to_container(config, resolve=True),
            )

        # Output locations; every directory is created eagerly.
        self.results_dir = Path(config.get("results_dir", "./results"))
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.cache_dir = Path(config.get("cache_dir", "./cache"))
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        experiment_name = config.experiment.name

        # Trained probes are stored per experiment under the cache directory.
        self.probe_save_dir = self.cache_dir.joinpath("probes", experiment_name)
        self.probe_save_dir.mkdir(parents=True, exist_ok=True)

        # Analyzer writes into the per-experiment results directory.
        self.analyzer = LayerWiseAnalyzer(self.results_dir / experiment_name)

   
    def load_dataset(self) -> Tuple[DataLoader, DataLoader, DataLoader]:
        """Create the dataloaders described by ``config.datasets``.

        The optional ``subset_percentage`` entry of the dataset config is
        forwarded to ``create_3dr2n2_dataloaders`` (``None`` when absent).

        Returns:
            The result of ``create_3dr2n2_dataloaders``, annotated as a
            (train, val, test) triple of dataloaders.
        """
        dataset_config = self.config.datasets
        return create_3dr2n2_dataloaders(
            dataset_config,
            subset_percentage=dataset_config.get("subset_percentage", None),
        )

    def load_feature_extractor(self) -> FeatureExtractor:
        """Instantiate the feature extractor described by ``config.models``.

        Note that ``device`` and ``cache_dir`` are written into the shared
        ``config.models`` node in place before it is converted to a plain
        container and handed to ``load_feature_extractor``.

        Returns:
            The object returned by ``load_feature_extractor``.
        """
        models_cfg = self.config.models

        # Inject the runtime-resolved device and the model cache location.
        models_cfg.device = self.device
        models_cfg.cache_dir = str(self.cache_dir / "models")

        extractor = load_feature_extractor(OmegaConf.to_container(models_cfg))
        logger.info(f"Loaded {models_cfg.model_name} feature extractor")
        return extractor

    def extract_features_for_layer(
        self,
        feature_extractor: FeatureExtractor,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        layer: int,
        feature_type: str,
        task_type: str,
    ) -> Tuple[ProbingDataset, ProbingDataset, ProbingDataset]:
        """Build probing datasets from one layer of the feature extractor.

        Args:
            feature_extractor: Model wrapper handed to the extraction pipeline.
            train_loader: Image dataloader for the training split.
            val_loader: Image dataloader for the validation split.
            test_loader: Image dataloader for the test split.
            layer: Index of the single layer to extract from.
            feature_type: Feature type name forwarded to the pipeline.
            task_type: Task name forwarded to the pipeline.

        Returns:
            The result of ``FeatureExtractorPipeline.create_probing_datasets``,
            annotated as a (train, val, test) triple of probing datasets.
        """
        features_cache = self.cache_dir / "features"
        extraction_pipeline = FeatureExtractorPipeline(
            feature_extractor=feature_extractor,
            device=self.device,
            batch_size=self.config.get("extraction_batch_size", 32),
            cache_dir=str(features_cache),
        )

        # The name combines model, experiment and layer, so each layer gets
        # its own experiment_name in the pipeline.
        model_name = self.config.models.model_name
        run_name = self.config.experiment.name
        dataset_kwargs = dict(
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            layers=[layer],
            feature_type=feature_type,
            task_type=task_type,
            experiment_name=f"{model_name}_{run_name}_layer_{layer}",
        )
        return extraction_pipeline.create_probing_datasets(**dataset_kwargs)

    def run_probe_experiment(
        self,
        probe_type: str,
        train_loader: DataLoader,
        val_loader: DataLoader,
        test_loader: DataLoader,
        feature_dim: int,
        layer: int,
    ) -> Dict:
        """Train, checkpoint and evaluate one probe on one layer's features.

        Args:
            probe_type: Key under ``config.probing`` holding this probe's
                settings (including its ``training`` section).
            train_loader: Probing dataloader for the training split.
            val_loader: Probing dataloader for the validation split.
            test_loader: Probing dataloader for the test split.
            feature_dim: Input dimensionality passed to the probe.
            layer: Layer index, used for logging and the checkpoint name.

        Returns:
            Dict with keys ``train_history``, ``val_history``,
            ``test_metrics``, ``detailed_metrics``, ``best_epoch`` and
            ``total_epochs``.
        """
        logger.info(
            f"Running {probe_type} probe on layer {layer} (feature_dim: {feature_dim})"
        )

        # Per-probe settings as a plain, mutable dict. When the probe type has
        # no section in the config, `.get` hands back the plain `{}` default,
        # which OmegaConf.to_container would reject, so only convert real
        # OmegaConf nodes.
        probe_config = self.config.probing.get(probe_type, {})
        if OmegaConf.is_config(probe_config):
            probe_config = OmegaConf.to_container(probe_config, resolve=True)
        else:
            probe_config = dict(probe_config or {})

        probe_config["input_dim"] = feature_dim
        probe_config["output_dim"] = self.config.probing.get("output_dim", 2)

        # Map experiment-level task names onto the probe's task types; any
        # other value is passed through unchanged.
        task_aliases = {
            "viewpoint_regression": "regression",
            "view_classification": "classification",
        }
        main_task_type = self.config.probing.get("task_type", "regression")
        probe_config["task_type"] = task_aliases.get(main_task_type, main_task_type)

        probe = create_probe(probe_config)

        # A single trainer, wired to the tracker whose history is returned.
        metrics_tracker = MetricsTracker()
        trainer = ProbeTrainer(
            probe, device=self.device, MetricsTracker=metrics_tracker
        )

        # Optimizer and (optional) scheduler come from the probe's own
        # `training` section.
        training_config = probe_config.get("training", {})
        optimizer = self.create_optimizer(probe, training_config.get("optimizer", {}))
        scheduler = self.create_scheduler(
            optimizer, training_config.get("scheduler", {})
        )

        # Training parameters
        epochs = training_config.get("epochs", 30)
        early_stopping_patience = training_config.get("early_stopping_patience", 15)

        # Check if wandb is enabled
        wandb_enabled = self.config.get("wandb", {}).get("enabled", False)

        best_model, best_val_loss = trainer.train(
            epochs,
            optimizer,
            scheduler,
            early_stopping_patience,
            train_loader,
            val_loader,
            probe_type=probe_type,
            layer=layer,
            wandb_enabled=wandb_enabled,
        )

        # Save the trained probe next to the other probes of this experiment.
        self.probe_save_dir.mkdir(parents=True, exist_ok=True)
        probe_save_path = self.probe_save_dir / f"{probe_type}_layer_{layer}_probe.pth"

        torch.save({
            'model_state_dict': best_model,  # best_model is already a state_dict
            'probe_config': probe_config,
            'layer': layer,
            'probe_type': probe_type,
            'experiment_name': self.config.experiment.name,
            'model_name': self.config.models.model_name,
            'best_val_loss': best_val_loss,
            'feature_dim': feature_dim
        }, probe_save_path)

        logger.info(f"Saved {probe_type} probe for layer {layer} to {probe_save_path}")

        # NOTE(review): the metrics below use whatever weights `probe` holds
        # after train() returns. Confirm ProbeTrainer.train restores the best
        # state; otherwise they may not describe the checkpoint saved above.
        test_metrics = trainer.evaluate(test_loader)
        detailed_metrics = self.compute_detailed_metrics(probe, test_loader)

        train_history = metrics_tracker.get_history("train")

        return {
            "train_history": train_history,
            "val_history": metrics_tracker.get_history("val"),
            "test_metrics": test_metrics,
            "detailed_metrics": detailed_metrics,
            "best_epoch": metrics_tracker.best_epoch,
            "total_epochs": len(train_history),
        }

    def save_probe(self, probe: nn.Module, probe_type: str, layer: int, probe_config: Dict):
        """Write a probe checkpoint plus a JSON sidecar describing it.

        Both files go to ``self.probe_save_dir`` and share the stem
        ``{model_name}_{probe_type}_layer_{layer}``.

        Args:
            probe: Trained probe whose ``state_dict`` is stored.
            probe_type: Probe type name, used in the file name and metadata.
            layer: Layer index, used in the file name and metadata.
            probe_config: Configuration stored alongside the weights.
        """
        import json

        model_name = self.config.models.model_name
        file_stem = f"{model_name}_{probe_type}_layer_{layer}"

        # Metadata shared by the checkpoint and the JSON sidecar.
        metadata = {
            'probe_config': probe_config,
            'model_name': model_name,
            'probe_type': probe_type,
            'layer': layer,
            'experiment_name': self.config.experiment.name,
        }

        # Checkpoint: weights first, then the metadata entries.
        probe_path = self.probe_save_dir / f"{file_stem}.pth"
        torch.save({'model_state_dict': probe.state_dict(), **metadata}, probe_path)

        # Sidecar: the same metadata as JSON.
        config_path = self.probe_save_dir / f"{file_stem}_config.json"
        with open(config_path, 'w') as config_file:
            json.dump(metadata, config_file, indent=2)

        logger.info(f"Probe saved to {probe_path}")
        logger.info(f"Probe config saved to {config_path}")

    def load_probe(self, probe_type: str, layer: int, device: Optional[str] = None) -> nn.Module:
        """Load a previously saved probe"""
        if device is None:
            device = self.device
            
        model_name = self.config.models.model_name
        filename = f"{model_name}_{probe_type}_layer_{layer}.pth"
        probe_path = self.probe_save_dir / filename
        
        if not probe_path.exists():
            raise FileNotFoundError(f"Probe not found at {probe_path}")
        
        # Load the saved data
        saved_data = torch.load(probe_path, map_location=device)
        
        # Recreate the probe using the saved config
        probe_config = saved_data['probe_config']
        probe = create_probe(probe_config)
        
        # Load the state dict
        probe.load_state_dict(saved_data['model_state_dict'])
        probe.to(device)
        
        logger.info(f"Probe loaded from {probe_path}")
        return probe

    def create_optimizer(
        self, model: nn.Module, optimizer_config: Dict
    ) -> torch.optim.Optimizer:
        """Build an optimizer for ``model`` via Hydra's ``instantiate``.

        Args:
            model: Module whose parameters are optimized.
            optimizer_config: Hydra-style config for the optimizer; it is
                copied, so the caller's mapping is not modified.

        Returns:
            Whatever ``hydra.utils.instantiate`` produces for the config with
            ``params`` set to ``model.parameters()``.
        """
        from hydra.utils import instantiate

        # Attach the parameters to a copy rather than the caller's config.
        config_with_params = optimizer_config.copy()
        config_with_params["params"] = model.parameters()
        return instantiate(config_with_params)

    def create_scheduler(
        self, optimizer: torch.optim.Optimizer, scheduler_config: Dict
    ):
        """Create learning rate scheduler from config using Hydra instantiate"""
        if not scheduler_config:
            return None

        from hydra.utils import instantiate

        scheduler_config = scheduler_config.copy()
        scheduler_config["optimizer"] = optimizer

        return instantiate(scheduler_config)

    def compute_detailed_metrics(
        self, probe: nn.Module, test_loader: DataLoader
    ) -> Dict:
        """Compute alles metrics"""
        probe.eval()

        all_predictions = []
        all_targets = []
        all_categories = []

        with torch.no_grad():
            for batch in test_loader:
                features = batch["features"].to(self.device)
                targets = batch["targets"]

                outputs = probe(features)

                all_predictions.append(outputs.cpu())
                all_targets.append(targets)

                # Get categories if available
                if "categories" in batch:
                    all_categories.extend(batch["categories"])

        predictions = torch.cat(all_predictions, dim=0)
        targets = torch.cat(all_targets, dim=0)

        # Basic regression metrics
        metrics = compute_regression_metrics(predictions, targets, return_per_dim=True)

        # Viewpoint-specific metrics
        if predictions.shape[1] == 2:
            viewpoint_metrics = compute_viewpoint_specific_metrics(
                azimuth_pred=predictions[:, 0],
                elevation_pred=predictions[:, 1],
                azimuth_target=targets[:, 0],
                elevation_target=targets[:, 1],
            )
            metrics.update(viewpoint_metrics)

        return metrics

    def save_results(self, results: Dict) -> Path:
        """Write the results and the resolved config to ``results.json``.

        The file is written to ``<results_dir>/<experiment name>/results.json``
        and contains two top-level keys: ``config`` (the fully resolved
        experiment configuration) and ``results`` (``results`` after
        ``make_json_serializable``).

        Args:
            results: Nested results structure to persist.

        Returns:
            Path of the written JSON file.
        """
        import json

        # Create experiment directory
        exp_dir = self.results_dir / self.config.experiment.name
        exp_dir.mkdir(parents=True, exist_ok=True)
        results_file = exp_dir / "results.json"

        combined_results = {
            "config": OmegaConf.to_container(self.config, resolve=True),
            # Tensors / arrays are converted to plain lists for JSON.
            "results": self.make_json_serializable(results),
        }

        with open(results_file, "w", encoding="utf-8") as f:
            json.dump(combined_results, f, indent=2)

        logger.info(f"Results saved to {results_file}")
        return results_file

    def make_json_serializable(self, obj):
        """Convert object to JSON-serializable format"""
        if isinstance(obj, dict):
            return {k: self.make_json_serializable(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self.make_json_serializable(v) for v in obj]
        elif isinstance(obj, (torch.Tensor, np.ndarray)):
            return obj.tolist() if hasattr(obj, "tolist") else float(obj)
        elif isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        else:
            return obj



### Hydra Configuration Loading / Setup

In [3]:
from hydra import initialize, compose
from hydra.core.global_hydra import GlobalHydra
import os 
from pathlib import Path #

CONFIG_PATH = "../configs"
CONFIG_NAME = "experiment_config"

# Stays None when loading fails, so later cells can detect the failure.
cfg: Optional[DictConfig] = None

# Clear any Hydra state left by a previous run of this cell before
# initializing again.
if GlobalHydra.instance().is_initialized():
    logger.info("Clearing existing Hydra global state.")
    GlobalHydra.instance().clear()

try:
    # DATA_DIR is exported before composing so the configuration can read it
    # from the environment; it points at <parent of cwd>/data.
    project_root = Path.cwd().parent
    data_dir_abs = project_root / "data"
    os.environ["DATA_DIR"] = str(data_dir_abs)

    logger.info(f"Initializing Hydra with config_path: '{CONFIG_PATH}'")
    initialize(version_base=None, config_path=CONFIG_PATH)

    logger.info(f"Composing configuration with config_name: '{CONFIG_NAME}'")
    cfg = compose(config_name=CONFIG_NAME)

except Exception as e:
    # Deliberately not re-raised: the notebook keeps its state and the
    # failure is reported in the log (cfg remains None).
    logger.error(f"Error initializing Hydra or loading configuration: {e}", exc_info=True)

else:
    # Reached only when initialize/compose succeeded; this does not depend on
    # the truthiness of the composed config, which is False when it is empty.
    logger.info("Hydra configuration loaded successfully.")


2025-06-02 06:58:58,980 - __main__ - INFO - Initializing Hydra with config_path: '../configs'
2025-06-02 06:58:59,448 - __main__ - INFO - Composing configuration with config_name: 'experiment_config'
2025-06-02 06:58:59,523 - __main__ - INFO - Hydra configuration loaded successfully.


## Running the Experiment
The following code uses the above configurations and utility functions to run the actual experiment.

In [4]:
results = None
logger.info("Starting experiment execution")

# The configuration cell leaves cfg as None when loading fails; stop here
# with a clear message instead of an AttributeError inside the constructor.
if cfg is None:
    raise RuntimeError(
        "Hydra configuration was not loaded; re-run the configuration cell "
        "and check its log output."
    )

experiment = ProbingExperiment(cfg)
    

2025-06-02 06:58:59,535 - __main__ - INFO - Starting experiment execution
2025-06-02 06:58:59,536 - __main__ - INFO - Using device: cuda
2025-06-02 06:58:59,817 - wandb.jupyter - ERROR - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: rsen0811 (cse493g1_drn). Use `wandb login --relogin` to force relogin


### Load the Feature Extractor & Dataset

In [5]:
# Build the feature extractor described by cfg.models.
feature_extractor = experiment.load_feature_extractor()
# Extraction settings; when cfg.models has no `feature_extraction` section the
# defaults below apply (layer 11, "cls_token" features).
extraction_config = cfg.models.get("feature_extraction", {})
layers = extraction_config.get("layers", [11])
feature_type = extraction_config.get("feature_type", "cls_token")
# Probing task name, defaulting to "viewpoint_regression".
task_type = cfg.probing.get("task_type", "viewpoint_regression")

2025-06-02 06:59:07,907 - timm.models._builder - INFO - Loading pretrained weights from url (https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.14-300e.pth.tar)
2025-06-02 06:59:13,976 - src.models.feature_extractor - INFO - Loaded ijepa model on cuda
2025-06-02 06:59:13,976 - __main__ - INFO - Loaded ijepa feature extractor


In [6]:
# Create the train / val / test dataloaders from cfg.datasets (including its
# optional subset_percentage).
train_loader, val_loader, test_loader = experiment.load_dataset()

100%|██████████| 30648/30648 [01:07<00:00, 455.75it/s]


Using 5.00% of train data: 36777 samples.


100%|██████████| 6567/6567 [00:12<00:00, 509.57it/s]


Using 5.00% of val data: 7880 samples.


100%|██████████| 6569/6569 [00:11<00:00, 596.91it/s]


Using 5.00% of test data: 7882 samples.


### Train the Probes

In [7]:
# Dataloader settings for the probing datasets; identical for every layer,
# so they are read from the config once.
probe_batch_size = cfg.probing.get("training", {}).get("batch_size", 64)
probe_num_workers = cfg.get("num_workers", 4)

# results maps "layer_<n>" -> {probe_type: result dict from run_probe_experiment}.
results = {}
for layer in tqdm(layers):
    logger.info(f"Processing layer {layer}...")

    # Features of this layer for all three splits.
    train_dataset, val_dataset, test_dataset = experiment.extract_features_for_layer(
        feature_extractor,
        train_loader,
        val_loader,
        test_loader,
        layer,
        feature_type,
        task_type,
    )

    # Dataloaders over the extracted features.
    probe_train_loader, probe_val_loader, probe_test_loader = create_probing_dataloaders(
        train_dataset,
        val_dataset,
        test_dataset,
        batch_size=probe_batch_size,
        num_workers=probe_num_workers,
    )

    # One experiment per configured probe type on this layer.
    layer_results = {}
    for probe_type in cfg.probing.probe_types:
        logger.info(f"Running {probe_type} probe on layer {layer}...")
        probe_results = experiment.run_probe_experiment(
            probe_type,
            probe_train_loader,
            probe_val_loader,
            probe_test_loader,
            train_dataset.features.shape[1],  # probe input dimensionality
            layer,
        )
        layer_results[probe_type] = probe_results

    results[f"layer_{layer}"] = layer_results

  0%|          | 0/6 [00:00<?, ?it/s]2025-06-02 07:00:45,539 - __main__ - INFO - Processing layer 2...
2025-06-02 07:00:45,541 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:09<2:54:47,  9.14s/it]
Extracting features:   0%|          | 2/1149 [00:09<1:21:00,  4.24s/it]
Extracting features:   0%|          | 3/1149 [00:10<50:57,  2.67s/it]  
Extracting features:   0%|          | 4/1149 [00:11<36:43,  1.92s/it]
Extracting features:   0%|          | 5/1149 [00:12<28:52,  1.51s/it]
Extracting features:   1%|          | 6/1149 [00:13<24:06,  1.27s/it]
Extracting features:   1%|          | 7/1149 [00:13<21:03,  1.11s/it]
Extracting features:   1%|          | 8/1149 [00:14<19:04,  1.00s/it]
Extracting features:   1%|          | 9/1149 [00:15<17:43,  1.07it/s]
Extracting features:   1%|          | 10/1149 [00:16<16:49,  1.13it/s]
Extracting features:   1%|          | 11/1149 [00:17<16:12,  1.17it/s]
Extracting

Epoch 0: train_loss=0.2340, val_loss=0.1821



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 2/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 2/30:  21%|██        | 30/143 [00:09<00:21,  5.35it/s]
Training 2/30:  36%|███▋      | 52/143 [00:09<00:07, 11.65it/s]
Training 2/30:  52%|█████▏    | 74/143 [00:09<00:03, 20.02it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.94it/s]


Epoch 1: train_loss=0.1677, val_loss=0.1688



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<21:45,  9.20s/it]
Training 3/30:  11%|█         | 16/143 [00:09<00:52,  2.40it/s]
Training 3/30:  24%|██▍       | 34/143 [00:09<00:17,  6.19it/s]
Training 3/30:  35%|███▍      | 50/143 [00:09<00:08, 10.74it/s]
Training 3/30:  60%|██████    | 86/143 [00:09<00:02, 24.86it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 2: train_loss=0.1683, val_loss=0.1650



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:25,  9.47s/it]
Training 4/30:   8%|▊         | 11/143 [00:09<01:23,  1.59it/s]
Training 4/30:  18%|█▊        | 26/143 [00:09<00:25,  4.67it/s]
Training 4/30:  35%|███▍      | 50/143 [00:09<00:08, 11.45it/s]
Training 4/30:  52%|█████▏    | 74/143 [00:09<00:03, 20.48it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 3: train_loss=0.1611, val_loss=0.1581



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<23:29,  9.93s/it]
Training 5/30:   9%|▉         | 13/143 [00:10<01:12,  1.80it/s]
Training 5/30:  26%|██▌       | 37/143 [00:10<00:16,  6.53it/s]
Training 5/30:  41%|████      | 58/143 [00:10<00:07, 12.13it/s]
Training 5/30:  62%|██████▏   | 88/143 [00:10<00:02, 23.00it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.11it/s]


Epoch 4: train_loss=0.1623, val_loss=0.1566



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<22:47,  9.63s/it]
Training 6/30:   8%|▊         | 11/143 [00:09<01:24,  1.56it/s]
Training 6/30:  22%|██▏       | 31/143 [00:09<00:19,  5.62it/s]
Training 6/30:  38%|███▊      | 54/143 [00:09<00:07, 11.98it/s]
Training 6/30:  59%|█████▊    | 84/143 [00:10<00:02, 23.17it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.45it/s]


Epoch 5: train_loss=0.1617, val_loss=0.1582



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:25,  9.48s/it]
Training 7/30:  11%|█         | 16/143 [00:09<00:54,  2.33it/s]
Training 7/30:  22%|██▏       | 32/143 [00:09<00:19,  5.60it/s]
Training 7/30:  43%|████▎     | 62/143 [00:09<00:05, 14.06it/s]
Training 7/30:  62%|██████▏   | 88/143 [00:09<00:02, 23.75it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 6: train_loss=0.1609, val_loss=0.1549



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<22:25,  9.48s/it]
Training 8/30:  13%|█▎        | 19/143 [00:09<00:44,  2.78it/s]
Training 8/30:  26%|██▌       | 37/143 [00:09<00:16,  6.46it/s]
Training 8/30:  43%|████▎     | 61/143 [00:09<00:06, 13.18it/s]
Training 8/30:  62%|██████▏   | 89/143 [00:09<00:02, 23.70it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 7: train_loss=0.1596, val_loss=0.1651



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:58,  9.71s/it]
Training 9/30:  11%|█         | 16/143 [00:09<00:55,  2.28it/s]
Training 9/30:  24%|██▍       | 34/143 [00:09<00:18,  5.88it/s]
Training 9/30:  42%|████▏     | 60/143 [00:10<00:06, 13.03it/s]
Training 9/30:  60%|██████    | 86/143 [00:10<00:02, 22.55it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.44it/s]


Epoch 8: train_loss=0.1598, val_loss=0.1539



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:52,  9.66s/it]
Training 10/30:  10%|█         | 15/143 [00:09<00:59,  2.14it/s]
Training 10/30:  22%|██▏       | 31/143 [00:09<00:20,  5.36it/s]
Training 10/30:  39%|███▉      | 56/143 [00:09<00:07, 12.26it/s]
Training 10/30:  63%|██████▎   | 90/143 [00:10<00:02, 24.95it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.47it/s]


Epoch 9: train_loss=0.1572, val_loss=0.1543



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<22:21,  9.45s/it]
Training 11/30:   9%|▉         | 13/143 [00:09<01:08,  1.89it/s]
Training 11/30:  19%|█▉        | 27/143 [00:09<00:24,  4.77it/s]
Training 11/30:  34%|███▍      | 49/143 [00:09<00:08, 10.97it/s]
Training 11/30:  51%|█████     | 73/143 [00:09<00:03, 20.02it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 10: train_loss=0.1570, val_loss=0.1533



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<22:26,  9.49s/it]
Training 12/30:  10%|█         | 15/143 [00:09<00:58,  2.18it/s]
Training 12/30:  21%|██        | 30/143 [00:09<00:21,  5.24it/s]
Training 12/30:  36%|███▋      | 52/143 [00:09<00:07, 11.42it/s]
Training 12/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.78it/s]
Training 12/30:  73%|███████▎  | 104/143 [00:09<00:01, 33.26it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 11: train_loss=0.1596, val_loss=0.1547



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 13/30:  10%|█         | 15/143 [00:09<00:59,  2.16it/s]
Training 13/30:  24%|██▍       | 35/143 [00:09<00:17,  6.21it/s]
Training 13/30:  41%|████▏     | 59/143 [00:09<00:06, 12.87it/s]
Training 13/30:  66%|██████▋   | 95/143 [00:09<00:01, 26.43it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 12: train_loss=0.1549, val_loss=0.1542



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<21:52,  9.25s/it]
Training 14/30:  11%|█         | 16/143 [00:09<00:53,  2.39it/s]
Training 14/30:  24%|██▍       | 34/143 [00:09<00:17,  6.16it/s]
Training 14/30:  41%|████      | 58/143 [00:09<00:06, 13.05it/s]
Training 14/30:  68%|██████▊   | 97/143 [00:09<00:01, 28.26it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 13: train_loss=0.1571, val_loss=0.1564



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:58,  9.29s/it]
Training 15/30:  12%|█▏        | 17/143 [00:09<00:49,  2.53it/s]
Training 15/30:  26%|██▌       | 37/143 [00:09<00:15,  6.70it/s]
Training 15/30:  40%|███▉      | 57/143 [00:09<00:06, 12.37it/s]
Training 15/30:  64%|██████▍   | 92/143 [00:09<00:01, 25.93it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 13.97it/s]


Epoch 14: train_loss=0.1543, val_loss=0.1510



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<22:51,  9.66s/it]
Training 16/30:  12%|█▏        | 17/143 [00:09<00:51,  2.43it/s]
Training 16/30:  24%|██▍       | 34/143 [00:09<00:18,  5.85it/s]
Training 16/30:  37%|███▋      | 53/143 [00:09<00:08, 11.05it/s]
Training 16/30:  54%|█████▍    | 77/143 [00:10<00:03, 19.90it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.42it/s]


Epoch 15: train_loss=0.1551, val_loss=0.1549



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 17/30:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 17/30:  25%|██▌       | 36/143 [00:09<00:15,  6.88it/s]
Training 17/30:  41%|████      | 58/143 [00:09<00:06, 13.10it/s]
Training 17/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.82it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 16: train_loss=0.1545, val_loss=0.1531



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 18/30:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 18/30:  21%|██        | 30/143 [00:09<00:20,  5.55it/s]
Training 18/30:  37%|███▋      | 53/143 [00:09<00:07, 12.20it/s]
Training 18/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.23it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 17: train_loss=0.1536, val_loss=0.1561



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 19/30:  10%|▉         | 14/143 [00:09<01:01,  2.09it/s]
Training 19/30:  24%|██▍       | 34/143 [00:09<00:17,  6.29it/s]
Training 19/30:  39%|███▉      | 56/143 [00:09<00:06, 12.60it/s]
Training 19/30:  60%|██████    | 86/143 [00:09<00:02, 24.18it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 18: train_loss=0.1592, val_loss=0.1496



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 20/30:  11%|█         | 16/143 [00:09<00:52,  2.41it/s]
Training 20/30:  27%|██▋       | 39/143 [00:09<00:14,  7.31it/s]
Training 20/30:  44%|████▍     | 63/143 [00:09<00:05, 14.24it/s]
Training 20/30:  58%|█████▊    | 83/143 [00:09<00:02, 21.79it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 19: train_loss=0.1561, val_loss=0.1492



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 21/30:  12%|█▏        | 17/143 [00:09<00:49,  2.56it/s]
Training 21/30:  22%|██▏       | 32/143 [00:09<00:19,  5.71it/s]
Training 21/30:  44%|████▍     | 63/143 [00:09<00:05, 14.73it/s]
Training 21/30:  61%|██████    | 87/143 [00:09<00:02, 23.87it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 20: train_loss=0.1516, val_loss=0.1502



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 22/30:  10%|█         | 15/143 [00:09<00:57,  2.24it/s]
Training 22/30:  24%|██▍       | 34/143 [00:09<00:17,  6.24it/s]
Training 22/30:  40%|███▉      | 57/143 [00:09<00:06, 12.83it/s]
Training 22/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.02it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 21: train_loss=0.1558, val_loss=0.1489



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:28,  9.08s/it]
Training 23/30:  15%|█▌        | 22/143 [00:09<00:35,  3.36it/s]
Training 23/30:  25%|██▌       | 36/143 [00:09<00:16,  6.32it/s]
Training 23/30:  39%|███▉      | 56/143 [00:09<00:07, 12.11it/s]
Training 23/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.45it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 22: train_loss=0.1517, val_loss=0.1549



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:27,  9.06s/it]
Training 24/30:  10%|█         | 15/143 [00:09<00:56,  2.28it/s]
Training 24/30:  22%|██▏       | 32/143 [00:09<00:18,  5.91it/s]
Training 24/30:  34%|███▎      | 48/143 [00:09<00:09, 10.54it/s]
Training 24/30:  52%|█████▏    | 75/143 [00:09<00:03, 21.19it/s]
Training 24/30:  69%|██████▊   | 98/143 [00:09<00:01, 32.73it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 23: train_loss=0.1510, val_loss=0.1489



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 25/30:  10%|█         | 15/143 [00:09<00:58,  2.19it/s]
Training 25/30:  20%|██        | 29/143 [00:09<00:22,  5.06it/s]
Training 25/30:  35%|███▍      | 50/143 [00:09<00:08, 10.99it/s]
Training 25/30:  50%|█████     | 72/143 [00:09<00:03, 19.25it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 24: train_loss=0.1546, val_loss=0.1483



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 26/30:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 26/30:  24%|██▍       | 34/143 [00:09<00:16,  6.44it/s]
Training 26/30:  36%|███▌      | 51/143 [00:09<00:08, 11.31it/s]
Training 26/30:  53%|█████▎    | 76/143 [00:09<00:03, 21.07it/s]
Training 26/30:  71%|███████   | 101/143 [00:09<00:01, 33.68it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 14.13it/s]


Epoch 25: train_loss=0.1509, val_loss=0.1497



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:47,  9.63s/it]
Training 27/30:  11%|█         | 16/143 [00:09<00:55,  2.29it/s]
Training 27/30:  24%|██▍       | 34/143 [00:09<00:18,  5.92it/s]
Training 27/30:  43%|████▎     | 61/143 [00:09<00:06, 13.41it/s]
Training 27/30:  65%|██████▌   | 93/143 [00:10<00:01, 25.32it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 26: train_loss=0.1502, val_loss=0.1478



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:22,  9.46s/it]
Training 28/30:  13%|█▎        | 18/143 [00:09<00:47,  2.63it/s]
Training 28/30:  25%|██▌       | 36/143 [00:09<00:16,  6.32it/s]
Training 28/30:  43%|████▎     | 62/143 [00:09<00:05, 13.63it/s]
Training 28/30:  61%|██████    | 87/143 [00:09<00:02, 22.98it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.76it/s]


Epoch 27: train_loss=0.1524, val_loss=0.1540



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 29/30:  11%|█         | 16/143 [00:09<00:53,  2.38it/s]
Training 29/30:  28%|██▊       | 40/143 [00:09<00:13,  7.41it/s]
Training 29/30:  42%|████▏     | 60/143 [00:09<00:06, 13.06it/s]
Training 29/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.79it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 28: train_loss=0.1522, val_loss=0.1563



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 30/30:  12%|█▏        | 17/143 [00:09<00:50,  2.48it/s]
Training 30/30:  24%|██▍       | 35/143 [00:09<00:17,  6.16it/s]
Training 30/30:  38%|███▊      | 54/143 [00:09<00:07, 11.44it/s]
Training 30/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.42it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 29: train_loss=0.1523, val_loss=0.1483


2025-06-02 07:32:07,156 - __main__ - INFO - Saved linear probe for layer 2 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_2_probe.pth
2025-06-02 07:32:26,699 - __main__ - INFO - Running mlp probe on layer 2...
2025-06-02 07:32:26,700 - __main__ - INFO - Running mlp probe on layer 2 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 1/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 1/40:  14%|█▍        | 20/143 [00:09<00:35,  3.51it/s]
Training 1/40:  22%|██▏       | 32/143 [00:09<00:15,  6.94it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 0: train_loss=0.3491, val_loss=0.1651



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 2/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 2/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 2/40:  25%|██▌       | 36/143 [00:09<00:13,  8.07it/s]
Training 2/40:  37%|███▋      | 53/143 [00:09<00:06, 14.65it/s]
Training 2/40:  51%|█████     | 73/143 [00:09<00:02, 24.78it/s]
Training 2/40:  70%|██████▉   | 100/143 [00:09<00:01, 42.56it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 1: train_loss=0.1635, val_loss=0.1616



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 3/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 3/40:  18%|█▊        | 26/143 [00:09<00:23,  4.95it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.44it/s]


Epoch 2: train_loss=0.1629, val_loss=0.1612



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 4/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 4/40:  13%|█▎        | 19/143 [00:09<00:35,  3.46it/s]
Training 4/40:  22%|██▏       | 31/143 [00:09<00:16,  6.93it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 3: train_loss=0.1628, val_loss=0.1611



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 5/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 5/40:  13%|█▎        | 18/143 [00:09<00:37,  3.29it/s]
Training 5/40:  29%|██▊       | 41/143 [00:09<00:10, 10.01it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 4: train_loss=0.1626, val_loss=0.1609



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 6/40:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 6/40:  14%|█▍        | 20/143 [00:09<00:35,  3.51it/s]
Training 6/40:  24%|██▍       | 35/143 [00:09<00:13,  7.82it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 5: train_loss=0.1628, val_loss=0.1607



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 7/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 7/40:  15%|█▍        | 21/143 [00:09<00:32,  3.78it/s]
Training 7/40:  23%|██▎       | 33/143 [00:09<00:15,  7.19it/s]
Training 7/40:  34%|███▍      | 49/143 [00:09<00:07, 13.34it/s]
Training 7/40:  50%|█████     | 72/143 [00:09<00:02, 25.09it/s]
Training 7/40:  68%|██████▊   | 97/143 [00:09<00:01, 41.26it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 6: train_loss=0.1627, val_loss=0.1618



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 8/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 8/40:  15%|█▍        | 21/143 [00:09<00:32,  3.79it/s]
Training 8/40:  24%|██▍       | 34/143 [00:09<00:14,  7.52it/s]
Training 8/40:  35%|███▍      | 50/143 [00:09<00:06, 13.67it/s]
Training 8/40:  50%|████▉     | 71/143 [00:09<00:02, 24.34it/s]
Training 8/40:  64%|██████▎   | 91/143 [00:09<00:01, 36.96it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 7: train_loss=0.1629, val_loss=0.1606



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<22:29,  9.50s/it]
Training 9/40:   6%|▋         | 9/143 [00:09<01:44,  1.29it/s]
Training 9/40:  14%|█▍        | 20/143 [00:09<00:34,  3.53it/s]
Training 9/40:  24%|██▍       | 35/143 [00:09<00:13,  7.74it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 8: train_loss=0.1622, val_loss=0.1605



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 10/40:  15%|█▍        | 21/143 [00:09<00:31,  3.86it/s]
Training 10/40:  23%|██▎       | 33/143 [00:09<00:15,  7.29it/s]
Training 10/40:  39%|███▉      | 56/143 [00:09<00:05, 16.28it/s]
Training 10/40:  52%|█████▏    | 75/143 [00:09<00:02, 25.74it/s]
Training 10/40:  71%|███████   | 101/143 [00:09<00:00, 42.56it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 9: train_loss=0.1628, val_loss=0.1606



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 11/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 11/40:  14%|█▍        | 20/143 [00:09<00:33,  3.64it/s]
Training 11/40:  23%|██▎       | 33/143 [00:09<00:14,  7.37it/s]
Training 11/40:  34%|███▍      | 49/143 [00:09<00:06, 13.52it/s]
Training 11/40:  49%|████▉     | 70/143 [00:09<00:03, 24.21it/s]
Training 11/40:  63%|██████▎   | 90/143 [00:09<00:01, 36.84it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 10: train_loss=0.1617, val_loss=0.1589



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 12/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 12/40:  15%|█▌        | 22/143 [00:09<00:29,  4.06it/s]
Training 12/40:  24%|██▍       | 34/143 [00:09<00:14,  7.54it/s]
Training 12/40:  36%|███▋      | 52/143 [00:09<00:06, 14.60it/s]
Training 12/40:  53%|█████▎    | 76/143 [00:09<00:02, 27.06it/s]
Training 12/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.90it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 11: train_loss=0.1615, val_loss=0.1577



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:51,  9.23s/it]
Training 13/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 13/40:  17%|█▋        | 24/143 [00:09<00:26,  4.49it/s]
Training 13/40: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 12: train_loss=0.1612, val_loss=0.1586



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:10<23:40, 10.01s/it]
Training 14/40:   6%|▋         | 9/143 [00:10<01:49,  1.22it/s]
Training 14/40:  16%|█▌        | 23/143 [00:10<00:30,  3.95it/s]
Training 14/40:  25%|██▌       | 36/143 [00:10<00:14,  7.39it/s]
Training 14/40:  38%|███▊      | 54/143 [00:10<00:06, 13.86it/s]
Training 14/40:  50%|████▉     | 71/143 [00:10<00:03, 21.77it/s]
Training 14/40:  69%|██████▉   | 99/143 [00:10<00:01, 39.23it/s]
Training 14/40: 100%|██████████| 143/143 [00:11<00:00, 12.82it/s]


Epoch 13: train_loss=0.1607, val_loss=0.1591



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:41,  9.16s/it]
Training 15/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 15/40:  14%|█▍        | 20/143 [00:09<00:33,  3.66it/s]
Training 15/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 14: train_loss=0.1603, val_loss=0.1564



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 16/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 16/40:  12%|█▏        | 17/143 [00:09<00:41,  3.01it/s]
Training 16/40:  20%|█▉        | 28/143 [00:09<00:18,  6.18it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 15: train_loss=0.1595, val_loss=0.1660



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 17/40:   7%|▋         | 10/143 [00:09<01:33,  1.42it/s]
Training 17/40:  13%|█▎        | 19/143 [00:09<00:38,  3.24it/s]
Training 17/40:  38%|███▊      | 55/143 [00:09<00:06, 13.40it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 16: train_loss=0.1597, val_loss=0.1573



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<23:35,  9.97s/it]
Training 18/40:   6%|▌         | 8/143 [00:10<02:04,  1.09it/s]
Training 18/40:  13%|█▎        | 19/143 [00:10<00:38,  3.24it/s]
Training 18/40:  19%|█▉        | 27/143 [00:10<00:21,  5.34it/s]
Training 18/40:  30%|███       | 43/143 [00:10<00:08, 11.17it/s]
Training 18/40:  46%|████▌     | 66/143 [00:10<00:03, 22.34it/s]
Training 18/40:  62%|██████▏   | 89/143 [00:10<00:01, 36.38it/s]
Training 18/40: 100%|██████████| 143/143 [00:11<00:00, 12.83it/s]


Epoch 17: train_loss=0.1574, val_loss=0.1672



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 19/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 19/40:  14%|█▍        | 20/143 [00:09<00:33,  3.62it/s]
Training 19/40:  50%|█████     | 72/143 [00:09<00:03, 19.04it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 18: train_loss=0.1575, val_loss=0.1560



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 20/40:   8%|▊         | 11/143 [00:09<01:22,  1.59it/s]
Training 20/40:  15%|█▍        | 21/143 [00:09<00:33,  3.64it/s]
Training 20/40:  45%|████▍     | 64/143 [00:09<00:04, 15.96it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 19: train_loss=0.1553, val_loss=0.1523



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 21/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 21/40:  13%|█▎        | 19/143 [00:09<00:37,  3.27it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 20: train_loss=0.1581, val_loss=0.1552



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 22/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 22/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 14.13it/s]


Epoch 21: train_loss=0.1559, val_loss=0.1544



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<22:59,  9.71s/it]
Training 23/40:   8%|▊         | 11/143 [00:09<01:25,  1.55it/s]
Training 23/40:  15%|█▌        | 22/143 [00:09<00:32,  3.74it/s]
Training 23/40:  24%|██▍       | 34/143 [00:10<00:15,  7.01it/s]
Training 23/40:  35%|███▍      | 50/143 [00:10<00:07, 12.88it/s]
Training 23/40:  47%|████▋     | 67/143 [00:10<00:03, 21.02it/s]
Training 23/40:  63%|██████▎   | 90/143 [00:10<00:01, 35.43it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.13it/s]


Epoch 22: train_loss=0.1551, val_loss=0.1593



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 24/40:   6%|▋         | 9/143 [00:09<01:39,  1.34it/s]
Training 24/40:  14%|█▍        | 20/143 [00:09<00:33,  3.68it/s]
Training 24/40:  22%|██▏       | 31/143 [00:09<00:16,  6.84it/s]
Training 24/40:  36%|███▌      | 51/143 [00:09<00:06, 14.71it/s]
Training 24/40:  51%|█████     | 73/143 [00:09<00:02, 25.97it/s]
Training 24/40:  66%|██████▌   | 94/143 [00:09<00:01, 39.28it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 23: train_loss=0.1539, val_loss=0.1570



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 25/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 25/40:  13%|█▎        | 19/143 [00:09<00:36,  3.41it/s]
Training 25/40:  24%|██▍       | 34/143 [00:09<00:14,  7.64it/s]
Training 25/40:  34%|███▎      | 48/143 [00:09<00:07, 12.87it/s]
Training 25/40:  49%|████▉     | 70/143 [00:09<00:03, 23.90it/s]
Training 25/40:  69%|██████▉   | 99/143 [00:10<00:01, 42.52it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.48it/s]


Epoch 24: train_loss=0.1530, val_loss=0.1502



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 26/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 26/40:  12%|█▏        | 17/143 [00:09<00:43,  2.89it/s]
Training 26/40:  31%|███       | 44/143 [00:09<00:09, 10.64it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 25: train_loss=0.1553, val_loss=0.1508



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 27/40:  14%|█▍        | 20/143 [00:09<00:34,  3.60it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 26: train_loss=0.1520, val_loss=0.1500



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:00,  9.30s/it]
Training 28/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 28/40:  14%|█▍        | 20/143 [00:09<00:34,  3.61it/s]
Training 28/40:  22%|██▏       | 31/143 [00:09<00:16,  6.72it/s]
Training 28/40:  34%|███▍      | 49/143 [00:09<00:06, 13.66it/s]
Training 28/40:  47%|████▋     | 67/143 [00:09<00:03, 22.66it/s]
Training 28/40:  66%|██████▋   | 95/143 [00:09<00:01, 41.09it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 27: train_loss=0.1535, val_loss=0.1511



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 29/40:   6%|▋         | 9/143 [00:09<01:39,  1.34it/s]
Training 29/40:  12%|█▏        | 17/143 [00:09<00:41,  3.04it/s]
Training 29/40:  26%|██▌       | 37/143 [00:09<00:11,  8.92it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 28: train_loss=0.1508, val_loss=0.1511



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:51,  9.23s/it]
Training 30/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 30/40:  15%|█▌        | 22/143 [00:09<00:29,  4.06it/s]
Training 30/40:  23%|██▎       | 33/143 [00:09<00:15,  7.18it/s]
Training 30/40:  39%|███▉      | 56/143 [00:09<00:05, 16.16it/s]
Training 30/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.13it/s]
Training 30/40:  67%|██████▋   | 96/143 [00:09<00:01, 38.66it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 29: train_loss=0.1552, val_loss=0.1480



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:08<21:14,  8.98s/it]
Training 31/40:   5%|▍         | 7/143 [00:09<02:09,  1.05it/s]
Training 31/40:  13%|█▎        | 19/143 [00:09<00:33,  3.65it/s]
Training 31/40:  21%|██        | 30/143 [00:09<00:16,  6.87it/s]
Training 31/40:  32%|███▏      | 46/143 [00:09<00:07, 13.21it/s]
Training 31/40:  47%|████▋     | 67/143 [00:09<00:03, 24.19it/s]
Training 31/40:  62%|██████▏   | 88/143 [00:09<00:01, 37.79it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 30: train_loss=0.1584, val_loss=0.1539



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:10<23:45, 10.04s/it]
Training 32/40:   6%|▋         | 9/143 [00:10<01:50,  1.22it/s]
Training 32/40:  15%|█▍        | 21/143 [00:10<00:34,  3.54it/s]
Training 32/40:  24%|██▍       | 35/143 [00:10<00:14,  7.24it/s]
Training 32/40:  36%|███▌      | 51/143 [00:10<00:07, 12.94it/s]
Training 32/40:  49%|████▉     | 70/143 [00:10<00:03, 21.90it/s]
Training 32/40:  66%|██████▋   | 95/143 [00:10<00:01, 37.23it/s]
Training 32/40: 100%|██████████| 143/143 [00:11<00:00, 12.73it/s]


Epoch 31: train_loss=0.1538, val_loss=0.1521



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 33/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 33/40:  13%|█▎        | 18/143 [00:09<00:39,  3.14it/s]
Training 33/40:  45%|████▌     | 65/143 [00:09<00:04, 16.90it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 32: train_loss=0.1593, val_loss=0.1613



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 34/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 34/40:  14%|█▍        | 20/143 [00:09<00:33,  3.64it/s]
Training 34/40:  22%|██▏       | 32/143 [00:09<00:15,  7.07it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 33: train_loss=0.1627, val_loss=0.1614



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 35/40:   6%|▋         | 9/143 [00:09<01:43,  1.29it/s]
Training 35/40:  12%|█▏        | 17/143 [00:09<00:43,  2.92it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.96it/s]


Epoch 34: train_loss=0.1628, val_loss=0.1613



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 36/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 36/40:  15%|█▍        | 21/143 [00:09<00:30,  3.99it/s]
Training 36/40:  22%|██▏       | 31/143 [00:09<00:16,  6.88it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 35: train_loss=0.1628, val_loss=0.1613



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 37/40:  14%|█▍        | 20/143 [00:09<00:34,  3.61it/s]
Training 37/40:  23%|██▎       | 33/143 [00:09<00:14,  7.35it/s]
Training 37/40:  33%|███▎      | 47/143 [00:09<00:07, 12.73it/s]
Training 37/40:  47%|████▋     | 67/143 [00:09<00:03, 23.00it/s]
Training 37/40:  61%|██████    | 87/143 [00:09<00:01, 35.79it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 36: train_loss=0.1626, val_loss=0.1614



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 38/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 38/40:  22%|██▏       | 32/143 [00:09<00:17,  6.21it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 37: train_loss=0.1629, val_loss=0.1613



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 39/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 39/40:  15%|█▍        | 21/143 [00:09<00:31,  3.84it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 38: train_loss=0.1628, val_loss=0.1613



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:30,  1.46it/s]
Training 40/40:  15%|█▍        | 21/143 [00:09<00:32,  3.75it/s]
Training 40/40:  24%|██▍       | 34/143 [00:09<00:14,  7.44it/s]
Training 40/40:  35%|███▍      | 50/143 [00:09<00:06, 13.53it/s]
Training 40/40:  50%|█████     | 72/143 [00:09<00:02, 24.61it/s]
Training 40/40:  70%|██████▉   | 100/143 [00:09<00:01, 42.79it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 39: train_loss=0.1629, val_loss=0.1613


2025-06-02 07:45:48,021 - __main__ - INFO - Saved mlp probe for layer 2 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_2_probe.pth
 17%|█▋        | 1/6 [45:21<3:46:49, 2721.97s/it]2025-06-02 07:46:07,509 - __main__ - INFO - Processing layer 4...
2025-06-02 07:46:07,510 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:07<2:27:28,  7.71s/it]
Extracting features:   0%|          | 2/1149 [00:08<1:09:13,  3.62s/it]
Extracting features:   0%|          | 3/1149 [00:09<44:06,  2.31s/it]  
Extracting features:   0%|          | 4/1149 [00:09<32:16,  1.69s/it]
Extracting features:   0%|          | 5/1149 [00:10<25:44,  1.35s/it]
Extracting features:   1%|          | 6/1149 [00:11<21:47,  1.14s/it]
Extracting features:   1%|          | 7/1149 [00:12<19:15,  1.01s/it]
Extracting features:   1%|          | 8/1149 [00:12<17:37,  1.08it/s]
Extracting features:   1%|          | 9/1149 [00:13<16:31,  1.15it/s]

Epoch 0: train_loss=0.2978, val_loss=0.1652



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 2/30:  13%|█▎        | 19/143 [00:09<00:43,  2.83it/s]
Training 2/30:  27%|██▋       | 38/143 [00:09<00:15,  6.78it/s]
Training 2/30:  43%|████▎     | 61/143 [00:09<00:06, 13.28it/s]
Training 2/30:  59%|█████▊    | 84/143 [00:09<00:02, 21.95it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 1: train_loss=0.1659, val_loss=0.1696



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<22:39,  9.57s/it]
Training 3/30:   9%|▉         | 13/143 [00:09<01:09,  1.87it/s]
Training 3/30:  21%|██        | 30/143 [00:09<00:21,  5.32it/s]
Training 3/30:  37%|███▋      | 53/143 [00:09<00:07, 11.72it/s]
Training 3/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.20it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 2: train_loss=0.1650, val_loss=0.1595



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 4/30:  10%|█         | 15/143 [00:09<00:57,  2.24it/s]
Training 4/30:  24%|██▍       | 34/143 [00:09<00:17,  6.25it/s]
Training 4/30:  43%|████▎     | 61/143 [00:09<00:05, 14.03it/s]
Training 4/30:  61%|██████    | 87/143 [00:09<00:02, 23.98it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 3: train_loss=0.1632, val_loss=0.1578



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 5/30:  14%|█▍        | 20/143 [00:09<00:40,  3.01it/s]
Training 5/30:  27%|██▋       | 39/143 [00:09<00:14,  7.00it/s]
Training 5/30:  41%|████▏     | 59/143 [00:09<00:06, 12.69it/s]
Training 5/30:  63%|██████▎   | 90/143 [00:09<00:02, 24.72it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 4: train_loss=0.1601, val_loss=0.1564



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<21:55,  9.27s/it]
Training 6/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 6/30:  22%|██▏       | 32/143 [00:09<00:19,  5.79it/s]
Training 6/30:  38%|███▊      | 55/143 [00:09<00:07, 12.38it/s]
Training 6/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.95it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 5: train_loss=0.1594, val_loss=0.1692



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 7/30:   9%|▉         | 13/143 [00:09<01:07,  1.91it/s]
Training 7/30:  22%|██▏       | 32/143 [00:09<00:18,  5.87it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 6: train_loss=0.1589, val_loss=0.1574



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 8/30:  12%|█▏        | 17/143 [00:09<00:48,  2.57it/s]
Training 8/30:  22%|██▏       | 32/143 [00:09<00:19,  5.74it/s]
Training 8/30:  40%|███▉      | 57/143 [00:09<00:06, 13.02it/s]
Training 8/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.26it/s]
Training 8/30:  76%|███████▌  | 109/143 [00:09<00:00, 36.35it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 7: train_loss=0.1594, val_loss=0.1585



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:12,  9.38s/it]
Training 9/30:  13%|█▎        | 18/143 [00:09<00:47,  2.65it/s]
Training 9/30:  27%|██▋       | 39/143 [00:09<00:14,  7.00it/s]
Training 9/30:  43%|████▎     | 61/143 [00:09<00:06, 13.18it/s]
Training 9/30:  68%|██████▊   | 97/143 [00:09<00:01, 26.97it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 8: train_loss=0.1589, val_loss=0.1566



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 10/30:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 10/30:  23%|██▎       | 33/143 [00:09<00:17,  6.19it/s]
Training 10/30:  38%|███▊      | 54/143 [00:09<00:07, 12.19it/s]
Training 10/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.58it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 9: train_loss=0.1569, val_loss=0.1572



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 11/30:  13%|█▎        | 19/143 [00:09<00:43,  2.85it/s]
Training 11/30:  25%|██▌       | 36/143 [00:09<00:16,  6.42it/s]
Training 11/30:  40%|███▉      | 57/143 [00:09<00:06, 12.43it/s]
Training 11/30:  57%|█████▋    | 81/143 [00:09<00:02, 21.65it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 14.03it/s]


Epoch 10: train_loss=0.1557, val_loss=0.1519



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 12/30:  11%|█         | 16/143 [00:09<00:53,  2.37it/s]
Training 12/30:  21%|██        | 30/143 [00:09<00:21,  5.27it/s]
Training 12/30:  38%|███▊      | 54/143 [00:09<00:07, 12.13it/s]
Training 12/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.26it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 11: train_loss=0.1538, val_loss=0.1631



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<21:54,  9.25s/it]
Training 13/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 13/30:  22%|██▏       | 32/143 [00:09<00:19,  5.80it/s]
Training 13/30:  38%|███▊      | 54/143 [00:09<00:07, 12.10it/s]
Training 13/30:  54%|█████▍    | 77/143 [00:09<00:03, 20.88it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 12: train_loss=0.1576, val_loss=0.1500



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 14/30:  13%|█▎        | 18/143 [00:09<00:46,  2.71it/s]
Training 14/30:  22%|██▏       | 32/143 [00:09<00:19,  5.65it/s]
Training 14/30:  41%|████      | 58/143 [00:09<00:06, 13.19it/s]
Training 14/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.80it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 13: train_loss=0.1557, val_loss=0.1613



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:51,  9.23s/it]
Training 15/30:  10%|▉         | 14/143 [00:09<01:01,  2.09it/s]
Training 15/30:  26%|██▌       | 37/143 [00:09<00:15,  6.94it/s]
Training 15/30:  38%|███▊      | 54/143 [00:09<00:07, 11.74it/s]
Training 15/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.40it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 14: train_loss=0.1525, val_loss=0.1556



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:31,  9.09s/it]
Training 16/30:  10%|█         | 15/143 [00:09<00:56,  2.27it/s]
Training 16/30:  24%|██▍       | 34/143 [00:09<00:17,  6.32it/s]
Training 16/30:  36%|███▋      | 52/143 [00:09<00:07, 11.51it/s]
Training 16/30:  58%|█████▊    | 83/143 [00:09<00:02, 23.74it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 15: train_loss=0.1527, val_loss=0.1620



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 17/30:   9%|▉         | 13/143 [00:09<01:05,  1.97it/s]
Training 17/30:  24%|██▍       | 34/143 [00:09<00:16,  6.48it/s]
Training 17/30:  40%|███▉      | 57/143 [00:09<00:06, 13.18it/s]
Training 17/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.74it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 16: train_loss=0.1526, val_loss=0.1788



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<23:31,  9.94s/it]
Training 18/30:  10%|█         | 15/143 [00:10<01:01,  2.08it/s]
Training 18/30:  20%|██        | 29/143 [00:10<00:23,  4.81it/s]
Training 18/30:  35%|███▍      | 50/143 [00:10<00:08, 10.44it/s]
Training 18/30:  52%|█████▏    | 75/143 [00:10<00:03, 19.48it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 13.08it/s]


Epoch 17: train_loss=0.1535, val_loss=0.1470



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 19/30:  13%|█▎        | 19/143 [00:09<00:43,  2.86it/s]
Training 19/30:  25%|██▌       | 36/143 [00:09<00:16,  6.43it/s]
Training 19/30:  40%|███▉      | 57/143 [00:09<00:06, 12.46it/s]
Training 19/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.49it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 18: train_loss=0.1507, val_loss=0.1475



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:37,  9.14s/it]
Training 20/30:  11%|█         | 16/143 [00:09<00:52,  2.42it/s]
Training 20/30:  27%|██▋       | 39/143 [00:09<00:14,  7.30it/s]
Training 20/30:  45%|████▍     | 64/143 [00:09<00:05, 14.51it/s]
Training 20/30:  63%|██████▎   | 90/143 [00:09<00:02, 24.52it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 19: train_loss=0.1524, val_loss=0.1485



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 21/30:  12%|█▏        | 17/143 [00:09<00:50,  2.48it/s]
Training 21/30:  28%|██▊       | 40/143 [00:09<00:14,  7.19it/s]
Training 21/30:  43%|████▎     | 61/143 [00:09<00:06, 13.00it/s]
Training 21/30:  59%|█████▉    | 85/143 [00:09<00:02, 21.93it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.66it/s]


Epoch 20: train_loss=0.1536, val_loss=0.1585



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 22/30:  10%|█         | 15/143 [00:09<00:57,  2.24it/s]
Training 22/30:  24%|██▍       | 35/143 [00:09<00:16,  6.46it/s]
Training 22/30:  41%|████▏     | 59/143 [00:09<00:06, 13.36it/s]
Training 22/30:  59%|█████▊    | 84/143 [00:09<00:02, 22.93it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 21: train_loss=0.1487, val_loss=0.1508



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 23/30:  12%|█▏        | 17/143 [00:09<00:49,  2.52it/s]
Training 23/30:  23%|██▎       | 33/143 [00:09<00:18,  5.84it/s]
Training 23/30:  39%|███▉      | 56/143 [00:09<00:07, 12.40it/s]
Training 23/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.09it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 13.86it/s]


Epoch 22: train_loss=0.1502, val_loss=0.1572



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<22:52,  9.67s/it]
Training 24/30:  10%|▉         | 14/143 [00:09<01:04,  1.99it/s]
Training 24/30:  24%|██▍       | 35/143 [00:09<00:17,  6.23it/s]
Training 24/30:  37%|███▋      | 53/143 [00:09<00:08, 11.14it/s]
Training 24/30:  55%|█████▍    | 78/143 [00:10<00:03, 20.40it/s]
Training 24/30:  70%|██████▉   | 100/143 [00:10<00:01, 30.87it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 13.38it/s]


Epoch 23: train_loss=0.1487, val_loss=0.1481



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<22:56,  9.69s/it]
Training 25/30:  10%|█         | 15/143 [00:09<00:59,  2.13it/s]
Training 25/30:  21%|██        | 30/143 [00:09<00:21,  5.14it/s]
Training 25/30:  32%|███▏      | 46/143 [00:09<00:10,  9.51it/s]
Training 25/30:  48%|████▊     | 69/143 [00:10<00:04, 18.02it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.38it/s]


Epoch 24: train_loss=0.1478, val_loss=0.1512



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 26/30:  12%|█▏        | 17/143 [00:09<00:50,  2.47it/s]
Training 26/30:  22%|██▏       | 32/143 [00:09<00:20,  5.52it/s]
Training 26/30:  33%|███▎      | 47/143 [00:09<00:09,  9.67it/s]
Training 26/30:  53%|█████▎    | 76/143 [00:09<00:03, 20.71it/s]
Training 26/30:  73%|███████▎  | 104/143 [00:10<00:01, 34.48it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 25: train_loss=0.1471, val_loss=0.1434



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:36,  9.55s/it]
Training 27/30:  10%|█         | 15/143 [00:09<00:59,  2.17it/s]
Training 27/30:  24%|██▍       | 34/143 [00:09<00:18,  6.04it/s]
Training 27/30:  36%|███▋      | 52/143 [00:09<00:08, 10.98it/s]
Training 27/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.68it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.57it/s]


Epoch 26: train_loss=0.1501, val_loss=0.1457



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 28/30:  12%|█▏        | 17/143 [00:09<00:50,  2.48it/s]
Training 28/30:  26%|██▌       | 37/143 [00:09<00:16,  6.57it/s]
Training 28/30:  44%|████▍     | 63/143 [00:09<00:05, 13.85it/s]
Training 28/30:  62%|██████▏   | 89/143 [00:09<00:02, 23.54it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 27: train_loss=0.1479, val_loss=0.1427



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<22:17,  9.42s/it]
Training 29/30:  12%|█▏        | 17/143 [00:09<00:50,  2.49it/s]
Training 29/30:  25%|██▌       | 36/143 [00:09<00:16,  6.41it/s]
Training 29/30:  39%|███▉      | 56/143 [00:09<00:07, 12.00it/s]
Training 29/30:  61%|██████    | 87/143 [00:09<00:02, 23.83it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 28: train_loss=0.1461, val_loss=0.1478



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:54,  9.68s/it]
Training 30/30:   8%|▊         | 12/143 [00:09<01:17,  1.70it/s]
Training 30/30:  20%|█▉        | 28/143 [00:09<00:23,  4.92it/s]
Training 30/30:  33%|███▎      | 47/143 [00:09<00:09, 10.12it/s]
Training 30/30:  50%|████▉     | 71/143 [00:10<00:03, 19.00it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.40it/s]


Epoch 29: train_loss=0.1489, val_loss=0.1433


2025-06-02 08:17:15,191 - __main__ - INFO - Saved linear probe for layer 4 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_4_probe.pth
2025-06-02 08:17:34,432 - __main__ - INFO - Running mlp probe on layer 4...
2025-06-02 08:17:34,432 - __main__ - INFO - Running mlp probe on layer 4 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 1/40:   4%|▍         | 6/143 [00:09<02:36,  1.14s/it]
Training 1/40:  10%|█         | 15/143 [00:09<00:45,  2.79it/s]
Training 1/40:  17%|█▋        | 25/143 [00:09<00:20,  5.68it/s]
Training 1/40:  38%|███▊      | 54/143 [00:09<00:05, 17.26it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 0: train_loss=0.4397, val_loss=0.1652



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 2/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 2/40:  15%|█▍        | 21/143 [00:09<00:31,  3.83it/s]
Training 2/40:  24%|██▍       | 34/143 [00:09<00:14,  7.53it/s]
Training 2/40:  36%|███▋      | 52/143 [00:09<00:06, 14.46it/s]
Training 2/40:  49%|████▉     | 70/143 [00:09<00:03, 23.44it/s]
Training 2/40:  65%|██████▌   | 93/143 [00:09<00:01, 38.26it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 1: train_loss=0.1631, val_loss=0.1616



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:20,  9.44s/it]
Training 3/40:   6%|▋         | 9/143 [00:09<01:43,  1.30it/s]
Training 3/40:  13%|█▎        | 19/143 [00:09<00:37,  3.35it/s]
Training 3/40:  25%|██▌       | 36/143 [00:09<00:13,  8.16it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 2: train_loss=0.1624, val_loss=0.1600



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 4/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 4/40:  15%|█▌        | 22/143 [00:09<00:30,  3.95it/s]
Training 4/40:  22%|██▏       | 32/143 [00:09<00:16,  6.78it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 3: train_loss=0.1617, val_loss=0.1591



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<23:17,  9.84s/it]
Training 5/40:   6%|▋         | 9/143 [00:09<01:47,  1.24it/s]
Training 5/40:  12%|█▏        | 17/143 [00:10<00:44,  2.82it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.46it/s]


Epoch 4: train_loss=0.1616, val_loss=0.1598



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 6/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 6/40:  14%|█▍        | 20/143 [00:09<00:34,  3.55it/s]
Training 6/40:  22%|██▏       | 31/143 [00:09<00:16,  6.66it/s]
Training 6/40:  31%|███▏      | 45/143 [00:09<00:08, 12.00it/s]
Training 6/40:  47%|████▋     | 67/143 [00:09<00:03, 23.20it/s]
Training 6/40:  70%|██████▉   | 100/143 [00:09<00:00, 45.15it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 5: train_loss=0.1606, val_loss=0.1582



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 7/40:   7%|▋         | 10/143 [00:09<01:29,  1.48it/s]
Training 7/40:  13%|█▎        | 18/143 [00:09<00:39,  3.16it/s]
Training 7/40:  22%|██▏       | 31/143 [00:09<00:16,  6.92it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 6: train_loss=0.1605, val_loss=0.1584



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<22:34,  9.54s/it]
Training 8/40:   7%|▋         | 10/143 [00:09<01:33,  1.43it/s]
Training 8/40:  14%|█▍        | 20/143 [00:09<00:35,  3.46it/s]
Training 8/40:  24%|██▍       | 34/143 [00:09<00:14,  7.35it/s]
Training 8/40:  39%|███▉      | 56/143 [00:09<00:05, 15.64it/s]
Training 8/40:  54%|█████▍    | 77/143 [00:10<00:02, 25.88it/s]
Training 8/40:  68%|██████▊   | 97/143 [00:10<00:01, 38.08it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 13.34it/s]


Epoch 7: train_loss=0.1587, val_loss=0.1574



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:55,  9.26s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 9/40:  15%|█▍        | 21/143 [00:09<00:32,  3.76it/s]
Training 9/40:  25%|██▌       | 36/143 [00:09<00:13,  8.04it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 8: train_loss=0.1580, val_loss=0.1557



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 10/40:  14%|█▍        | 20/143 [00:09<00:33,  3.64it/s]
Training 10/40:  22%|██▏       | 32/143 [00:09<00:15,  7.07it/s]
Training 10/40:  32%|███▏      | 46/143 [00:09<00:07, 12.43it/s]
Training 10/40:  47%|████▋     | 67/143 [00:09<00:03, 23.17it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 9: train_loss=0.1576, val_loss=0.1548



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 11/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 11/40:  11%|█         | 16/143 [00:09<00:43,  2.91it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 10: train_loss=0.1591, val_loss=0.1549



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 12/40:   6%|▌         | 8/143 [00:09<01:55,  1.17it/s]
Training 12/40:  13%|█▎        | 18/143 [00:09<00:38,  3.27it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 11: train_loss=0.1582, val_loss=0.1559



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:18,  9.01s/it]
Training 13/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 13/40:  13%|█▎        | 19/143 [00:09<00:35,  3.50it/s]
Training 13/40:  38%|███▊      | 54/143 [00:09<00:06, 13.95it/s]
Training 13/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 12: train_loss=0.1568, val_loss=0.1557



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 14/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 14/40:  15%|█▍        | 21/143 [00:09<00:33,  3.66it/s]
Training 14/40:  24%|██▍       | 34/143 [00:09<00:14,  7.31it/s]
Training 14/40:  35%|███▍      | 50/143 [00:09<00:06, 13.36it/s]
Training 14/40:  50%|█████     | 72/143 [00:09<00:02, 24.43it/s]
Training 14/40:  68%|██████▊   | 97/143 [00:10<00:01, 40.40it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 13: train_loss=0.1585, val_loss=0.1564



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 15/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 15/40:  14%|█▍        | 20/143 [00:09<00:32,  3.74it/s]
Training 15/40:  21%|██        | 30/143 [00:09<00:17,  6.60it/s]
Training 15/40:  34%|███▍      | 49/143 [00:09<00:06, 14.05it/s]
Training 15/40:  48%|████▊     | 69/143 [00:09<00:03, 24.26it/s]
Training 15/40:  64%|██████▍   | 92/143 [00:09<00:01, 39.21it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 14: train_loss=0.1575, val_loss=0.1621



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 16/40:   6%|▌         | 8/143 [00:09<01:56,  1.16it/s]
Training 16/40:  13%|█▎        | 19/143 [00:09<00:35,  3.45it/s]
Training 16/40:  23%|██▎       | 33/143 [00:09<00:14,  7.42it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 15: train_loss=0.1551, val_loss=0.1508



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<22:21,  9.45s/it]
Training 17/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 17/40:  14%|█▍        | 20/143 [00:09<00:35,  3.49it/s]
Training 17/40:  29%|██▊       | 41/143 [00:09<00:10,  9.45it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 16: train_loss=0.1553, val_loss=0.1585



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 18/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 18/40:  14%|█▍        | 20/143 [00:09<00:33,  3.69it/s]
Training 18/40:  29%|██▊       | 41/143 [00:09<00:10,  9.87it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 17: train_loss=0.1558, val_loss=0.1521



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 19/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 19/40:  12%|█▏        | 17/143 [00:09<00:42,  3.00it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 18: train_loss=0.1542, val_loss=0.1522



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 20/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 20/40:  10%|█         | 15/143 [00:09<00:48,  2.66it/s]
Training 20/40:  49%|████▉     | 70/143 [00:09<00:03, 18.90it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 14.35it/s]


Epoch 19: train_loss=0.1537, val_loss=0.1498



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 21/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 21/40:  12%|█▏        | 17/143 [00:09<00:41,  3.03it/s]
Training 21/40:  34%|███▎      | 48/143 [00:09<00:07, 12.16it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 20: train_loss=0.1520, val_loss=0.1471



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 22/40:   6%|▋         | 9/143 [00:09<01:40,  1.33it/s]
Training 22/40:  14%|█▍        | 20/143 [00:09<00:33,  3.66it/s]
Training 22/40:  21%|██        | 30/143 [00:09<00:17,  6.52it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 21: train_loss=0.1512, val_loss=0.1471



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<22:05,  9.33s/it]
Training 23/40:   7%|▋         | 10/143 [00:09<01:31,  1.46it/s]
Training 23/40:  15%|█▍        | 21/143 [00:09<00:32,  3.73it/s]
Training 23/40:  22%|██▏       | 31/143 [00:09<00:17,  6.52it/s]
Training 23/40:  36%|███▌      | 51/143 [00:09<00:06, 14.19it/s]
Training 23/40:  50%|████▉     | 71/143 [00:09<00:02, 24.14it/s]
Training 23/40:  65%|██████▌   | 93/143 [00:09<00:01, 37.99it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.58it/s]


Epoch 22: train_loss=0.1508, val_loss=0.1556



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 24/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 24/40:  13%|█▎        | 18/143 [00:09<00:37,  3.31it/s]
Training 24/40:  36%|███▌      | 51/143 [00:09<00:07, 13.06it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 23: train_loss=0.1507, val_loss=0.1459



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 25/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 25/40:  11%|█         | 16/143 [00:09<00:43,  2.90it/s]
Training 25/40:  39%|███▉      | 56/143 [00:09<00:05, 14.81it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 24: train_loss=0.1495, val_loss=0.1515



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 26/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 26/40:  15%|█▌        | 22/143 [00:09<00:29,  4.16it/s]
Training 26/40: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 25: train_loss=0.1486, val_loss=0.1434



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 27/40:   6%|▋         | 9/143 [00:09<01:38,  1.35it/s]
Training 27/40:  13%|█▎        | 18/143 [00:09<00:38,  3.28it/s]
Training 27/40:  28%|██▊       | 40/143 [00:09<00:10,  9.80it/s]
Training 27/40: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 26: train_loss=0.1477, val_loss=0.1457



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 28/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 28/40:  13%|█▎        | 19/143 [00:09<00:35,  3.49it/s]
Training 28/40:  31%|███       | 44/143 [00:09<00:09, 10.90it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 27: train_loss=0.1469, val_loss=0.1412



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 29/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 29/40:  17%|█▋        | 25/143 [00:09<00:25,  4.68it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 28: train_loss=0.1492, val_loss=0.1478



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 30/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 30/40:  13%|█▎        | 19/143 [00:09<00:35,  3.50it/s]
Training 30/40:  22%|██▏       | 32/143 [00:09<00:15,  7.30it/s]
Training 30/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 29: train_loss=0.1528, val_loss=0.1615



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 31/40:   7%|▋         | 10/143 [00:09<01:27,  1.53it/s]
Training 31/40:  15%|█▍        | 21/143 [00:09<00:31,  3.91it/s]
Training 31/40:  24%|██▍       | 34/143 [00:09<00:14,  7.74it/s]
Training 31/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 30: train_loss=0.1503, val_loss=0.1476



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 32/40:   6%|▋         | 9/143 [00:09<01:39,  1.35it/s]
Training 32/40:  14%|█▍        | 20/143 [00:09<00:33,  3.70it/s]
Training 32/40:  24%|██▍       | 35/143 [00:09<00:13,  8.09it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 31: train_loss=0.1552, val_loss=0.1476



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 33/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 33/40:  14%|█▍        | 20/143 [00:09<00:33,  3.62it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 32: train_loss=0.1463, val_loss=0.1422



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 34/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 34/40:  11%|█         | 16/143 [00:09<00:44,  2.86it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 33: train_loss=0.1475, val_loss=0.1552



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<21:20,  9.01s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 35/40:  15%|█▍        | 21/143 [00:09<00:32,  3.74it/s]
Training 35/40:  42%|████▏     | 60/143 [00:09<00:05, 15.37it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 34: train_loss=0.1442, val_loss=0.1541



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<22:02,  9.31s/it]
Training 36/40:   6%|▋         | 9/143 [00:09<01:41,  1.31it/s]
Training 36/40:  13%|█▎        | 19/143 [00:09<00:36,  3.39it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 35: train_loss=0.1443, val_loss=0.1388



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 37/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 37/40:  11%|█         | 16/143 [00:09<00:43,  2.92it/s]
Training 37/40:  31%|███       | 44/143 [00:09<00:08, 11.28it/s]
Training 37/40: 100%|██████████| 143/143 [00:09<00:00, 14.49it/s]


Epoch 36: train_loss=0.1449, val_loss=0.1414



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 38/40:   6%|▌         | 8/143 [00:09<01:53,  1.19it/s]
Training 38/40:  13%|█▎        | 18/143 [00:09<00:37,  3.31it/s]
Training 38/40:  33%|███▎      | 47/143 [00:09<00:08, 11.87it/s]
Training 38/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 37: train_loss=0.1411, val_loss=0.1581



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 39/40:   6%|▋         | 9/143 [00:09<01:38,  1.36it/s]
Training 39/40:  13%|█▎        | 19/143 [00:09<00:35,  3.51it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.62it/s]


Epoch 38: train_loss=0.1441, val_loss=0.1391



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 40/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 40/40:  13%|█▎        | 19/143 [00:09<00:36,  3.43it/s]
Training 40/40:  20%|█▉        | 28/143 [00:09<00:19,  6.04it/s]
Training 40/40:  34%|███▍      | 49/143 [00:09<00:06, 14.43it/s]
Training 40/40:  48%|████▊     | 68/143 [00:09<00:03, 24.19it/s]
Training 40/40:  60%|██████    | 86/143 [00:09<00:01, 35.60it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 39: train_loss=0.1413, val_loss=0.1424


2025-06-02 08:30:43,859 - __main__ - INFO - Saved mlp probe for layer 4 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_4_probe.pth
 33%|███▎      | 2/6 [1:30:17<3:00:26, 2706.66s/it]2025-06-02 08:31:03,455 - __main__ - INFO - Processing layer 6...
2025-06-02 08:31:03,455 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:07<2:26:36,  7.66s/it]
Extracting features:   0%|          | 2/1149 [00:08<1:08:51,  3.60s/it]
Extracting features:   0%|          | 3/1149 [00:09<43:53,  2.30s/it]  
Extracting features:   0%|          | 4/1149 [00:09<32:09,  1.68s/it]
Extracting features:   0%|          | 5/1149 [00:10<25:39,  1.35s/it]
Extracting features:   1%|          | 6/1149 [00:11<21:45,  1.14s/it]
Extracting features:   1%|          | 7/1149 [00:12<19:16,  1.01s/it]
Extracting features:   1%|          | 8/1149 [00:12<17:39,  1.08it/s]
Extracting features:   1%|          | 9/1149 [00:13<16:33,  1.15it/

Epoch 0: train_loss=0.1919, val_loss=0.1643



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 2/30:  12%|█▏        | 17/143 [00:09<00:52,  2.40it/s]
Training 2/30:  25%|██▌       | 36/143 [00:09<00:17,  6.18it/s]
Training 2/30:  41%|████      | 58/143 [00:10<00:06, 12.15it/s]
Training 2/30:  65%|██████▌   | 93/143 [00:10<00:01, 25.11it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 1: train_loss=0.1640, val_loss=0.1575



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<22:10,  9.37s/it]
Training 3/30:   9%|▉         | 13/143 [00:09<01:08,  1.91it/s]
Training 3/30:  22%|██▏       | 31/143 [00:09<00:19,  5.64it/s]
Training 3/30:  35%|███▍      | 50/143 [00:09<00:08, 11.00it/s]
Training 3/30:  52%|█████▏    | 74/143 [00:09<00:03, 20.13it/s]
Training 3/30:  74%|███████▍  | 106/143 [00:09<00:01, 36.17it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 2: train_loss=0.1606, val_loss=0.1553



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<21:45,  9.20s/it]
Training 4/30:  12%|█▏        | 17/143 [00:09<00:49,  2.55it/s]
Training 4/30:  26%|██▌       | 37/143 [00:09<00:15,  6.78it/s]
Training 4/30:  41%|████▏     | 59/143 [00:09<00:06, 13.09it/s]
Training 4/30:  61%|██████    | 87/143 [00:09<00:02, 23.87it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 3: train_loss=0.1602, val_loss=0.1805



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 5/30:  10%|█         | 15/143 [00:09<00:57,  2.22it/s]
Training 5/30:  25%|██▌       | 36/143 [00:09<00:16,  6.59it/s]
Training 5/30:  39%|███▉      | 56/143 [00:09<00:07, 12.23it/s]
Training 5/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.54it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 4: train_loss=0.1609, val_loss=0.1543



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 6/30:  11%|█         | 16/143 [00:09<00:52,  2.41it/s]
Training 6/30:  27%|██▋       | 38/143 [00:09<00:14,  7.08it/s]
Training 6/30:  43%|████▎     | 62/143 [00:09<00:05, 14.00it/s]
Training 6/30:  66%|██████▌   | 94/143 [00:09<00:01, 26.45it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 5: train_loss=0.1585, val_loss=0.1506



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 7/30:  11%|█         | 16/143 [00:09<00:51,  2.45it/s]
Training 7/30:  22%|██▏       | 31/143 [00:09<00:19,  5.66it/s]
Training 7/30:  43%|████▎     | 61/143 [00:09<00:05, 14.54it/s]
Training 7/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.48it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 6: train_loss=0.1543, val_loss=0.1504



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 8/30:  10%|▉         | 14/143 [00:09<01:01,  2.11it/s]
Training 8/30:  22%|██▏       | 31/143 [00:09<00:19,  5.71it/s]
Training 8/30:  37%|███▋      | 53/143 [00:09<00:07, 12.08it/s]
Training 8/30:  56%|█████▌    | 80/143 [00:09<00:02, 22.58it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.12it/s]


Epoch 7: train_loss=0.1544, val_loss=0.1487



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<23:21,  9.87s/it]
Training 9/30:  10%|▉         | 14/143 [00:09<01:06,  1.95it/s]
Training 9/30:  22%|██▏       | 31/143 [00:10<00:21,  5.31it/s]
Training 9/30:  38%|███▊      | 54/143 [00:10<00:07, 11.53it/s]
Training 9/30:  54%|█████▍    | 77/143 [00:10<00:03, 19.81it/s]
Training 9/30:  76%|███████▌  | 108/143 [00:10<00:01, 34.75it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.12it/s]


Epoch 8: train_loss=0.1515, val_loss=0.1511



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:05,  9.33s/it]
Training 10/30:  12%|█▏        | 17/143 [00:09<00:50,  2.52it/s]
Training 10/30:  27%|██▋       | 39/143 [00:09<00:14,  7.10it/s]
Training 10/30:  44%|████▍     | 63/143 [00:09<00:05, 13.88it/s]
Training 10/30:  69%|██████▉   | 99/143 [00:09<00:01, 27.72it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 9: train_loss=0.1511, val_loss=0.1465



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 11/30:  12%|█▏        | 17/143 [00:09<00:49,  2.54it/s]
Training 11/30:  27%|██▋       | 39/143 [00:09<00:14,  7.15it/s]
Training 11/30:  44%|████▍     | 63/143 [00:09<00:05, 13.97it/s]
Training 11/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.47it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 10: train_loss=0.1492, val_loss=0.1543



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 12/30:  10%|▉         | 14/143 [00:09<01:02,  2.07it/s]
Training 12/30:  24%|██▍       | 35/143 [00:09<00:16,  6.45it/s]
Training 12/30:  41%|████      | 58/143 [00:09<00:06, 12.98it/s]
Training 12/30:  60%|██████    | 86/143 [00:09<00:02, 23.67it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 11: train_loss=0.1508, val_loss=0.1469



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 13/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 13/30:  24%|██▍       | 35/143 [00:09<00:16,  6.42it/s]
Training 13/30:  40%|███▉      | 57/143 [00:09<00:06, 12.69it/s]
Training 13/30:  61%|██████    | 87/143 [00:09<00:02, 24.25it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.97it/s]


Epoch 12: train_loss=0.1499, val_loss=0.1460



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 14/30:  13%|█▎        | 19/143 [00:09<00:43,  2.83it/s]
Training 14/30:  24%|██▍       | 35/143 [00:09<00:17,  6.15it/s]
Training 14/30:  42%|████▏     | 60/143 [00:09<00:06, 13.31it/s]
Training 14/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.23it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 13: train_loss=0.1478, val_loss=0.1440



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 15/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 15/30:  22%|██▏       | 32/143 [00:09<00:18,  5.95it/s]
Training 15/30:  37%|███▋      | 53/143 [00:09<00:07, 12.05it/s]
Training 15/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.20it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 14: train_loss=0.1478, val_loss=0.1419



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:34,  9.11s/it]
Training 16/30:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 16/30:  21%|██        | 30/143 [00:09<00:20,  5.64it/s]
Training 16/30:  38%|███▊      | 54/143 [00:09<00:07, 12.62it/s]
Training 16/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.30it/s]
Training 16/30:  75%|███████▍  | 107/143 [00:09<00:00, 36.46it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 15: train_loss=0.1488, val_loss=0.1420



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<21:44,  9.18s/it]
Training 17/30:  13%|█▎        | 19/143 [00:09<00:43,  2.86it/s]
Training 17/30:  29%|██▊       | 41/143 [00:09<00:13,  7.50it/s]
Training 17/30:  47%|████▋     | 67/143 [00:09<00:05, 14.98it/s]
Training 17/30:  66%|██████▋   | 95/143 [00:09<00:01, 25.71it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 16: train_loss=0.1445, val_loss=0.1409



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 18/30:  11%|█         | 16/143 [00:09<00:53,  2.37it/s]
Training 18/30:  24%|██▍       | 35/143 [00:09<00:17,  6.32it/s]
Training 18/30:  38%|███▊      | 55/143 [00:09<00:07, 11.96it/s]
Training 18/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.27it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 13.86it/s]


Epoch 17: train_loss=0.1475, val_loss=0.1402



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 19/30:  13%|█▎        | 18/143 [00:09<00:46,  2.68it/s]
Training 19/30:  24%|██▍       | 35/143 [00:09<00:17,  6.23it/s]
Training 19/30:  39%|███▉      | 56/143 [00:09<00:07, 12.21it/s]
Training 19/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.37it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 18: train_loss=0.1449, val_loss=0.1415



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:34,  9.11s/it]
Training 20/30:  13%|█▎        | 18/143 [00:09<00:45,  2.73it/s]
Training 20/30:  25%|██▌       | 36/143 [00:09<00:16,  6.55it/s]
Training 20/30:  42%|████▏     | 60/143 [00:09<00:06, 13.50it/s]
Training 20/30:  58%|█████▊    | 83/143 [00:09<00:02, 22.34it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 19: train_loss=0.1447, val_loss=0.1392



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 21/30:  13%|█▎        | 18/143 [00:09<00:45,  2.72it/s]
Training 21/30:  24%|██▍       | 34/143 [00:09<00:17,  6.09it/s]
Training 21/30:  38%|███▊      | 55/143 [00:09<00:07, 12.14it/s]
Training 21/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.82it/s]
Training 21/30:  73%|███████▎  | 105/143 [00:09<00:01, 34.32it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 20: train_loss=0.1422, val_loss=0.1402



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:28,  9.08s/it]
Training 22/30:  12%|█▏        | 17/143 [00:09<00:48,  2.59it/s]
Training 22/30:  24%|██▍       | 35/143 [00:09<00:16,  6.42it/s]
Training 22/30:  39%|███▉      | 56/143 [00:09<00:06, 12.52it/s]
Training 22/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.95it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 21: train_loss=0.1426, val_loss=0.1393



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 23/30:  13%|█▎        | 18/143 [00:09<00:46,  2.70it/s]
Training 23/30:  24%|██▍       | 34/143 [00:09<00:17,  6.06it/s]
Training 23/30:  40%|███▉      | 57/143 [00:09<00:06, 12.67it/s]
Training 23/30:  57%|█████▋    | 82/143 [00:09<00:02, 22.27it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 22: train_loss=0.1435, val_loss=0.1648



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 24/30:  10%|▉         | 14/143 [00:09<01:01,  2.09it/s]
Training 24/30:  22%|██▏       | 32/143 [00:09<00:18,  5.89it/s]
Training 24/30:  41%|████      | 58/143 [00:09<00:06, 13.42it/s]
Training 24/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.79it/s]
Training 24/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 23: train_loss=0.1464, val_loss=0.1375



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 25/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 25/30:  24%|██▍       | 35/143 [00:09<00:16,  6.51it/s]
Training 25/30:  42%|████▏     | 60/143 [00:09<00:06, 13.76it/s]
Training 25/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.84it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 24: train_loss=0.1423, val_loss=0.1388



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 26/30:  12%|█▏        | 17/143 [00:09<00:50,  2.47it/s]
Training 26/30:  26%|██▌       | 37/143 [00:09<00:16,  6.57it/s]
Training 26/30:  43%|████▎     | 62/143 [00:09<00:05, 13.56it/s]
Training 26/30:  66%|██████▋   | 95/143 [00:09<00:01, 26.03it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 25: train_loss=0.1421, val_loss=0.1375



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 27/30:  12%|█▏        | 17/143 [00:09<00:48,  2.58it/s]
Training 27/30:  24%|██▍       | 34/143 [00:09<00:17,  6.19it/s]
Training 27/30:  39%|███▉      | 56/143 [00:09<00:06, 12.58it/s]
Training 27/30:  59%|█████▉    | 85/143 [00:09<00:02, 23.91it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 14.22it/s]


Epoch 26: train_loss=0.1408, val_loss=0.1368



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<21:29,  9.08s/it]
Training 28/30:  10%|█         | 15/143 [00:09<00:56,  2.28it/s]
Training 28/30:  22%|██▏       | 31/143 [00:09<00:19,  5.69it/s]
Training 28/30:  38%|███▊      | 55/143 [00:09<00:06, 12.71it/s]
Training 28/30:  57%|█████▋    | 81/143 [00:09<00:02, 22.84it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 27: train_loss=0.1395, val_loss=0.1537



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 29/30:  11%|█         | 16/143 [00:09<00:52,  2.40it/s]
Training 29/30:  24%|██▍       | 34/143 [00:09<00:17,  6.20it/s]
Training 29/30:  42%|████▏     | 60/143 [00:09<00:06, 13.71it/s]
Training 29/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.48it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 28: train_loss=0.1411, val_loss=0.1428



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 30/30:  10%|█         | 15/143 [00:09<00:56,  2.25it/s]
Training 30/30:  19%|█▉        | 27/143 [00:09<00:24,  4.77it/s]
Training 30/30:  37%|███▋      | 53/143 [00:09<00:07, 12.33it/s]
Training 30/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.37it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 29: train_loss=0.1401, val_loss=0.1361


2025-06-02 09:02:09,630 - __main__ - INFO - Saved linear probe for layer 6 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_6_probe.pth
2025-06-02 09:02:28,882 - __main__ - INFO - Running mlp probe on layer 6...
2025-06-02 09:02:28,882 - __main__ - INFO - Running mlp probe on layer 6 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:41,  9.16s/it]
Training 1/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 1/40:  13%|█▎        | 18/143 [00:09<00:39,  3.17it/s]
Training 1/40:  43%|████▎     | 62/143 [00:09<00:05, 16.14it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.34it/s]


Epoch 0: train_loss=0.3873, val_loss=0.1632



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 2/40:   7%|▋         | 10/143 [00:09<01:27,  1.51it/s]
Training 2/40:  12%|█▏        | 17/143 [00:09<00:42,  3.00it/s]
Training 2/40:  28%|██▊       | 40/143 [00:09<00:10,  9.84it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 1: train_loss=0.1630, val_loss=0.1619



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 3/40:   6%|▋         | 9/143 [00:09<01:44,  1.29it/s]
Training 3/40:  12%|█▏        | 17/143 [00:09<00:43,  2.91it/s]
Training 3/40:  21%|██        | 30/143 [00:09<00:17,  6.56it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 2: train_loss=0.1628, val_loss=0.1610



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:33,  9.11s/it]
Training 4/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 4/40:  15%|█▍        | 21/143 [00:09<00:31,  3.83it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 3: train_loss=0.1627, val_loss=0.1606



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:38,  9.14s/it]
Training 5/40:   5%|▍         | 7/143 [00:09<02:11,  1.03it/s]
Training 5/40:  13%|█▎        | 18/143 [00:09<00:37,  3.37it/s]
Training 5/40:  20%|█▉        | 28/143 [00:09<00:18,  6.23it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 4: train_loss=0.1623, val_loss=0.1614



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 6/40:   6%|▌         | 8/143 [00:09<01:55,  1.17it/s]
Training 6/40:  13%|█▎        | 18/143 [00:09<00:38,  3.27it/s]
Training 6/40:  45%|████▌     | 65/143 [00:09<00:04, 17.00it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.25it/s]


Epoch 5: train_loss=0.1617, val_loss=0.1594



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 7/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 7/40:  13%|█▎        | 19/143 [00:09<00:35,  3.45it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 6: train_loss=0.1614, val_loss=0.1597



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:59,  9.29s/it]
Training 8/40:   8%|▊         | 12/143 [00:09<01:14,  1.77it/s]
Training 8/40:  16%|█▌        | 23/143 [00:09<00:29,  4.05it/s]
Training 8/40:  26%|██▌       | 37/143 [00:09<00:13,  8.01it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 7: train_loss=0.1605, val_loss=0.1592



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<22:01,  9.30s/it]
Training 9/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 9/40:  13%|█▎        | 19/143 [00:09<00:36,  3.40it/s]
Training 9/40:  21%|██        | 30/143 [00:09<00:17,  6.52it/s]
Training 9/40:  34%|███▎      | 48/143 [00:09<00:07, 13.44it/s]
Training 9/40:  46%|████▌     | 66/143 [00:09<00:03, 22.43it/s]
Training 9/40:  65%|██████▌   | 93/143 [00:09<00:01, 40.15it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.61it/s]


Epoch 8: train_loss=0.1602, val_loss=0.1595



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:39,  1.34it/s]
Training 10/40:  13%|█▎        | 19/143 [00:09<00:35,  3.47it/s]
Training 10/40:  21%|██        | 30/143 [00:09<00:16,  6.65it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 9: train_loss=0.1597, val_loss=0.1575



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:43,  9.61s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 11/40:  16%|█▌        | 23/143 [00:09<00:30,  3.99it/s]
Training 11/40:  24%|██▍       | 34/143 [00:09<00:15,  6.99it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 10: train_loss=0.1591, val_loss=0.1564



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:41,  9.17s/it]
Training 12/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 12/40:  15%|█▍        | 21/143 [00:09<00:32,  3.81it/s]
Training 12/40:  24%|██▍       | 34/143 [00:09<00:14,  7.55it/s]
Training 12/40:  35%|███▍      | 50/143 [00:09<00:06, 13.73it/s]
Training 12/40:  50%|█████     | 72/143 [00:09<00:02, 24.98it/s]
Training 12/40:  66%|██████▋   | 95/143 [00:09<00:01, 39.79it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 11: train_loss=0.1587, val_loss=0.1558



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 13/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 13/40:  15%|█▍        | 21/143 [00:09<00:32,  3.78it/s]
Training 13/40:  24%|██▍       | 34/143 [00:09<00:14,  7.49it/s]
Training 13/40:  38%|███▊      | 55/143 [00:09<00:05, 15.66it/s]
Training 13/40:  51%|█████     | 73/143 [00:09<00:02, 24.58it/s]
Training 13/40:  68%|██████▊   | 97/143 [00:09<00:01, 40.05it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 12: train_loss=0.1586, val_loss=0.1550



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<22:12,  9.39s/it]
Training 14/40:   6%|▌         | 8/143 [00:09<01:57,  1.15it/s]
Training 14/40:  13%|█▎        | 18/143 [00:09<00:38,  3.22it/s]
Training 14/40:  36%|███▋      | 52/143 [00:09<00:07, 13.00it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 13: train_loss=0.1558, val_loss=0.1525



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 15/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 15/40:  15%|█▍        | 21/143 [00:09<00:31,  3.82it/s]
Training 15/40:  22%|██▏       | 32/143 [00:09<00:16,  6.93it/s]
Training 15/40:  34%|███▎      | 48/143 [00:09<00:07, 13.05it/s]
Training 15/40:  50%|████▉     | 71/143 [00:09<00:02, 24.80it/s]
Training 15/40:  68%|██████▊   | 97/143 [00:09<00:01, 41.61it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 14: train_loss=0.1570, val_loss=0.1524



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:51,  9.24s/it]
Training 16/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 16/40:  14%|█▍        | 20/143 [00:09<00:33,  3.63it/s]
Training 16/40:  22%|██▏       | 32/143 [00:09<00:15,  7.06it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 15: train_loss=0.1551, val_loss=0.1721



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 17/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 17/40:  12%|█▏        | 17/143 [00:09<00:40,  3.08it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 14.35it/s]


Epoch 16: train_loss=0.1535, val_loss=0.1496



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 18/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 18/40:  16%|█▌        | 23/143 [00:09<00:29,  4.12it/s]
Training 18/40:  24%|██▍       | 35/143 [00:09<00:14,  7.44it/s]
Training 18/40:  36%|███▋      | 52/143 [00:09<00:06, 13.84it/s]
Training 18/40:  50%|█████     | 72/143 [00:09<00:02, 23.75it/s]
Training 18/40:  66%|██████▋   | 95/143 [00:10<00:01, 38.28it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 17: train_loss=0.1570, val_loss=0.1563



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<22:39,  9.57s/it]
Training 19/40:   6%|▌         | 8/143 [00:09<01:59,  1.13it/s]
Training 19/40:  13%|█▎        | 19/143 [00:09<00:36,  3.37it/s]
Training 19/40:  20%|██        | 29/143 [00:09<00:18,  6.12it/s]
Training 19/40:  31%|███▏      | 45/143 [00:09<00:08, 12.12it/s]
Training 19/40:  45%|████▌     | 65/143 [00:10<00:03, 22.04it/s]
Training 19/40:  61%|██████    | 87/143 [00:10<00:01, 35.79it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.29it/s]


Epoch 18: train_loss=0.1512, val_loss=0.1501



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:10<24:19, 10.28s/it]
Training 20/40:   6%|▋         | 9/143 [00:10<01:52,  1.19it/s]
Training 20/40:  13%|█▎        | 19/143 [00:10<00:40,  3.08it/s]
Training 20/40:  20%|█▉        | 28/143 [00:10<00:21,  5.39it/s]
Training 20/40:  41%|████▏     | 59/143 [00:10<00:05, 16.52it/s]
Training 20/40: 100%|██████████| 143/143 [00:11<00:00, 12.61it/s]


Epoch 19: train_loss=0.1493, val_loss=0.1449



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<23:35,  9.97s/it]
Training 21/40:  11%|█         | 16/143 [00:10<00:57,  2.22it/s]
Training 21/40:  26%|██▌       | 37/143 [00:10<00:16,  6.32it/s]
Training 21/40:  39%|███▉      | 56/143 [00:10<00:07, 11.37it/s]
Training 21/40:  53%|█████▎    | 76/143 [00:10<00:03, 18.48it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.04it/s]


Epoch 20: train_loss=0.1493, val_loss=0.1577



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<23:10,  9.79s/it]
Training 22/40:   8%|▊         | 12/143 [00:09<01:17,  1.68it/s]
Training 22/40:  17%|█▋        | 25/143 [00:09<00:27,  4.26it/s]
Training 22/40:  31%|███▏      | 45/143 [00:10<00:10,  9.72it/s]
Training 22/40:  41%|████▏     | 59/143 [00:10<00:05, 14.63it/s]
Training 22/40:  50%|█████     | 72/143 [00:10<00:03, 20.47it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.08it/s]


Epoch 21: train_loss=0.1478, val_loss=0.1519



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:10<23:47, 10.05s/it]
Training 23/40:   7%|▋         | 10/143 [00:10<01:37,  1.36it/s]
Training 23/40:  15%|█▍        | 21/143 [00:10<00:35,  3.48it/s]
Training 23/40:  39%|███▉      | 56/143 [00:10<00:06, 12.91it/s]
Training 23/40: 100%|██████████| 143/143 [00:11<00:00, 12.99it/s]


Epoch 22: train_loss=0.1448, val_loss=0.1423



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 24/40:   5%|▍         | 7/143 [00:09<02:17,  1.01s/it]
Training 24/40:  10%|█         | 15/143 [00:09<00:48,  2.62it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.86it/s]


Epoch 23: train_loss=0.1455, val_loss=0.1442



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 25/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 25/40:  12%|█▏        | 17/143 [00:09<00:40,  3.14it/s]
Training 25/40:  18%|█▊        | 26/143 [00:09<00:20,  5.72it/s]
Training 25/40:  31%|███       | 44/143 [00:09<00:07, 12.79it/s]
Training 25/40:  43%|████▎     | 62/143 [00:09<00:03, 21.91it/s]
Training 25/40:  57%|█████▋    | 82/143 [00:09<00:01, 34.76it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 24: train_loss=0.1427, val_loss=0.1478



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 26/40:   6%|▌         | 8/143 [00:09<01:55,  1.17it/s]
Training 26/40:  13%|█▎        | 19/143 [00:09<00:35,  3.47it/s]
Training 26/40:  20%|██        | 29/143 [00:09<00:18,  6.30it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 25: train_loss=0.1426, val_loss=0.1391



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 27/40:  13%|█▎        | 18/143 [00:09<00:40,  3.08it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 26: train_loss=0.1416, val_loss=0.1406



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 28/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 28/40:  10%|█         | 15/143 [00:09<00:48,  2.66it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 27: train_loss=0.1388, val_loss=0.1341



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:50,  9.65s/it]
Training 29/40:   5%|▍         | 7/143 [00:09<02:19,  1.02s/it]
Training 29/40:  15%|█▍        | 21/143 [00:09<00:31,  3.81it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 28: train_loss=0.1413, val_loss=0.1349



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 30/40:   5%|▍         | 7/143 [00:09<02:12,  1.03it/s]
Training 30/40:  10%|█         | 15/143 [00:09<00:47,  2.71it/s]
Training 30/40:  15%|█▌        | 22/143 [00:09<00:25,  4.71it/s]
Training 30/40:  29%|██▊       | 41/143 [00:09<00:08, 12.21it/s]
Training 30/40:  42%|████▏     | 60/143 [00:09<00:03, 21.92it/s]
Training 30/40:  55%|█████▍    | 78/143 [00:09<00:01, 33.35it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.83it/s]


Epoch 29: train_loss=0.1361, val_loss=0.1323



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<22:32,  9.52s/it]
Training 31/40:   6%|▌         | 8/143 [00:09<01:58,  1.14it/s]
Training 31/40:  13%|█▎        | 18/143 [00:09<00:39,  3.18it/s]
Training 31/40:  21%|██        | 30/143 [00:09<00:17,  6.52it/s]
Training 31/40:  34%|███▍      | 49/143 [00:09<00:06, 13.70it/s]
Training 31/40:  46%|████▌     | 66/143 [00:10<00:03, 21.96it/s]
Training 31/40:  66%|██████▋   | 95/143 [00:10<00:01, 40.77it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.37it/s]


Epoch 30: train_loss=0.1394, val_loss=0.1423



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 32/40:   6%|▌         | 8/143 [00:09<01:54,  1.18it/s]
Training 32/40:  12%|█▏        | 17/143 [00:09<00:40,  3.08it/s]
Training 32/40:  27%|██▋       | 39/143 [00:09<00:10,  9.52it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 31: train_loss=0.1393, val_loss=0.1378



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:01,  9.30s/it]
Training 33/40:   5%|▍         | 7/143 [00:09<02:14,  1.01it/s]
Training 33/40:  12%|█▏        | 17/143 [00:09<00:40,  3.10it/s]
Training 33/40:  20%|█▉        | 28/143 [00:09<00:18,  6.23it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 32: train_loss=0.1345, val_loss=0.1362



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 34/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 34/40:  12%|█▏        | 17/143 [00:09<00:42,  2.97it/s]
Training 34/40:  38%|███▊      | 54/143 [00:09<00:06, 13.69it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 33: train_loss=0.1373, val_loss=0.1327



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:33,  9.53s/it]
Training 35/40:   6%|▌         | 8/143 [00:09<01:58,  1.14it/s]
Training 35/40:  10%|█         | 15/143 [00:09<00:50,  2.55it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 34: train_loss=0.1343, val_loss=0.1386



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 36/40:   3%|▎         | 5/143 [00:09<03:14,  1.41s/it]
Training 36/40:  17%|█▋        | 24/143 [00:09<00:25,  4.70it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 35: train_loss=0.1366, val_loss=0.1317



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 37/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 37/40:  20%|██        | 29/143 [00:09<00:20,  5.50it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 36: train_loss=0.1356, val_loss=0.1341



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:50,  9.23s/it]
Training 38/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 38/40:  14%|█▍        | 20/143 [00:09<00:34,  3.57it/s]
Training 38/40:  23%|██▎       | 33/143 [00:09<00:15,  7.28it/s]
Training 38/40:  33%|███▎      | 47/143 [00:09<00:07, 12.61it/s]
Training 38/40:  47%|████▋     | 67/143 [00:09<00:03, 22.73it/s]
Training 38/40:  61%|██████    | 87/143 [00:09<00:01, 35.41it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 13.70it/s]


Epoch 37: train_loss=0.1362, val_loss=0.1505



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 39/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 39/40:  12%|█▏        | 17/143 [00:09<00:42,  2.99it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 14.20it/s]


Epoch 38: train_loss=0.1384, val_loss=0.1329



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<23:08,  9.78s/it]
Training 40/40:   6%|▌         | 8/143 [00:09<02:02,  1.11it/s]
Training 40/40:  13%|█▎        | 18/143 [00:09<00:40,  3.09it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.48it/s]


Epoch 39: train_loss=0.1371, val_loss=0.1528


2025-06-02 09:15:55,262 - __main__ - INFO - Saved mlp probe for layer 6 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_6_probe.pth
 50%|█████     | 3/6 [2:15:29<2:15:26, 2708.79s/it]2025-06-02 09:16:14,783 - __main__ - INFO - Processing layer 8...
2025-06-02 09:16:14,784 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:07<2:31:33,  7.92s/it]
Extracting features:   0%|          | 2/1149 [00:08<1:11:11,  3.72s/it]
Extracting features:   0%|          | 3/1149 [00:09<45:12,  2.37s/it]  
Extracting features:   0%|          | 4/1149 [00:10<32:58,  1.73s/it]
Extracting features:   0%|          | 5/1149 [00:10<26:12,  1.37s/it]
Extracting features:   1%|          | 6/1149 [00:11<22:05,  1.16s/it]
Extracting features:   1%|          | 7/1149 [00:12<19:31,  1.03s/it]
Extracting features:   1%|          | 8/1149 [00:13<17:48,  1.07it/s]
Extracting features:   1%|          | 9/1149 [00:13<16:39,  1.14it/

Epoch 0: train_loss=0.2832, val_loss=0.1635



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<22:49,  9.64s/it]
Training 2/30:  11%|█         | 16/143 [00:09<00:55,  2.29it/s]
Training 2/30:  21%|██        | 30/143 [00:09<00:22,  5.10it/s]
Training 2/30:  34%|███▎      | 48/143 [00:09<00:09, 10.05it/s]
Training 2/30:  55%|█████▌    | 79/143 [00:10<00:02, 21.70it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 13.39it/s]


Epoch 1: train_loss=0.1636, val_loss=0.1591



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<22:21,  9.45s/it]
Training 3/30:   8%|▊         | 11/143 [00:09<01:22,  1.59it/s]
Training 3/30:  19%|█▉        | 27/143 [00:09<00:23,  4.89it/s]
Training 3/30:  33%|███▎      | 47/143 [00:09<00:09, 10.51it/s]
Training 3/30:  50%|████▉     | 71/143 [00:09<00:03, 19.56it/s]
Training 3/30:  67%|██████▋   | 96/143 [00:09<00:01, 31.87it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 13.59it/s]


Epoch 2: train_loss=0.1612, val_loss=0.1650



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<22:52,  9.67s/it]
Training 4/30:  10%|█         | 15/143 [00:09<00:59,  2.14it/s]
Training 4/30:  23%|██▎       | 33/143 [00:09<00:19,  5.75it/s]
Training 4/30:  38%|███▊      | 55/143 [00:09<00:07, 11.80it/s]
Training 4/30:  57%|█████▋    | 81/143 [00:10<00:02, 21.40it/s]
Training 4/30: 100%|██████████| 143/143 [00:10<00:00, 13.38it/s]


Epoch 3: train_loss=0.1577, val_loss=0.1571



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 5/30:  10%|█         | 15/143 [00:09<00:58,  2.21it/s]
Training 5/30:  24%|██▍       | 35/143 [00:09<00:17,  6.35it/s]
Training 5/30:  38%|███▊      | 54/143 [00:09<00:07, 11.69it/s]
Training 5/30:  55%|█████▌    | 79/143 [00:09<00:03, 21.17it/s]
Training 5/30:  76%|███████▌  | 109/143 [00:09<00:00, 36.15it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 13.77it/s]


Epoch 4: train_loss=0.1561, val_loss=0.1519



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 6/30:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 6/30:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 6/30:  35%|███▍      | 50/143 [00:09<00:07, 12.00it/s]
Training 6/30:  54%|█████▍    | 77/143 [00:09<00:02, 22.48it/s]
Training 6/30: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 5: train_loss=0.1530, val_loss=0.1600



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:02,  9.32s/it]
Training 7/30:   9%|▉         | 13/143 [00:09<01:07,  1.92it/s]
Training 7/30:  24%|██▍       | 35/143 [00:09<00:16,  6.52it/s]
Training 7/30:  38%|███▊      | 55/143 [00:09<00:07, 12.17it/s]
Training 7/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.29it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 6: train_loss=0.1525, val_loss=0.1476



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:53,  9.25s/it]
Training 8/30:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 8/30:  21%|██        | 30/143 [00:09<00:20,  5.56it/s]
Training 8/30:  36%|███▌      | 51/143 [00:09<00:07, 11.58it/s]
Training 8/30:  56%|█████▌    | 80/143 [00:09<00:02, 22.78it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 7: train_loss=0.1517, val_loss=0.1459



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<21:55,  9.26s/it]
Training 9/30:  11%|█         | 16/143 [00:09<00:53,  2.38it/s]
Training 9/30:  24%|██▍       | 35/143 [00:09<00:16,  6.36it/s]
Training 9/30:  41%|████      | 58/143 [00:09<00:06, 12.93it/s]
Training 9/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.46it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 8: train_loss=0.1502, val_loss=0.1447



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:27,  9.07s/it]
Training 10/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 10/30:  21%|██        | 30/143 [00:09<00:20,  5.54it/s]
Training 10/30:  37%|███▋      | 53/143 [00:09<00:07, 12.27it/s]
Training 10/30:  52%|█████▏    | 74/143 [00:09<00:03, 20.39it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 9: train_loss=0.1489, val_loss=0.1433



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 11/30:  13%|█▎        | 19/143 [00:09<00:43,  2.83it/s]
Training 11/30:  27%|██▋       | 38/143 [00:09<00:15,  6.79it/s]
Training 11/30:  45%|████▌     | 65/143 [00:09<00:05, 14.50it/s]
Training 11/30:  63%|██████▎   | 90/143 [00:09<00:02, 23.94it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 10: train_loss=0.1469, val_loss=0.1455



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 12/30:  10%|▉         | 14/143 [00:09<01:01,  2.10it/s]
Training 12/30:  22%|██▏       | 32/143 [00:09<00:18,  5.90it/s]
Training 12/30:  34%|███▍      | 49/143 [00:09<00:08, 10.76it/s]
Training 12/30:  52%|█████▏    | 75/143 [00:09<00:03, 20.89it/s]
Training 12/30:  72%|███████▏  | 103/143 [00:09<00:01, 35.07it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 11: train_loss=0.1445, val_loss=0.1497



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 13/30:  13%|█▎        | 18/143 [00:09<00:47,  2.65it/s]
Training 13/30:  26%|██▌       | 37/143 [00:09<00:16,  6.57it/s]
Training 13/30:  41%|████▏     | 59/143 [00:09<00:06, 12.76it/s]
Training 13/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.37it/s]
Training 13/30: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 12: train_loss=0.1458, val_loss=0.1397



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 14/30:  11%|█         | 16/143 [00:09<00:52,  2.40it/s]
Training 14/30:  23%|██▎       | 33/143 [00:09<00:18,  5.97it/s]
Training 14/30:  38%|███▊      | 55/143 [00:09<00:07, 12.30it/s]
Training 14/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.55it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 13: train_loss=0.1427, val_loss=0.1389



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 15/30:  10%|█         | 15/143 [00:09<00:56,  2.26it/s]
Training 15/30:  22%|██▏       | 32/143 [00:09<00:18,  5.86it/s]
Training 15/30:  38%|███▊      | 54/143 [00:09<00:07, 12.22it/s]
Training 15/30:  55%|█████▌    | 79/143 [00:09<00:02, 21.86it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 14: train_loss=0.1411, val_loss=0.1382



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:09<21:54,  9.26s/it]
Training 16/30:  10%|▉         | 14/143 [00:09<01:01,  2.08it/s]
Training 16/30:  22%|██▏       | 32/143 [00:09<00:18,  5.86it/s]
Training 16/30:  39%|███▉      | 56/143 [00:09<00:06, 12.73it/s]
Training 16/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.88it/s]
Training 16/30:  74%|███████▍  | 106/143 [00:09<00:01, 34.82it/s]
Training 16/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 15: train_loss=0.1437, val_loss=0.1380



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<22:08,  9.36s/it]
Training 17/30:  10%|▉         | 14/143 [00:09<01:02,  2.06it/s]
Training 17/30:  22%|██▏       | 31/143 [00:09<00:20,  5.59it/s]
Training 17/30:  34%|███▍      | 49/143 [00:09<00:08, 10.66it/s]
Training 17/30:  51%|█████     | 73/143 [00:09<00:03, 19.82it/s]
Training 17/30:  69%|██████▊   | 98/143 [00:09<00:01, 32.20it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 16: train_loss=0.1416, val_loss=0.1392



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:48,  9.21s/it]
Training 18/30:  11%|█         | 16/143 [00:09<00:53,  2.40it/s]
Training 18/30:  24%|██▍       | 35/143 [00:09<00:16,  6.40it/s]
Training 18/30:  38%|███▊      | 55/143 [00:09<00:07, 12.12it/s]
Training 18/30:  55%|█████▌    | 79/143 [00:09<00:03, 21.32it/s]
Training 18/30:  71%|███████   | 101/143 [00:09<00:01, 32.16it/s]
Training 18/30: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 17: train_loss=0.1431, val_loss=0.1365



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:09<22:25,  9.47s/it]
Training 19/30:  10%|▉         | 14/143 [00:09<01:03,  2.03it/s]
Training 19/30:  21%|██        | 30/143 [00:09<00:21,  5.30it/s]
Training 19/30:  35%|███▍      | 50/143 [00:09<00:08, 10.90it/s]
Training 19/30:  51%|█████     | 73/143 [00:09<00:03, 19.53it/s]
Training 19/30:  73%|███████▎  | 104/143 [00:09<00:01, 35.00it/s]
Training 19/30: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 18: train_loss=0.1424, val_loss=0.1348



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 20/30:  12%|█▏        | 17/143 [00:09<00:49,  2.56it/s]
Training 20/30:  24%|██▍       | 35/143 [00:09<00:17,  6.35it/s]
Training 20/30:  38%|███▊      | 54/143 [00:09<00:07, 11.78it/s]
Training 20/30:  55%|█████▌    | 79/143 [00:09<00:02, 21.40it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.01it/s]


Epoch 19: train_loss=0.1389, val_loss=0.1346



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 21/30:   9%|▉         | 13/143 [00:09<01:07,  1.92it/s]
Training 21/30:  23%|██▎       | 33/143 [00:09<00:18,  6.09it/s]
Training 21/30:  36%|███▌      | 51/143 [00:09<00:08, 11.17it/s]
Training 21/30:  55%|█████▍    | 78/143 [00:09<00:03, 21.53it/s]
Training 21/30: 100%|██████████| 143/143 [00:10<00:00, 13.87it/s]


Epoch 20: train_loss=0.1386, val_loss=0.1445



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<22:51,  9.66s/it]
Training 22/30:  11%|█         | 16/143 [00:09<00:55,  2.29it/s]
Training 22/30:  22%|██▏       | 31/143 [00:09<00:21,  5.30it/s]
Training 22/30:  38%|███▊      | 54/143 [00:09<00:07, 11.64it/s]
Training 22/30:  55%|█████▌    | 79/143 [00:10<00:03, 20.87it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 13.41it/s]


Epoch 21: train_loss=0.1378, val_loss=0.1335



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<22:58,  9.71s/it]
Training 23/30:   9%|▉         | 13/143 [00:09<01:10,  1.84it/s]
Training 23/30:  20%|██        | 29/143 [00:09<00:22,  5.04it/s]
Training 23/30:  34%|███▍      | 49/143 [00:10<00:08, 10.51it/s]
Training 23/30:  52%|█████▏    | 75/143 [00:10<00:03, 20.12it/s]
Training 23/30:  72%|███████▏  | 103/143 [00:10<00:01, 33.66it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 22: train_loss=0.1451, val_loss=0.1350



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 24/30:  13%|█▎        | 18/143 [00:09<00:45,  2.72it/s]
Training 24/30:  22%|██▏       | 31/143 [00:09<00:20,  5.45it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 23: train_loss=0.1367, val_loss=0.1366



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 25/30:  10%|█         | 15/143 [00:09<00:57,  2.22it/s]
Training 25/30:  22%|██▏       | 32/143 [00:09<00:19,  5.76it/s]
Training 25/30:  39%|███▉      | 56/143 [00:09<00:06, 12.60it/s]
Training 25/30:  56%|█████▌    | 80/143 [00:09<00:02, 21.69it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 24: train_loss=0.1368, val_loss=0.1321



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:44,  9.19s/it]
Training 26/30:  10%|▉         | 14/143 [00:09<01:01,  2.10it/s]
Training 26/30:  24%|██▍       | 35/143 [00:09<00:16,  6.55it/s]
Training 26/30:  42%|████▏     | 60/143 [00:09<00:06, 13.76it/s]
Training 26/30:  62%|██████▏   | 89/143 [00:09<00:02, 24.97it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.98it/s]


Epoch 25: train_loss=0.1359, val_loss=0.1378



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<22:12,  9.38s/it]
Training 27/30:  13%|█▎        | 19/143 [00:09<00:44,  2.80it/s]
Training 27/30:  26%|██▌       | 37/143 [00:09<00:16,  6.51it/s]
Training 27/30:  47%|████▋     | 67/143 [00:09<00:05, 15.04it/s]
Training 27/30:  65%|██████▌   | 93/143 [00:09<00:02, 24.78it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 26: train_loss=0.1352, val_loss=0.1397



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<22:16,  9.41s/it]
Training 28/30:  10%|█         | 15/143 [00:09<00:58,  2.20it/s]
Training 28/30:  22%|██▏       | 31/143 [00:09<00:20,  5.49it/s]
Training 28/30:  37%|███▋      | 53/143 [00:09<00:07, 11.69it/s]
Training 28/30:  55%|█████▌    | 79/143 [00:09<00:02, 21.52it/s]
Training 28/30:  76%|███████▌  | 109/143 [00:09<00:00, 36.38it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 27: train_loss=0.1377, val_loss=0.1323



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:51,  9.23s/it]
Training 29/30:   8%|▊         | 11/143 [00:09<01:21,  1.63it/s]
Training 29/30:  18%|█▊        | 26/143 [00:09<00:24,  4.79it/s]
Training 29/30:  36%|███▌      | 51/143 [00:09<00:07, 12.01it/s]
Training 29/30:  53%|█████▎    | 76/143 [00:09<00:03, 21.59it/s]
Training 29/30: 100%|██████████| 143/143 [00:10<00:00, 13.97it/s]


Epoch 28: train_loss=0.1338, val_loss=0.1361



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 30/30:  11%|█         | 16/143 [00:09<00:53,  2.36it/s]
Training 30/30:  24%|██▍       | 34/143 [00:09<00:17,  6.10it/s]
Training 30/30:  36%|███▋      | 52/143 [00:09<00:08, 11.15it/s]
Training 30/30:  50%|████▉     | 71/143 [00:09<00:03, 18.29it/s]
Training 30/30:  69%|██████▉   | 99/143 [00:09<00:01, 32.37it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 29: train_loss=0.1360, val_loss=0.1302


2025-06-02 09:47:32,810 - __main__ - INFO - Saved linear probe for layer 8 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_8_probe.pth
2025-06-02 09:47:52,502 - __main__ - INFO - Running mlp probe on layer 8...
2025-06-02 09:47:52,502 - __main__ - INFO - Running mlp probe on layer 8 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:09<21:39,  9.15s/it]
Training 1/40:   6%|▋         | 9/143 [00:09<01:40,  1.34it/s]
Training 1/40:  15%|█▍        | 21/143 [00:09<00:31,  3.88it/s]
Training 1/40:  24%|██▍       | 34/143 [00:09<00:14,  7.61it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 0: train_loss=0.6221, val_loss=0.1626



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<22:54,  9.68s/it]
Training 2/40:   7%|▋         | 10/143 [00:09<01:34,  1.41it/s]
Training 2/40:  15%|█▍        | 21/143 [00:09<00:33,  3.61it/s]
Training 2/40:  21%|██        | 30/143 [00:09<00:18,  6.04it/s]
Training 2/40:  34%|███▎      | 48/143 [00:10<00:07, 12.76it/s]
Training 2/40:  46%|████▌     | 66/143 [00:10<00:03, 21.51it/s]
Training 2/40:  62%|██████▏   | 89/143 [00:10<00:01, 35.99it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.21it/s]


Epoch 1: train_loss=0.1626, val_loss=0.1605



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<23:13,  9.81s/it]
Training 3/40:   7%|▋         | 10/143 [00:09<01:35,  1.39it/s]
Training 3/40:  13%|█▎        | 18/143 [00:10<00:42,  2.97it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 13.51it/s]


Epoch 2: train_loss=0.1619, val_loss=0.1596



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:58,  9.29s/it]
Training 4/40:   4%|▍         | 6/143 [00:09<02:38,  1.16s/it]
Training 4/40:   9%|▉         | 13/143 [00:09<00:56,  2.32it/s]
Training 4/40:  16%|█▌        | 23/143 [00:09<00:23,  5.16it/s]
Training 4/40:  24%|██▍       | 34/143 [00:09<00:11,  9.36it/s]
Training 4/40:  38%|███▊      | 54/143 [00:09<00:04, 19.65it/s]
Training 4/40:  50%|█████     | 72/143 [00:09<00:02, 31.13it/s]
Training 4/40:  66%|██████▌   | 94/143 [00:10<00:01, 48.49it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 13.53it/s]


Epoch 3: train_loss=0.1609, val_loss=0.1586



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 5/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 5/40:  17%|█▋        | 25/143 [00:09<00:26,  4.53it/s]
Training 5/40:  26%|██▌       | 37/143 [00:09<00:13,  7.94it/s]
Training 5/40:  39%|███▉      | 56/143 [00:09<00:05, 15.31it/s]
Training 5/40:  52%|█████▏    | 75/143 [00:09<00:02, 24.84it/s]
Training 5/40:  67%|██████▋   | 96/143 [00:09<00:01, 38.23it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.81it/s]


Epoch 4: train_loss=0.1601, val_loss=0.1595



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<22:18,  9.43s/it]
Training 6/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 6/40:  14%|█▍        | 20/143 [00:09<00:35,  3.49it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.00it/s]


Epoch 5: train_loss=0.1594, val_loss=0.1594



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<22:07,  9.35s/it]
Training 7/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 7/40:  13%|█▎        | 18/143 [00:09<00:39,  3.17it/s]
Training 7/40:  27%|██▋       | 39/143 [00:09<00:11,  9.19it/s]
Training 7/40: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 6: train_loss=0.1574, val_loss=0.1544



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<22:00,  9.30s/it]
Training 8/40:   6%|▋         | 9/143 [00:09<01:41,  1.32it/s]
Training 8/40:  14%|█▍        | 20/143 [00:09<00:34,  3.61it/s]
Training 8/40:  43%|████▎     | 61/143 [00:09<00:05, 15.53it/s]
Training 8/40: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 7: train_loss=0.1557, val_loss=0.1520



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 9/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 9/40:  10%|█         | 15/143 [00:09<00:50,  2.55it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 8: train_loss=0.1535, val_loss=0.1505



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 10/40:   6%|▋         | 9/143 [00:09<01:44,  1.29it/s]
Training 10/40:  14%|█▍        | 20/143 [00:09<00:34,  3.53it/s]
Training 10/40:  22%|██▏       | 32/143 [00:09<00:16,  6.86it/s]
Training 10/40: 100%|██████████| 143/143 [00:10<00:00, 13.80it/s]


Epoch 9: train_loss=0.1521, val_loss=0.1473



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 11/40:  17%|█▋        | 24/143 [00:09<00:27,  4.32it/s]
Training 11/40:  27%|██▋       | 39/143 [00:09<00:12,  8.56it/s]
Training 11/40:  40%|███▉      | 57/143 [00:09<00:05, 15.42it/s]
Training 11/40:  53%|█████▎    | 76/143 [00:09<00:02, 24.82it/s]
Training 11/40:  76%|███████▌  | 108/143 [00:09<00:00, 45.83it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 13.65it/s]


Epoch 10: train_loss=0.1483, val_loss=0.1678



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 12/40:   7%|▋         | 10/143 [00:09<01:30,  1.46it/s]
Training 12/40:  15%|█▌        | 22/143 [00:09<00:30,  3.96it/s]
Training 12/40:  24%|██▍       | 34/143 [00:09<00:14,  7.34it/s]
Training 12/40:  36%|███▋      | 52/143 [00:09<00:06, 14.25it/s]
Training 12/40:  51%|█████     | 73/143 [00:09<00:02, 24.81it/s]
Training 12/40:  65%|██████▌   | 93/143 [00:09<00:01, 37.29it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 13.69it/s]


Epoch 11: train_loss=0.1483, val_loss=0.1406



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 13/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 13/40:  15%|█▌        | 22/143 [00:09<00:30,  3.90it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 14.16it/s]


Epoch 12: train_loss=0.1516, val_loss=0.1429



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<21:56,  9.27s/it]
Training 14/40:   9%|▉         | 13/143 [00:09<01:07,  1.93it/s]
Training 14/40:  17%|█▋        | 24/143 [00:09<00:28,  4.21it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 13: train_loss=0.1448, val_loss=0.1723



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<22:30,  9.51s/it]
Training 15/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 15/40:  15%|█▍        | 21/143 [00:09<00:33,  3.61it/s]
Training 15/40:  38%|███▊      | 54/143 [00:09<00:06, 12.95it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 14: train_loss=0.1473, val_loss=0.1400



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<22:15,  9.40s/it]
Training 16/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 16/40:  15%|█▌        | 22/143 [00:09<00:31,  3.80it/s]
Training 16/40:  24%|██▍       | 34/143 [00:09<00:15,  7.17it/s]
Training 16/40:  34%|███▍      | 49/143 [00:09<00:07, 12.85it/s]
Training 16/40:  49%|████▉     | 70/143 [00:09<00:03, 23.43it/s]
Training 16/40:  64%|██████▎   | 91/143 [00:10<00:01, 36.63it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 13.47it/s]


Epoch 15: train_loss=0.1425, val_loss=0.1392



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 17/40:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 17/40:  16%|█▌        | 23/143 [00:09<00:29,  4.04it/s]
Training 17/40:  25%|██▌       | 36/143 [00:09<00:13,  7.70it/s]
Training 17/40:  39%|███▉      | 56/143 [00:09<00:05, 15.35it/s]
Training 17/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.84it/s]
Training 17/40:  71%|███████▏  | 102/143 [00:09<00:00, 41.84it/s]
Training 17/40: 100%|██████████| 143/143 [00:10<00:00, 13.63it/s]


Epoch 16: train_loss=0.1466, val_loss=0.1538



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:09<22:14,  9.40s/it]
Training 18/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 18/40:  16%|█▌        | 23/143 [00:09<00:29,  4.01it/s]
Training 18/40:  26%|██▌       | 37/143 [00:09<00:13,  7.94it/s]
Training 18/40:  38%|███▊      | 54/143 [00:09<00:06, 14.34it/s]
Training 18/40:  50%|█████     | 72/143 [00:09<00:03, 23.21it/s]
Training 18/40:  66%|██████▌   | 94/143 [00:10<00:01, 37.16it/s]
Training 18/40: 100%|██████████| 143/143 [00:10<00:00, 13.43it/s]


Epoch 17: train_loss=0.1507, val_loss=0.1407



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:09<22:02,  9.31s/it]
Training 19/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 19/40:  15%|█▍        | 21/143 [00:09<00:33,  3.69it/s]
Training 19/40:  24%|██▍       | 34/143 [00:09<00:14,  7.38it/s]
Training 19/40:  34%|███▎      | 48/143 [00:09<00:07, 12.69it/s]
Training 19/40:  47%|████▋     | 67/143 [00:09<00:03, 22.28it/s]
Training 19/40:  62%|██████▏   | 88/143 [00:09<00:01, 35.72it/s]
Training 19/40:  78%|███████▊  | 112/143 [00:10<00:00, 54.40it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 18: train_loss=0.1441, val_loss=0.1390



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<22:08,  9.36s/it]
Training 20/40:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 20/40:  15%|█▌        | 22/143 [00:09<00:31,  3.82it/s]
Training 20/40:  24%|██▍       | 34/143 [00:09<00:15,  7.19it/s]
Training 20/40:  34%|███▍      | 49/143 [00:09<00:07, 12.87it/s]
Training 20/40:  48%|████▊     | 68/143 [00:09<00:03, 22.36it/s]
Training 20/40:  62%|██████▏   | 89/143 [00:09<00:01, 35.68it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 13.54it/s]


Epoch 19: train_loss=0.1438, val_loss=0.1413



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 21/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 21/40:  15%|█▌        | 22/143 [00:09<00:31,  3.89it/s]
Training 21/40:  24%|██▍       | 35/143 [00:09<00:14,  7.56it/s]
Training 21/40:  35%|███▍      | 50/143 [00:09<00:07, 13.25it/s]
Training 21/40:  49%|████▉     | 70/143 [00:09<00:03, 23.32it/s]
Training 21/40:  65%|██████▌   | 93/143 [00:09<00:01, 38.10it/s]
Training 21/40: 100%|██████████| 143/143 [00:10<00:00, 13.57it/s]


Epoch 20: train_loss=0.1414, val_loss=0.1365



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:21,  9.45s/it]
Training 22/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 22/40:  13%|█▎        | 18/143 [00:09<00:40,  3.07it/s]
Training 22/40:  17%|█▋        | 25/143 [00:09<00:23,  5.00it/s]
Training 22/40:  24%|██▍       | 34/143 [00:09<00:13,  8.34it/s]
Training 22/40:  36%|███▋      | 52/143 [00:09<00:05, 17.46it/s]
Training 22/40:  50%|█████     | 72/143 [00:10<00:02, 30.31it/s]
Training 22/40:  68%|██████▊   | 97/143 [00:10<00:00, 50.26it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 13.32it/s]


Epoch 21: train_loss=0.1405, val_loss=0.1407



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<21:46,  9.20s/it]
Training 23/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 23/40:  16%|█▌        | 23/143 [00:09<00:28,  4.16it/s]
Training 23/40:  24%|██▍       | 35/143 [00:09<00:14,  7.59it/s]
Training 23/40:  36%|███▌      | 51/143 [00:09<00:06, 13.77it/s]
Training 23/40:  49%|████▉     | 70/143 [00:09<00:03, 23.34it/s]
Training 23/40:  66%|██████▋   | 95/143 [00:09<00:01, 39.63it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.71it/s]


Epoch 22: train_loss=0.1422, val_loss=0.1345



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<22:35,  9.55s/it]
Training 24/40:   6%|▌         | 8/143 [00:09<01:59,  1.13it/s]
Training 24/40:  13%|█▎        | 19/143 [00:09<00:36,  3.37it/s]
Training 24/40:  22%|██▏       | 31/143 [00:09<00:16,  6.69it/s]
Training 24/40:  33%|███▎      | 47/143 [00:09<00:07, 12.67it/s]
Training 24/40:  44%|████▍     | 63/143 [00:10<00:03, 20.44it/s]
Training 24/40:  59%|█████▊    | 84/143 [00:10<00:01, 33.68it/s]
Training 24/40:  73%|███████▎  | 104/143 [00:10<00:00, 48.71it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.19it/s]


Epoch 23: train_loss=0.1394, val_loss=0.1360



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<22:13,  9.39s/it]
Training 25/40:   7%|▋         | 10/143 [00:09<01:31,  1.45it/s]
Training 25/40:  13%|█▎        | 19/143 [00:09<00:37,  3.30it/s]
Training 25/40:  23%|██▎       | 33/143 [00:09<00:15,  7.25it/s]
Training 25/40:  33%|███▎      | 47/143 [00:09<00:07, 12.49it/s]
Training 25/40:  47%|████▋     | 67/143 [00:09<00:03, 22.52it/s]
Training 25/40:  62%|██████▏   | 89/143 [00:10<00:01, 36.47it/s]
Training 25/40: 100%|██████████| 143/143 [00:10<00:00, 13.39it/s]


Epoch 24: train_loss=0.1384, val_loss=0.1322



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<22:09,  9.36s/it]
Training 26/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 26/40:  16%|█▌        | 23/143 [00:09<00:29,  4.08it/s]
Training 26/40:  27%|██▋       | 39/143 [00:09<00:12,  8.62it/s]
Training 26/40:  38%|███▊      | 54/143 [00:09<00:06, 14.25it/s]
Training 26/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.82it/s]
Training 26/40:  69%|██████▉   | 99/143 [00:09<00:01, 39.54it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.56it/s]


Epoch 25: train_loss=0.1382, val_loss=0.1312



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<22:36,  9.56s/it]
Training 27/40:   8%|▊         | 11/143 [00:09<01:23,  1.58it/s]
Training 27/40:  17%|█▋        | 24/143 [00:09<00:28,  4.21it/s]
Training 27/40:  27%|██▋       | 38/143 [00:09<00:12,  8.08it/s]
Training 27/40:  38%|███▊      | 54/143 [00:09<00:06, 14.01it/s]
Training 27/40:  52%|█████▏    | 74/143 [00:10<00:02, 23.79it/s]
Training 27/40:  68%|██████▊   | 97/143 [00:10<00:01, 38.14it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.33it/s]


Epoch 26: train_loss=0.1360, val_loss=0.1343



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<22:35,  9.54s/it]
Training 28/40:   6%|▋         | 9/143 [00:09<01:44,  1.28it/s]
Training 28/40:  12%|█▏        | 17/143 [00:09<00:43,  2.90it/s]
Training 28/40:  19%|█▉        | 27/143 [00:09<00:20,  5.67it/s]
Training 28/40:  28%|██▊       | 40/143 [00:09<00:09, 10.53it/s]
Training 28/40:  40%|███▉      | 57/143 [00:10<00:04, 18.91it/s]
Training 28/40:  55%|█████▌    | 79/143 [00:10<00:01, 32.94it/s]
Training 28/40:  71%|███████   | 101/143 [00:10<00:00, 49.86it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.18it/s]


Epoch 27: train_loss=0.1335, val_loss=0.1298



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:31,  9.52s/it]
Training 29/40:   9%|▉         | 13/143 [00:09<01:09,  1.88it/s]
Training 29/40:  16%|█▌        | 23/143 [00:09<00:30,  3.90it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.91it/s]


Epoch 28: train_loss=0.1342, val_loss=0.1480



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:52,  9.25s/it]
Training 30/40:   7%|▋         | 10/143 [00:09<01:30,  1.48it/s]
Training 30/40:  16%|█▌        | 23/143 [00:09<00:28,  4.20it/s]
Training 30/40:  30%|███       | 43/143 [00:09<00:10,  9.96it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 14.19it/s]


Epoch 29: train_loss=0.1339, val_loss=0.1360



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<22:09,  9.36s/it]
Training 31/40:   6%|▋         | 9/143 [00:09<01:42,  1.31it/s]
Training 31/40:  14%|█▍        | 20/143 [00:09<00:34,  3.58it/s]
Training 31/40:  46%|████▌     | 66/143 [00:09<00:04, 16.85it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 14.06it/s]


Epoch 30: train_loss=0.1348, val_loss=0.1345



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 32/40:   8%|▊         | 11/143 [00:09<01:21,  1.62it/s]
Training 32/40:  15%|█▌        | 22/143 [00:09<00:31,  3.90it/s]
Training 32/40:  24%|██▍       | 35/143 [00:09<00:14,  7.58it/s]
Training 32/40:  38%|███▊      | 54/143 [00:09<00:05, 14.84it/s]
Training 32/40:  53%|█████▎    | 76/143 [00:09<00:02, 25.86it/s]
Training 32/40:  69%|██████▊   | 98/143 [00:09<00:01, 39.66it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.68it/s]


Epoch 31: train_loss=0.1335, val_loss=0.1314



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:18,  9.43s/it]
Training 33/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 33/40:  13%|█▎        | 19/143 [00:09<00:38,  3.22it/s]
Training 33/40:  20%|██        | 29/143 [00:09<00:18,  6.00it/s]
Training 33/40:  28%|██▊       | 40/143 [00:09<00:10, 10.09it/s]
Training 33/40:  37%|███▋      | 53/143 [00:09<00:05, 16.45it/s]
Training 33/40:  51%|█████     | 73/143 [00:10<00:02, 29.41it/s]
Training 33/40:  66%|██████▌   | 94/143 [00:10<00:01, 45.94it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.30it/s]


Epoch 32: train_loss=0.1291, val_loss=0.1247



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<22:19,  9.43s/it]
Training 34/40:   7%|▋         | 10/143 [00:09<01:32,  1.44it/s]
Training 34/40:  13%|█▎        | 19/143 [00:09<00:37,  3.29it/s]
Training 34/40:  19%|█▉        | 27/143 [00:09<00:21,  5.50it/s]
Training 34/40:  25%|██▌       | 36/143 [00:09<00:12,  8.83it/s]
Training 34/40:  36%|███▋      | 52/143 [00:09<00:05, 16.86it/s]
Training 34/40:  49%|████▉     | 70/143 [00:10<00:02, 28.39it/s]
Training 34/40:  65%|██████▌   | 93/143 [00:10<00:01, 46.81it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.29it/s]


Epoch 33: train_loss=0.1289, val_loss=0.1328



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<22:26,  9.48s/it]
Training 35/40:   8%|▊         | 12/143 [00:09<01:15,  1.74it/s]
Training 35/40:  17%|█▋        | 24/143 [00:09<00:28,  4.19it/s]
Training 35/40:  36%|███▌      | 51/143 [00:09<00:07, 11.84it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 34: train_loss=0.1323, val_loss=0.1246



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 36/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 36/40:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 36/40:  33%|███▎      | 47/143 [00:09<00:08, 11.11it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 14.29it/s]


Epoch 35: train_loss=0.1315, val_loss=0.1256



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 37/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 37/40:  15%|█▌        | 22/143 [00:09<00:30,  4.02it/s]
Training 37/40:  23%|██▎       | 33/143 [00:09<00:15,  7.16it/s]
Training 37/40:  35%|███▍      | 50/143 [00:09<00:06, 13.76it/s]
Training 37/40:  50%|████▉     | 71/143 [00:09<00:02, 24.46it/s]
Training 37/40:  68%|██████▊   | 97/143 [00:09<00:01, 41.34it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.82it/s]


Epoch 36: train_loss=0.1277, val_loss=0.1431



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 38/40:   7%|▋         | 10/143 [00:09<01:29,  1.49it/s]
Training 38/40:  15%|█▌        | 22/143 [00:09<00:30,  4.03it/s]
Training 38/40:  24%|██▍       | 35/143 [00:09<00:13,  7.77it/s]
Training 38/40:  39%|███▉      | 56/143 [00:09<00:05, 15.96it/s]
Training 38/40:  52%|█████▏    | 75/143 [00:09<00:02, 25.49it/s]
Training 38/40:  71%|███████   | 101/143 [00:09<00:00, 42.40it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 37: train_loss=0.1276, val_loss=0.1232



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 39/40:   7%|▋         | 10/143 [00:09<01:31,  1.46it/s]
Training 39/40:  14%|█▍        | 20/143 [00:09<00:34,  3.53it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 38: train_loss=0.1298, val_loss=0.1237



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 40/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 40/40:  16%|█▌        | 23/143 [00:09<00:29,  4.01it/s]
Training 40/40:  24%|██▍       | 34/143 [00:09<00:15,  7.08it/s]
Training 40/40:  36%|███▌      | 51/143 [00:09<00:06, 13.53it/s]
Training 40/40:  48%|████▊     | 68/143 [00:09<00:03, 21.88it/s]
Training 40/40:  64%|██████▍   | 92/143 [00:10<00:01, 37.29it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.50it/s]


Epoch 39: train_loss=0.1346, val_loss=0.1298


2025-06-02 10:01:25,684 - __main__ - INFO - Saved mlp probe for layer 8 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_8_probe.pth
 67%|██████▋   | 4/6 [3:00:59<1:30:34, 2717.32s/it]2025-06-02 10:01:45,176 - __main__ - INFO - Processing layer 10...
2025-06-02 10:01:45,177 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:07<2:25:08,  7.59s/it]
Extracting features:   0%|          | 2/1149 [00:08<1:08:29,  3.58s/it]
Extracting features:   0%|          | 3/1149 [00:09<43:48,  2.29s/it]  
Extracting features:   0%|          | 4/1149 [00:09<32:10,  1.69s/it]
Extracting features:   0%|          | 5/1149 [00:10<25:40,  1.35s/it]
Extracting features:   1%|          | 6/1149 [00:11<21:45,  1.14s/it]
Extracting features:   1%|          | 7/1149 [00:12<19:14,  1.01s/it]
Extracting features:   1%|          | 8/1149 [00:12<17:36,  1.08it/s]
Extracting features:   1%|          | 9/1149 [00:13<16:31,  1.15it

Epoch 0: train_loss=0.2321, val_loss=0.1701



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:43,  9.18s/it]
Training 2/30:  13%|█▎        | 18/143 [00:09<00:46,  2.71it/s]
Training 2/30:  24%|██▍       | 35/143 [00:09<00:17,  6.29it/s]
Training 2/30:  39%|███▉      | 56/143 [00:09<00:07, 12.33it/s]
Training 2/30:  57%|█████▋    | 81/143 [00:09<00:02, 21.98it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 1: train_loss=0.1643, val_loss=0.1570



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 3/30:  13%|█▎        | 19/143 [00:09<00:42,  2.93it/s]
Training 3/30:  27%|██▋       | 38/143 [00:09<00:14,  7.01it/s]
Training 3/30:  42%|████▏     | 60/143 [00:09<00:06, 13.44it/s]
Training 3/30:  60%|██████    | 86/143 [00:09<00:02, 23.63it/s]
Training 3/30: 100%|██████████| 143/143 [00:09<00:00, 14.36it/s]


Epoch 2: train_loss=0.1600, val_loss=0.1618



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 4/30:  13%|█▎        | 19/143 [00:09<00:42,  2.94it/s]
Training 4/30:  25%|██▌       | 36/143 [00:09<00:16,  6.60it/s]
Training 4/30:  42%|████▏     | 60/143 [00:09<00:06, 13.68it/s]
Training 4/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.56it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 3: train_loss=0.1544, val_loss=0.1522



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 5/30:  14%|█▍        | 20/143 [00:09<00:40,  3.02it/s]
Training 5/30:  29%|██▉       | 42/143 [00:09<00:13,  7.67it/s]
Training 5/30:  43%|████▎     | 62/143 [00:09<00:06, 13.38it/s]
Training 5/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.59it/s]
Training 5/30: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 4: train_loss=0.1526, val_loss=0.1469



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:08<20:56,  8.85s/it]
Training 6/30:  13%|█▎        | 19/143 [00:08<00:41,  2.97it/s]
Training 6/30:  29%|██▉       | 42/143 [00:09<00:12,  8.00it/s]
Training 6/30:  43%|████▎     | 62/143 [00:09<00:05, 13.88it/s]
Training 6/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.46it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.50it/s]


Epoch 5: train_loss=0.1495, val_loss=0.1455



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<22:54,  9.68s/it]
Training 7/30:  13%|█▎        | 18/143 [00:09<00:48,  2.57it/s]
Training 7/30:  27%|██▋       | 39/143 [00:09<00:15,  6.79it/s]
Training 7/30:  42%|████▏     | 60/143 [00:09<00:06, 12.51it/s]
Training 7/30:  60%|██████    | 86/143 [00:10<00:02, 22.07it/s]
Training 7/30: 100%|██████████| 143/143 [00:10<00:00, 13.42it/s]


Epoch 6: train_loss=0.1472, val_loss=0.1414



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:09<21:38,  9.15s/it]
Training 8/30:  13%|█▎        | 19/143 [00:09<00:43,  2.87it/s]
Training 8/30:  29%|██▊       | 41/143 [00:09<00:13,  7.54it/s]
Training 8/30:  44%|████▍     | 63/143 [00:09<00:05, 13.85it/s]
Training 8/30:  70%|██████▉   | 100/143 [00:09<00:01, 28.35it/s]
Training 8/30: 100%|██████████| 143/143 [00:10<00:00, 14.21it/s]


Epoch 7: train_loss=0.1433, val_loss=0.1403



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<22:01,  9.31s/it]
Training 9/30:  15%|█▍        | 21/143 [00:09<00:39,  3.13it/s]
Training 9/30:  31%|███       | 44/143 [00:09<00:12,  7.92it/s]
Training 9/30:  47%|████▋     | 67/143 [00:09<00:05, 14.41it/s]
Training 9/30:  66%|██████▌   | 94/143 [00:09<00:01, 24.64it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 8: train_loss=0.1410, val_loss=0.1395



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<22:29,  9.50s/it]
Training 10/30:  12%|█▏        | 17/143 [00:09<00:50,  2.47it/s]
Training 10/30:  24%|██▍       | 34/143 [00:09<00:18,  5.93it/s]
Training 10/30:  41%|████      | 58/143 [00:09<00:06, 12.64it/s]
Training 10/30:  60%|██████    | 86/143 [00:09<00:02, 23.15it/s]
Training 10/30: 100%|██████████| 143/143 [00:10<00:00, 13.67it/s]


Epoch 9: train_loss=0.1422, val_loss=0.1398



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 11/30:  13%|█▎        | 19/143 [00:09<00:42,  2.91it/s]
Training 11/30:  26%|██▌       | 37/143 [00:09<00:15,  6.76it/s]
Training 11/30:  41%|████      | 58/143 [00:09<00:06, 12.88it/s]
Training 11/30:  60%|██████    | 86/143 [00:09<00:02, 23.90it/s]
Training 11/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 10: train_loss=0.1395, val_loss=0.1518



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:52,  9.25s/it]
Training 12/30:  15%|█▍        | 21/143 [00:09<00:38,  3.15it/s]
Training 12/30:  32%|███▏      | 46/143 [00:09<00:11,  8.40it/s]
Training 12/30:  48%|████▊     | 69/143 [00:09<00:04, 14.92it/s]
Training 12/30:  69%|██████▊   | 98/143 [00:09<00:01, 26.02it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.02it/s]


Epoch 11: train_loss=0.1382, val_loss=0.1443



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 13/30:  13%|█▎        | 19/143 [00:09<00:42,  2.93it/s]
Training 13/30:  29%|██▊       | 41/143 [00:09<00:13,  7.67it/s]
Training 13/30:  43%|████▎     | 62/143 [00:09<00:05, 13.79it/s]
Training 13/30:  64%|██████▍   | 92/143 [00:09<00:01, 25.63it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 12: train_loss=0.1367, val_loss=0.1356



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:08<21:17,  9.00s/it]
Training 14/30:  11%|█         | 16/143 [00:09<00:51,  2.45it/s]
Training 14/30:  27%|██▋       | 39/143 [00:09<00:14,  7.41it/s]
Training 14/30:  43%|████▎     | 62/143 [00:09<00:05, 14.14it/s]
Training 14/30:  64%|██████▎   | 91/143 [00:09<00:02, 25.53it/s]
Training 14/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 13: train_loss=0.1338, val_loss=0.1323



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:08<20:53,  8.83s/it]
Training 15/30:  13%|█▎        | 18/143 [00:08<00:44,  2.82it/s]
Training 15/30:  27%|██▋       | 38/143 [00:09<00:14,  7.20it/s]
Training 15/30:  46%|████▌     | 66/143 [00:09<00:04, 15.59it/s]
Training 15/30:  66%|██████▋   | 95/143 [00:09<00:01, 27.11it/s]
Training 15/30: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 14: train_loss=0.1376, val_loss=0.1347



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 16/30:  12%|█▏        | 17/143 [00:09<00:48,  2.61it/s]
Training 16/30:  28%|██▊       | 40/143 [00:09<00:13,  7.58it/s]
Training 16/30:  49%|████▉     | 70/143 [00:09<00:04, 16.43it/s]
Training 16/30:  68%|██████▊   | 97/143 [00:09<00:01, 26.94it/s]
Training 16/30: 100%|██████████| 143/143 [00:09<00:00, 14.48it/s]


Epoch 15: train_loss=0.1358, val_loss=0.1336



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 17/30:  13%|█▎        | 19/143 [00:09<00:42,  2.94it/s]
Training 17/30:  27%|██▋       | 38/143 [00:09<00:14,  7.05it/s]
Training 17/30:  47%|████▋     | 67/143 [00:09<00:04, 15.64it/s]
Training 17/30:  67%|██████▋   | 96/143 [00:09<00:01, 27.05it/s]
Training 17/30: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 16: train_loss=0.1313, val_loss=0.1305



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:09<21:25,  9.05s/it]
Training 18/30:  12%|█▏        | 17/143 [00:09<00:48,  2.59it/s]
Training 18/30:  27%|██▋       | 38/143 [00:09<00:14,  7.09it/s]
Training 18/30:  41%|████▏     | 59/143 [00:09<00:06, 13.15it/s]
Training 18/30:  63%|██████▎   | 90/143 [00:09<00:02, 25.32it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 17: train_loss=0.1320, val_loss=0.1324



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 19/30:  13%|█▎        | 18/143 [00:09<00:44,  2.79it/s]
Training 19/30:  27%|██▋       | 38/143 [00:09<00:14,  7.13it/s]
Training 19/30:  40%|███▉      | 57/143 [00:09<00:06, 12.69it/s]
Training 19/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.45it/s]
Training 19/30: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 18: train_loss=0.1302, val_loss=0.1306



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:08<20:58,  8.86s/it]
Training 20/30:  13%|█▎        | 18/143 [00:08<00:44,  2.81it/s]
Training 20/30:  24%|██▍       | 34/143 [00:09<00:17,  6.29it/s]
Training 20/30:  40%|███▉      | 57/143 [00:09<00:06, 13.15it/s]
Training 20/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.93it/s]
Training 20/30: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 19: train_loss=0.1311, val_loss=0.1295



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 21/30:  13%|█▎        | 19/143 [00:09<00:42,  2.94it/s]
Training 21/30:  31%|███       | 44/143 [00:09<00:11,  8.34it/s]
Training 21/30:  48%|████▊     | 68/143 [00:09<00:04, 15.36it/s]
Training 21/30:  67%|██████▋   | 96/143 [00:09<00:01, 26.31it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 20: train_loss=0.1332, val_loss=0.1383



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:08<21:00,  8.88s/it]
Training 22/30:  13%|█▎        | 19/143 [00:08<00:41,  2.96it/s]
Training 22/30:  30%|███       | 43/143 [00:09<00:12,  8.20it/s]
Training 22/30:  46%|████▌     | 66/143 [00:09<00:05, 14.98it/s]
Training 22/30:  67%|██████▋   | 96/143 [00:09<00:01, 26.90it/s]
Training 22/30: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 21: train_loss=0.1303, val_loss=0.1299



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 23/30:  11%|█         | 16/143 [00:09<00:52,  2.40it/s]
Training 23/30:  27%|██▋       | 39/143 [00:09<00:14,  7.26it/s]
Training 23/30:  45%|████▍     | 64/143 [00:09<00:05, 14.43it/s]
Training 23/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.19it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 22: train_loss=0.1294, val_loss=0.1288



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:08<20:53,  8.83s/it]
Training 24/30:  11%|█         | 16/143 [00:08<00:50,  2.50it/s]
Training 24/30:  25%|██▌       | 36/143 [00:09<00:15,  6.87it/s]
Training 24/30:  46%|████▌     | 66/143 [00:09<00:04, 15.88it/s]
Training 24/30:  66%|██████▋   | 95/143 [00:09<00:01, 27.38it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 23: train_loss=0.1289, val_loss=0.1295



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<22:32,  9.52s/it]
Training 25/30:   9%|▉         | 13/143 [00:09<01:09,  1.88it/s]
Training 25/30:  24%|██▍       | 34/143 [00:09<00:17,  6.17it/s]
Training 25/30:  43%|████▎     | 62/143 [00:09<00:05, 14.01it/s]
Training 25/30:  67%|██████▋   | 96/143 [00:09<00:01, 26.79it/s]
Training 25/30: 100%|██████████| 143/143 [00:10<00:00, 13.69it/s]


Epoch 24: train_loss=0.1286, val_loss=0.1261



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 26/30:   9%|▉         | 13/143 [00:09<01:07,  1.91it/s]
Training 26/30:  22%|██▏       | 32/143 [00:09<00:18,  5.87it/s]
Training 26/30:  41%|████▏     | 59/143 [00:09<00:06, 13.57it/s]
Training 26/30:  60%|██████    | 86/143 [00:09<00:02, 23.82it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 25: train_loss=0.1279, val_loss=0.1351



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:08<20:50,  8.81s/it]
Training 27/30:  13%|█▎        | 18/143 [00:08<00:44,  2.82it/s]
Training 27/30:  27%|██▋       | 39/143 [00:09<00:14,  7.42it/s]
Training 27/30:  43%|████▎     | 62/143 [00:09<00:05, 14.26it/s]
Training 27/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.00it/s]
Training 27/30: 100%|██████████| 143/143 [00:09<00:00, 14.65it/s]


Epoch 26: train_loss=0.1261, val_loss=0.1270



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:08<20:55,  8.84s/it]
Training 28/30:  11%|█         | 16/143 [00:08<00:50,  2.50it/s]
Training 28/30:  25%|██▌       | 36/143 [00:09<00:15,  6.87it/s]
Training 28/30:  41%|████      | 58/143 [00:09<00:06, 13.41it/s]
Training 28/30:  62%|██████▏   | 88/143 [00:09<00:02, 25.44it/s]
Training 28/30: 100%|██████████| 143/143 [00:09<00:00, 14.59it/s]


Epoch 27: train_loss=0.1276, val_loss=0.1487



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 29/30:  13%|█▎        | 19/143 [00:08<00:41,  2.95it/s]
Training 29/30:  31%|███▏      | 45/143 [00:09<00:11,  8.62it/s]
Training 29/30:  49%|████▉     | 70/143 [00:09<00:04, 15.99it/s]
Training 29/30:  71%|███████   | 101/143 [00:09<00:01, 28.27it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 28: train_loss=0.1270, val_loss=0.1265



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:08<20:56,  8.85s/it]
Training 30/30:  13%|█▎        | 19/143 [00:08<00:41,  2.97it/s]
Training 30/30:  29%|██▊       | 41/143 [00:09<00:13,  7.77it/s]
Training 30/30:  45%|████▌     | 65/143 [00:09<00:05, 14.88it/s]
Training 30/30:  62%|██████▏   | 89/143 [00:09<00:02, 24.31it/s]
Training 30/30: 100%|██████████| 143/143 [00:09<00:00, 14.60it/s]


Epoch 29: train_loss=0.1269, val_loss=0.1271


2025-06-02 10:32:37,269 - __main__ - INFO - Saved linear probe for layer 10 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_10_probe.pth
2025-06-02 10:32:56,228 - __main__ - INFO - Running mlp probe on layer 10...
2025-06-02 10:32:56,229 - __main__ - INFO - Running mlp probe on layer 10 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:08<20:54,  8.84s/it]
Training 1/40:   7%|▋         | 10/143 [00:08<01:26,  1.54it/s]
Training 1/40:  15%|█▍        | 21/143 [00:09<00:30,  3.94it/s]
Training 1/40:  39%|███▉      | 56/143 [00:09<00:05, 14.55it/s]
Training 1/40: 100%|██████████| 143/143 [00:09<00:00, 14.82it/s]


Epoch 0: train_loss=0.4752, val_loss=0.1677



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:08<20:58,  8.86s/it]
Training 2/40:   8%|▊         | 12/143 [00:08<01:10,  1.86it/s]
Training 2/40:  16%|█▌        | 23/143 [00:09<00:28,  4.25it/s]
Training 2/40:  24%|██▍       | 35/143 [00:09<00:13,  7.79it/s]
Training 2/40: 100%|██████████| 143/143 [00:09<00:00, 14.77it/s]


Epoch 1: train_loss=0.1622, val_loss=0.1592



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 3/40:   7%|▋         | 10/143 [00:09<01:27,  1.53it/s]
Training 3/40:  14%|█▍        | 20/143 [00:09<00:33,  3.69it/s]
Training 3/40:  22%|██▏       | 32/143 [00:09<00:15,  7.24it/s]
Training 3/40:  38%|███▊      | 55/143 [00:09<00:05, 16.48it/s]
Training 3/40:  55%|█████▌    | 79/143 [00:09<00:02, 28.96it/s]
Training 3/40:  71%|███████▏  | 102/143 [00:09<00:00, 43.73it/s]
Training 3/40: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 2: train_loss=0.1599, val_loss=0.1572



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<21:42,  9.18s/it]
Training 4/40:  10%|▉         | 14/143 [00:09<01:01,  2.10it/s]
Training 4/40:  17%|█▋        | 25/143 [00:09<00:26,  4.41it/s]
Training 4/40: 100%|██████████| 143/143 [00:09<00:00, 14.42it/s]


Epoch 3: train_loss=0.1581, val_loss=0.1565



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 5/40:   8%|▊         | 11/143 [00:08<01:17,  1.71it/s]
Training 5/40:  16%|█▌        | 23/143 [00:08<00:27,  4.35it/s]
Training 5/40:  28%|██▊       | 40/143 [00:09<00:10,  9.44it/s]
Training 5/40:  42%|████▏     | 60/143 [00:09<00:04, 17.45it/s]
Training 5/40:  59%|█████▊    | 84/143 [00:09<00:01, 30.04it/s]
Training 5/40: 100%|██████████| 143/143 [00:09<00:00, 14.58it/s]


Epoch 4: train_loss=0.1547, val_loss=0.1528



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 6/40:   7%|▋         | 10/143 [00:09<01:27,  1.52it/s]
Training 6/40:  17%|█▋        | 24/143 [00:09<00:26,  4.55it/s]
Training 6/40:  26%|██▌       | 37/143 [00:09<00:12,  8.33it/s]
Training 6/40:  38%|███▊      | 55/143 [00:09<00:05, 15.41it/s]
Training 6/40:  52%|█████▏    | 75/143 [00:09<00:02, 25.67it/s]
Training 6/40:  72%|███████▏  | 103/143 [00:09<00:00, 44.36it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.11it/s]


Epoch 5: train_loss=0.1530, val_loss=0.1501



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:08<20:50,  8.80s/it]
Training 7/40:   8%|▊         | 12/143 [00:08<01:10,  1.87it/s]
Training 7/40:  16%|█▌        | 23/143 [00:09<00:28,  4.27it/s]
Training 7/40:  28%|██▊       | 40/143 [00:09<00:10,  9.38it/s]
Training 7/40:  41%|████▏     | 59/143 [00:09<00:04, 17.00it/s]
Training 7/40:  57%|█████▋    | 81/143 [00:09<00:02, 28.47it/s]
Training 7/40:  75%|███████▍  | 107/143 [00:09<00:00, 45.72it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 6: train_loss=0.1474, val_loss=0.1597



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:08<20:47,  8.79s/it]
Training 8/40:   8%|▊         | 11/143 [00:08<01:17,  1.71it/s]
Training 8/40:  15%|█▌        | 22/143 [00:08<00:29,  4.12it/s]
Training 8/40: 100%|██████████| 143/143 [00:09<00:00, 14.99it/s]


Epoch 7: train_loss=0.1474, val_loss=0.1426



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 9/40:   7%|▋         | 10/143 [00:09<01:27,  1.51it/s]
Training 9/40:  13%|█▎        | 19/143 [00:09<00:36,  3.44it/s]
Training 9/40:  40%|███▉      | 57/143 [00:09<00:05, 14.82it/s]
Training 9/40: 100%|██████████| 143/143 [00:09<00:00, 14.60it/s]


Epoch 8: train_loss=0.1428, val_loss=0.1426



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 10/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 10/40:  16%|█▌        | 23/143 [00:09<00:28,  4.28it/s]
Training 10/40:  24%|██▍       | 35/143 [00:09<00:13,  7.79it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 9: train_loss=0.1388, val_loss=0.1407



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<21:23,  9.04s/it]
Training 11/40:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 11/40:  15%|█▍        | 21/143 [00:09<00:32,  3.80it/s]
Training 11/40:  34%|███▎      | 48/143 [00:09<00:08, 11.80it/s]
Training 11/40: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 10: train_loss=0.1411, val_loss=0.1422



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 12/40:   7%|▋         | 10/143 [00:08<01:25,  1.55it/s]
Training 12/40:  15%|█▌        | 22/143 [00:09<00:28,  4.19it/s]
Training 12/40:  24%|██▍       | 34/143 [00:09<00:14,  7.75it/s]
Training 12/40:  38%|███▊      | 54/143 [00:09<00:05, 15.82it/s]
Training 12/40:  54%|█████▍    | 77/143 [00:09<00:02, 27.95it/s]
Training 12/40:  70%|██████▉   | 100/143 [00:09<00:01, 42.97it/s]
Training 12/40: 100%|██████████| 143/143 [00:09<00:00, 14.39it/s]


Epoch 11: train_loss=0.1391, val_loss=0.1348



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 13/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 13/40:  16%|█▌        | 23/143 [00:09<00:28,  4.15it/s]
Training 13/40:  26%|██▌       | 37/143 [00:09<00:13,  8.15it/s]
Training 13/40:  39%|███▉      | 56/143 [00:09<00:05, 15.47it/s]
Training 13/40:  56%|█████▌    | 80/143 [00:09<00:02, 27.62it/s]
Training 13/40:  71%|███████▏  | 102/143 [00:09<00:00, 41.43it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.74it/s]


Epoch 12: train_loss=0.1332, val_loss=0.1344



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 14/40:   8%|▊         | 11/143 [00:09<01:18,  1.67it/s]
Training 14/40:  17%|█▋        | 24/143 [00:09<00:26,  4.48it/s]
Training 14/40:  27%|██▋       | 38/143 [00:09<00:12,  8.58it/s]
Training 14/40: 100%|██████████| 143/143 [00:09<00:00, 14.57it/s]


Epoch 13: train_loss=0.1288, val_loss=0.1249



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:08<21:11,  8.95s/it]
Training 15/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 15/40:  15%|█▌        | 22/143 [00:09<00:29,  4.05it/s]
Training 15/40:  26%|██▌       | 37/143 [00:09<00:12,  8.47it/s]
Training 15/40:  37%|███▋      | 53/143 [00:09<00:06, 14.74it/s]
Training 15/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.61it/s]
Training 15/40:  68%|██████▊   | 97/143 [00:09<00:01, 40.65it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 14: train_loss=0.1287, val_loss=0.1351



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:21,  9.02s/it]
Training 16/40:   8%|▊         | 11/143 [00:09<01:19,  1.67it/s]
Training 16/40:  15%|█▌        | 22/143 [00:09<00:30,  4.02it/s]
Training 16/40:  24%|██▍       | 35/143 [00:09<00:13,  7.79it/s]
Training 16/40:  38%|███▊      | 55/143 [00:09<00:05, 15.68it/s]
Training 16/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.29it/s]
Training 16/40:  69%|██████▊   | 98/143 [00:09<00:01, 40.94it/s]
Training 16/40: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 15: train_loss=0.1280, val_loss=0.1375



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:08<20:47,  8.78s/it]
Training 17/40:   8%|▊         | 11/143 [00:08<01:17,  1.71it/s]
Training 17/40:  13%|█▎        | 19/143 [00:08<00:35,  3.46it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 14.96it/s]


Epoch 16: train_loss=0.1287, val_loss=0.1264



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<20:59,  8.87s/it]
Training 18/40:   8%|▊         | 12/143 [00:08<01:10,  1.85it/s]
Training 18/40:  17%|█▋        | 25/143 [00:09<00:25,  4.68it/s]
Training 18/40:  26%|██▌       | 37/143 [00:09<00:12,  8.20it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 17: train_loss=0.1243, val_loss=0.1227



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:08<20:48,  8.79s/it]
Training 19/40:   8%|▊         | 11/143 [00:08<01:17,  1.71it/s]
Training 19/40:  16%|█▌        | 23/143 [00:09<00:27,  4.34it/s]
Training 19/40:  26%|██▌       | 37/143 [00:09<00:12,  8.52it/s]
Training 19/40: 100%|██████████| 143/143 [00:09<00:00, 14.82it/s]


Epoch 18: train_loss=0.1227, val_loss=0.1328



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 20/40:   8%|▊         | 12/143 [00:09<01:11,  1.84it/s]
Training 20/40:  17%|█▋        | 24/143 [00:09<00:26,  4.44it/s]
Training 20/40:  25%|██▌       | 36/143 [00:09<00:13,  7.95it/s]
Training 20/40:  39%|███▉      | 56/143 [00:09<00:05, 15.92it/s]
Training 20/40:  55%|█████▌    | 79/143 [00:09<00:02, 27.82it/s]
Training 20/40:  71%|███████▏  | 102/143 [00:09<00:00, 42.64it/s]
Training 20/40: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 19: train_loss=0.1256, val_loss=0.1343



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:08<21:03,  8.90s/it]
Training 21/40:   8%|▊         | 11/143 [00:09<01:18,  1.69it/s]
Training 21/40:  16%|█▌        | 23/143 [00:09<00:27,  4.29it/s]
Training 21/40:  27%|██▋       | 38/143 [00:09<00:12,  8.72it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 20: train_loss=0.1235, val_loss=0.1275



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 22/40:   7%|▋         | 10/143 [00:09<01:26,  1.53it/s]
Training 22/40:  15%|█▍        | 21/143 [00:09<00:31,  3.92it/s]
Training 22/40:  24%|██▍       | 34/143 [00:09<00:14,  7.76it/s]
Training 22/40: 100%|██████████| 143/143 [00:09<00:00, 14.69it/s]


Epoch 21: train_loss=0.1265, val_loss=0.1263



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:08<21:15,  8.98s/it]
Training 23/40:   6%|▌         | 8/143 [00:09<01:52,  1.21it/s]
Training 23/40:  14%|█▍        | 20/143 [00:09<00:32,  3.80it/s]
Training 23/40: 100%|██████████| 143/143 [00:09<00:00, 14.66it/s]


Epoch 22: train_loss=0.1243, val_loss=0.1244



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 24/40:   6%|▋         | 9/143 [00:09<01:37,  1.37it/s]
Training 24/40:  14%|█▍        | 20/143 [00:09<00:32,  3.75it/s]
Training 24/40:  32%|███▏      | 46/143 [00:09<00:08, 11.54it/s]
Training 24/40: 100%|██████████| 143/143 [00:09<00:00, 14.67it/s]


Epoch 23: train_loss=0.1255, val_loss=0.1228



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 25/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 25/40:  15%|█▌        | 22/143 [00:09<00:29,  4.04it/s]
Training 25/40:  34%|███▎      | 48/143 [00:09<00:08, 11.79it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 24: train_loss=0.1221, val_loss=0.1381



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 26/40:   7%|▋         | 10/143 [00:08<01:26,  1.53it/s]
Training 26/40:  15%|█▌        | 22/143 [00:09<00:29,  4.15it/s]
Training 26/40:  23%|██▎       | 33/143 [00:09<00:14,  7.37it/s]
Training 26/40:  37%|███▋      | 53/143 [00:09<00:05, 15.40it/s]
Training 26/40:  50%|████▉     | 71/143 [00:09<00:02, 24.65it/s]
Training 26/40:  67%|██████▋   | 96/143 [00:09<00:01, 41.33it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 25: train_loss=0.1237, val_loss=0.1415



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:28,  9.08s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 27/40:  14%|█▍        | 20/143 [00:09<00:33,  3.63it/s]
Training 27/40:  23%|██▎       | 33/143 [00:09<00:14,  7.41it/s]
Training 27/40:  33%|███▎      | 47/143 [00:09<00:07, 12.83it/s]
Training 27/40:  46%|████▌     | 66/143 [00:09<00:03, 22.58it/s]
Training 27/40:  60%|██████    | 86/143 [00:09<00:01, 35.50it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 26: train_loss=0.1224, val_loss=0.1206



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:24,  9.05s/it]
Training 28/40:   8%|▊         | 12/143 [00:09<01:12,  1.82it/s]
Training 28/40:  16%|█▌        | 23/143 [00:09<00:28,  4.16it/s]
Training 28/40:  26%|██▌       | 37/143 [00:09<00:12,  8.23it/s]
Training 28/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 27: train_loss=0.1227, val_loss=0.1181



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:09<22:03,  9.32s/it]
Training 29/40:   8%|▊         | 11/143 [00:09<01:21,  1.61it/s]
Training 29/40:  15%|█▌        | 22/143 [00:09<00:31,  3.90it/s]
Training 29/40:  25%|██▌       | 36/143 [00:09<00:13,  7.88it/s]
Training 29/40:  37%|███▋      | 53/143 [00:09<00:06, 14.36it/s]
Training 29/40:  54%|█████▍    | 77/143 [00:09<00:02, 26.50it/s]
Training 29/40:  69%|██████▊   | 98/143 [00:09<00:01, 39.60it/s]
Training 29/40: 100%|██████████| 143/143 [00:10<00:00, 13.64it/s]


Epoch 28: train_loss=0.1190, val_loss=0.1168



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 30/40:   9%|▉         | 13/143 [00:09<01:06,  1.95it/s]
Training 30/40:  18%|█▊        | 26/143 [00:09<00:24,  4.71it/s]
Training 30/40:  28%|██▊       | 40/143 [00:09<00:11,  8.73it/s]
Training 30/40:  41%|████      | 58/143 [00:09<00:05, 15.71it/s]
Training 30/40:  55%|█████▌    | 79/143 [00:09<00:02, 26.37it/s]
Training 30/40:  76%|███████▌  | 108/143 [00:09<00:00, 45.48it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.95it/s]


Epoch 29: train_loss=0.1184, val_loss=0.1178



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:57,  9.28s/it]
Training 31/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 31/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 31/40:  24%|██▍       | 34/143 [00:09<00:14,  7.46it/s]
Training 31/40:  36%|███▋      | 52/143 [00:09<00:06, 14.40it/s]
Training 31/40:  50%|█████     | 72/143 [00:09<00:02, 24.44it/s]
Training 31/40:  66%|██████▌   | 94/143 [00:09<00:01, 38.48it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 13.72it/s]


Epoch 30: train_loss=0.1186, val_loss=0.1160



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 32/40:   8%|▊         | 11/143 [00:09<01:18,  1.69it/s]
Training 32/40:  16%|█▌        | 23/143 [00:09<00:28,  4.28it/s]
Training 32/40: 100%|██████████| 143/143 [00:09<00:00, 14.74it/s]


Epoch 31: train_loss=0.1188, val_loss=0.1508



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<21:31,  9.10s/it]
Training 33/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 33/40:  21%|██        | 30/143 [00:09<00:19,  5.66it/s]
Training 33/40: 100%|██████████| 143/143 [00:09<00:00, 14.47it/s]


Epoch 32: train_loss=0.1202, val_loss=0.1384



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 34/40:   8%|▊         | 11/143 [00:09<01:20,  1.64it/s]
Training 34/40:  15%|█▍        | 21/143 [00:09<00:32,  3.74it/s]
Training 34/40:  23%|██▎       | 33/143 [00:09<00:15,  7.16it/s]
Training 34/40:  38%|███▊      | 55/143 [00:09<00:05, 15.75it/s]
Training 34/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.25it/s]
Training 34/40:  69%|██████▊   | 98/143 [00:09<00:01, 40.73it/s]
Training 34/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 33: train_loss=0.1202, val_loss=0.1185



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:09<23:28,  9.92s/it]
Training 35/40:   8%|▊         | 12/143 [00:10<01:18,  1.66it/s]
Training 35/40:  16%|█▌        | 23/143 [00:10<00:31,  3.81it/s]
Training 35/40: 100%|██████████| 143/143 [00:10<00:00, 13.36it/s]


Epoch 34: train_loss=0.1195, val_loss=0.1215



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:09<23:10,  9.79s/it]
Training 36/40:   8%|▊         | 12/143 [00:09<01:18,  1.68it/s]
Training 36/40:  17%|█▋        | 24/143 [00:10<00:29,  4.05it/s]
Training 36/40:  24%|██▍       | 34/143 [00:10<00:16,  6.73it/s]
Training 36/40:  38%|███▊      | 55/143 [00:10<00:06, 14.51it/s]
Training 36/40:  52%|█████▏    | 75/143 [00:10<00:02, 24.07it/s]
Training 36/40:  69%|██████▊   | 98/143 [00:10<00:01, 38.15it/s]
Training 36/40: 100%|██████████| 143/143 [00:10<00:00, 13.11it/s]


Epoch 35: train_loss=0.1188, val_loss=0.1156



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<23:05,  9.76s/it]
Training 37/40:   8%|▊         | 11/143 [00:09<01:25,  1.54it/s]
Training 37/40:  16%|█▌        | 23/143 [00:09<00:30,  3.93it/s]
Training 37/40:  24%|██▍       | 34/143 [00:10<00:15,  6.89it/s]
Training 37/40:  38%|███▊      | 55/143 [00:10<00:05, 14.68it/s]
Training 37/40:  53%|█████▎    | 76/143 [00:10<00:02, 24.80it/s]
Training 37/40:  69%|██████▉   | 99/143 [00:10<00:01, 38.93it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.14it/s]


Epoch 36: train_loss=0.1183, val_loss=0.1169



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:09<23:33,  9.95s/it]
Training 38/40:   7%|▋         | 10/143 [00:10<01:36,  1.37it/s]
Training 38/40:  15%|█▌        | 22/143 [00:10<00:32,  3.72it/s]
Training 38/40:  30%|███       | 43/143 [00:10<00:10,  9.38it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 13.26it/s]


Epoch 37: train_loss=0.1200, val_loss=0.1150



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<22:42,  9.59s/it]
Training 39/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 39/40:  16%|█▌        | 23/143 [00:09<00:30,  3.99it/s]
Training 39/40:  26%|██▌       | 37/143 [00:09<00:13,  7.85it/s]
Training 39/40:  41%|████▏     | 59/143 [00:10<00:05, 16.09it/s]
Training 39/40:  56%|█████▌    | 80/143 [00:10<00:02, 26.25it/s]
Training 39/40:  74%|███████▍  | 106/143 [00:10<00:00, 42.53it/s]
Training 39/40: 100%|██████████| 143/143 [00:10<00:00, 13.31it/s]


Epoch 38: train_loss=0.1252, val_loss=0.1192



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<22:27,  9.49s/it]
Training 40/40:   8%|▊         | 12/143 [00:09<01:15,  1.73it/s]
Training 40/40:  17%|█▋        | 25/143 [00:09<00:26,  4.39it/s]
Training 40/40:  26%|██▌       | 37/143 [00:09<00:13,  7.70it/s]
Training 40/40:  39%|███▉      | 56/143 [00:09<00:05, 14.86it/s]
Training 40/40:  54%|█████▍    | 77/143 [00:10<00:02, 25.19it/s]
Training 40/40:  71%|███████▏  | 102/143 [00:10<00:01, 40.92it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.35it/s]


Epoch 39: train_loss=0.1167, val_loss=0.1222


2025-06-02 10:46:03,171 - __main__ - INFO - Saved mlp probe for layer 10 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_10_probe.pth
 83%|████████▎ | 5/6 [3:45:37<45:03, 2703.25s/it]  2025-06-02 10:46:23,467 - __main__ - INFO - Processing layer 11...
2025-06-02 10:46:23,468 - src.probing.data_preprocessing - INFO - Extracting features from 1149 batches...


Extracting features:   0%|          | 1/1149 [00:08<2:37:32,  8.23s/it]
Extracting features:   0%|          | 2/1149 [00:09<1:13:41,  3.86s/it]
Extracting features:   0%|          | 3/1149 [00:09<46:42,  2.45s/it]  
Extracting features:   0%|          | 4/1149 [00:10<34:01,  1.78s/it]
Extracting features:   0%|          | 5/1149 [00:11<26:56,  1.41s/it]
Extracting features:   1%|          | 6/1149 [00:12<22:35,  1.19s/it]
Extracting features:   1%|          | 7/1149 [00:12<19:49,  1.04s/it]
Extracting features:   1%|          | 8/1149 [00:13<18:00,  1.06it/s]
Extracting features:   1%|          | 9/1149 [00:14<16:48,  1.13

Epoch 0: train_loss=0.2545, val_loss=0.1639



Training 2/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/30:   1%|          | 1/143 [00:09<21:31,  9.09s/it]
Training 2/30:  12%|█▏        | 17/143 [00:09<00:48,  2.58it/s]
Training 2/30:  27%|██▋       | 38/143 [00:09<00:14,  7.06it/s]
Training 2/30:  41%|████▏     | 59/143 [00:09<00:06, 13.13it/s]
Training 2/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.45it/s]
Training 2/30: 100%|██████████| 143/143 [00:10<00:00, 14.23it/s]


Epoch 1: train_loss=0.1628, val_loss=0.1568



Training 3/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/30:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 3/30:  11%|█         | 16/143 [00:09<00:53,  2.39it/s]
Training 3/30:  25%|██▌       | 36/143 [00:09<00:16,  6.60it/s]
Training 3/30:  43%|████▎     | 61/143 [00:09<00:05, 13.78it/s]
Training 3/30:  63%|██████▎   | 90/143 [00:09<00:02, 24.94it/s]
Training 3/30: 100%|██████████| 143/143 [00:10<00:00, 14.10it/s]


Epoch 2: train_loss=0.1557, val_loss=0.1514



Training 4/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/30:   1%|          | 1/143 [00:09<21:17,  9.00s/it]
Training 4/30:  13%|█▎        | 19/143 [00:09<00:42,  2.92it/s]
Training 4/30:  27%|██▋       | 38/143 [00:09<00:14,  7.00it/s]
Training 4/30:  45%|████▍     | 64/143 [00:09<00:05, 14.63it/s]
Training 4/30:  65%|██████▌   | 93/143 [00:09<00:01, 25.98it/s]
Training 4/30: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 3: train_loss=0.1498, val_loss=0.1472



Training 5/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/30:   1%|          | 1/143 [00:08<21:13,  8.97s/it]
Training 5/30:  12%|█▏        | 17/143 [00:09<00:48,  2.62it/s]
Training 5/30:  24%|██▍       | 34/143 [00:09<00:17,  6.28it/s]
Training 5/30:  39%|███▉      | 56/143 [00:09<00:06, 12.75it/s]
Training 5/30:  57%|█████▋    | 82/143 [00:09<00:02, 23.00it/s]
Training 5/30: 100%|██████████| 143/143 [00:09<00:00, 14.41it/s]


Epoch 4: train_loss=0.1482, val_loss=0.1420



Training 6/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/30:   1%|          | 1/143 [00:09<21:18,  9.00s/it]
Training 6/30:  13%|█▎        | 18/143 [00:09<00:45,  2.76it/s]
Training 6/30:  28%|██▊       | 40/143 [00:09<00:13,  7.49it/s]
Training 6/30:  45%|████▌     | 65/143 [00:09<00:05, 14.81it/s]
Training 6/30:  66%|██████▋   | 95/143 [00:09<00:01, 26.59it/s]
Training 6/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 5: train_loss=0.1428, val_loss=0.1460



Training 7/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/30:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 7/30:  12%|█▏        | 17/143 [00:09<00:48,  2.60it/s]
Training 7/30:  26%|██▌       | 37/143 [00:09<00:15,  6.89it/s]
Training 7/30:  41%|████▏     | 59/143 [00:09<00:06, 13.32it/s]
Training 7/30:  62%|██████▏   | 88/143 [00:09<00:02, 24.71it/s]
Training 7/30: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 6: train_loss=0.1398, val_loss=0.1364



Training 8/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/30:   1%|          | 1/143 [00:08<21:09,  8.94s/it]
Training 8/30:  18%|█▊        | 26/143 [00:09<00:28,  4.04it/s]
Training 8/30:  36%|███▋      | 52/143 [00:09<00:09,  9.65it/s]
Training 8/30:  54%|█████▍    | 77/143 [00:09<00:03, 16.94it/s]
Training 8/30: 100%|██████████| 143/143 [00:09<00:00, 14.54it/s]


Epoch 7: train_loss=0.1400, val_loss=0.1366



Training 9/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/30:   1%|          | 1/143 [00:09<21:40,  9.16s/it]
Training 9/30:  13%|█▎        | 19/143 [00:09<00:43,  2.87it/s]
Training 9/30:  28%|██▊       | 40/143 [00:09<00:14,  7.31it/s]
Training 9/30:  45%|████▍     | 64/143 [00:09<00:05, 14.21it/s]
Training 9/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.02it/s]
Training 9/30: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 8: train_loss=0.1369, val_loss=0.1349



Training 10/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/30:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 10/30:  11%|█         | 16/143 [00:09<00:52,  2.43it/s]
Training 10/30:  27%|██▋       | 38/143 [00:09<00:14,  7.14it/s]
Training 10/30:  43%|████▎     | 62/143 [00:09<00:05, 14.11it/s]
Training 10/30:  67%|██████▋   | 96/143 [00:09<00:01, 27.46it/s]
Training 10/30: 100%|██████████| 143/143 [00:09<00:00, 14.32it/s]


Epoch 9: train_loss=0.1359, val_loss=0.1444



Training 11/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/30:   1%|          | 1/143 [00:09<21:52,  9.24s/it]
Training 11/30:  13%|█▎        | 19/143 [00:09<00:43,  2.85it/s]
Training 11/30:  32%|███▏      | 46/143 [00:09<00:11,  8.52it/s]
Training 11/30:  48%|████▊     | 69/143 [00:09<00:04, 15.02it/s]
Training 11/30:  72%|███████▏  | 103/143 [00:09<00:01, 28.11it/s]
Training 11/30: 100%|██████████| 143/143 [00:10<00:00, 14.05it/s]


Epoch 10: train_loss=0.1335, val_loss=0.1325



Training 12/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/30:   1%|          | 1/143 [00:09<21:44,  9.18s/it]
Training 12/30:  10%|█         | 15/143 [00:09<00:56,  2.25it/s]
Training 12/30:  24%|██▍       | 34/143 [00:09<00:17,  6.26it/s]
Training 12/30:  43%|████▎     | 61/143 [00:09<00:05, 14.07it/s]
Training 12/30:  64%|██████▍   | 92/143 [00:09<00:01, 26.05it/s]
Training 12/30: 100%|██████████| 143/143 [00:10<00:00, 14.14it/s]


Epoch 11: train_loss=0.1315, val_loss=0.1298



Training 13/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/30:   1%|          | 1/143 [00:08<21:04,  8.90s/it]
Training 13/30:  13%|█▎        | 19/143 [00:09<00:41,  2.95it/s]
Training 13/30:  28%|██▊       | 40/143 [00:09<00:13,  7.52it/s]
Training 13/30:  41%|████▏     | 59/143 [00:09<00:06, 13.08it/s]
Training 13/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.06it/s]
Training 13/30: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 12: train_loss=0.1315, val_loss=0.1285



Training 14/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/30:   1%|          | 1/143 [00:09<23:34,  9.96s/it]
Training 14/30:  10%|█         | 15/143 [00:10<01:01,  2.08it/s]
Training 14/30:  22%|██▏       | 32/143 [00:10<00:20,  5.40it/s]
Training 14/30:  38%|███▊      | 54/143 [00:10<00:07, 11.29it/s]
Training 14/30:  58%|█████▊    | 83/143 [00:10<00:02, 21.78it/s]
Training 14/30: 100%|██████████| 143/143 [00:10<00:00, 13.10it/s]


Epoch 13: train_loss=0.1290, val_loss=0.1272



Training 15/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/30:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 15/30:  10%|█         | 15/143 [00:09<00:56,  2.27it/s]
Training 15/30:  24%|██▍       | 35/143 [00:09<00:16,  6.53it/s]
Training 15/30:  41%|████      | 58/143 [00:09<00:06, 13.20it/s]
Training 15/30:  60%|██████    | 86/143 [00:09<00:02, 24.09it/s]
Training 15/30: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 14: train_loss=0.1272, val_loss=0.1274



Training 16/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/30:   1%|          | 1/143 [00:08<20:54,  8.83s/it]
Training 16/30:  12%|█▏        | 17/143 [00:08<00:47,  2.66it/s]
Training 16/30:  23%|██▎       | 33/143 [00:09<00:17,  6.15it/s]
Training 16/30:  42%|████▏     | 60/143 [00:09<00:05, 14.25it/s]
Training 16/30:  62%|██████▏   | 89/143 [00:09<00:02, 25.82it/s]
Training 16/30: 100%|██████████| 143/143 [00:09<00:00, 14.63it/s]


Epoch 15: train_loss=0.1276, val_loss=0.1259



Training 17/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/30:   1%|          | 1/143 [00:09<22:12,  9.39s/it]
Training 17/30:  13%|█▎        | 19/143 [00:09<00:44,  2.80it/s]
Training 17/30:  31%|███       | 44/143 [00:09<00:12,  7.97it/s]
Training 17/30:  47%|████▋     | 67/143 [00:09<00:05, 14.41it/s]
Training 17/30:  68%|██████▊   | 97/143 [00:09<00:01, 25.75it/s]
Training 17/30: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 16: train_loss=0.1306, val_loss=0.1580



Training 18/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/30:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 18/30:  14%|█▍        | 20/143 [00:09<00:39,  3.11it/s]
Training 18/30:  30%|███       | 43/143 [00:09<00:12,  8.09it/s]
Training 18/30:  46%|████▌     | 66/143 [00:09<00:05, 14.85it/s]
Training 18/30:  64%|██████▍   | 92/143 [00:09<00:02, 25.07it/s]
Training 18/30: 100%|██████████| 143/143 [00:09<00:00, 14.51it/s]


Epoch 17: train_loss=0.1292, val_loss=0.1314



Training 19/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/30:   1%|          | 1/143 [00:08<21:08,  8.93s/it]
Training 19/30:  10%|█         | 15/143 [00:09<00:55,  2.31it/s]
Training 19/30:  23%|██▎       | 33/143 [00:09<00:17,  6.21it/s]
Training 19/30:  40%|███▉      | 57/143 [00:09<00:06, 13.32it/s]
Training 19/30:  57%|█████▋    | 82/143 [00:09<00:02, 23.14it/s]
Training 19/30: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 18: train_loss=0.1277, val_loss=0.1312



Training 20/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/30:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 20/30:  13%|█▎        | 18/143 [00:09<00:45,  2.74it/s]
Training 20/30:  26%|██▌       | 37/143 [00:09<00:15,  6.78it/s]
Training 20/30:  45%|████▌     | 65/143 [00:09<00:05, 14.95it/s]
Training 20/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.99it/s]
Training 20/30: 100%|██████████| 143/143 [00:10<00:00, 14.26it/s]


Epoch 19: train_loss=0.1273, val_loss=0.1400



Training 21/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/30:   1%|          | 1/143 [00:08<21:06,  8.92s/it]
Training 21/30:  15%|█▍        | 21/143 [00:09<00:37,  3.26it/s]
Training 21/30:  31%|███       | 44/143 [00:09<00:11,  8.25it/s]
Training 21/30:  47%|████▋     | 67/143 [00:09<00:05, 15.01it/s]
Training 21/30:  70%|██████▉   | 100/143 [00:09<00:01, 28.13it/s]
Training 21/30: 100%|██████████| 143/143 [00:09<00:00, 14.52it/s]


Epoch 20: train_loss=0.1258, val_loss=0.1295



Training 22/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/30:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 22/30:  13%|█▎        | 18/143 [00:09<00:45,  2.74it/s]
Training 22/30:  29%|██▉       | 42/143 [00:09<00:12,  7.88it/s]
Training 22/30:  48%|████▊     | 68/143 [00:09<00:04, 15.44it/s]
Training 22/30:  66%|██████▋   | 95/143 [00:09<00:01, 25.89it/s]
Training 22/30: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 21: train_loss=0.1260, val_loss=0.1242



Training 23/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/30:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 23/30:  11%|█         | 16/143 [00:09<00:52,  2.44it/s]
Training 23/30:  25%|██▌       | 36/143 [00:09<00:15,  6.71it/s]
Training 23/30:  42%|████▏     | 60/143 [00:09<00:06, 13.70it/s]
Training 23/30:  61%|██████    | 87/143 [00:09<00:02, 24.22it/s]
Training 23/30: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 22: train_loss=0.1276, val_loss=0.1260



Training 24/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/30:   1%|          | 1/143 [00:09<21:28,  9.08s/it]
Training 24/30:  13%|█▎        | 18/143 [00:09<00:45,  2.74it/s]
Training 24/30:  29%|██▉       | 42/143 [00:09<00:12,  7.88it/s]
Training 24/30:  47%|████▋     | 67/143 [00:09<00:05, 15.13it/s]
Training 24/30:  69%|██████▊   | 98/143 [00:09<00:01, 27.24it/s]
Training 24/30: 100%|██████████| 143/143 [00:09<00:00, 14.34it/s]


Epoch 23: train_loss=0.1246, val_loss=0.1230



Training 25/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 25/30:  13%|█▎        | 18/143 [00:09<00:45,  2.75it/s]
Training 25/30:  27%|██▋       | 39/143 [00:09<00:14,  7.26it/s]
Training 25/30:  47%|████▋     | 67/143 [00:09<00:04, 15.45it/s]
Training 25/30:  69%|██████▉   | 99/143 [00:09<00:01, 27.98it/s]
Training 25/30: 100%|██████████| 143/143 [00:09<00:00, 14.40it/s]


Epoch 24: train_loss=0.1245, val_loss=0.1499



Training 26/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/30:   1%|          | 1/143 [00:09<21:55,  9.26s/it]
Training 26/30:  10%|█         | 15/143 [00:09<00:57,  2.23it/s]
Training 26/30:  23%|██▎       | 33/143 [00:09<00:18,  6.00it/s]
Training 26/30:  41%|████▏     | 59/143 [00:09<00:06, 13.45it/s]
Training 26/30:  60%|██████    | 86/143 [00:09<00:02, 23.77it/s]
Training 26/30: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 25: train_loss=0.1254, val_loss=0.1300



Training 27/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/30:   1%|          | 1/143 [00:09<21:28,  9.07s/it]
Training 27/30:  10%|▉         | 14/143 [00:09<01:00,  2.12it/s]
Training 27/30:  22%|██▏       | 32/143 [00:09<00:18,  5.97it/s]
Training 27/30:  37%|███▋      | 53/143 [00:09<00:07, 12.07it/s]
Training 27/30:  55%|█████▌    | 79/143 [00:09<00:02, 22.24it/s]
Training 27/30: 100%|██████████| 143/143 [00:10<00:00, 14.17it/s]


Epoch 26: train_loss=0.1246, val_loss=0.1452



Training 28/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/30:   1%|          | 1/143 [00:09<21:47,  9.21s/it]
Training 28/30:  12%|█▏        | 17/143 [00:09<00:49,  2.55it/s]
Training 28/30:  27%|██▋       | 39/143 [00:09<00:14,  7.19it/s]
Training 28/30:  43%|████▎     | 62/143 [00:09<00:05, 13.76it/s]
Training 28/30:  64%|██████▎   | 91/143 [00:09<00:02, 24.93it/s]
Training 28/30: 100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


Epoch 27: train_loss=0.1273, val_loss=0.1240



Training 29/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/30:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 29/30:  13%|█▎        | 18/143 [00:09<00:45,  2.75it/s]
Training 29/30:  25%|██▌       | 36/143 [00:09<00:16,  6.61it/s]
Training 29/30:  40%|███▉      | 57/143 [00:09<00:06, 12.72it/s]
Training 29/30:  59%|█████▊    | 84/143 [00:09<00:02, 23.29it/s]
Training 29/30: 100%|██████████| 143/143 [00:09<00:00, 14.31it/s]


Epoch 28: train_loss=0.1247, val_loss=0.1288



Training 30/30:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/30:   1%|          | 1/143 [00:09<22:04,  9.33s/it]
Training 30/30:  10%|█         | 15/143 [00:09<00:57,  2.22it/s]
Training 30/30:  28%|██▊       | 40/143 [00:09<00:13,  7.44it/s]
Training 30/30:  41%|████      | 58/143 [00:09<00:06, 12.48it/s]
Training 30/30:  59%|█████▉    | 85/143 [00:09<00:02, 22.79it/s]
Training 30/30: 100%|██████████| 143/143 [00:10<00:00, 13.90it/s]


Epoch 29: train_loss=0.1214, val_loss=0.1300


2025-06-02 11:17:22,189 - __main__ - INFO - Saved linear probe for layer 11 to cache\probes\phase1_ijepa_viewpoint_probing\linear_layer_11_probe.pth
2025-06-02 11:17:41,159 - __main__ - INFO - Running mlp probe on layer 11...
2025-06-02 11:17:41,160 - __main__ - INFO - Running mlp probe on layer 11 (feature_dim: 1280)

Training 1/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 1/40:   1%|          | 1/143 [00:10<23:57, 10.12s/it]
Training 1/40:   6%|▋         | 9/143 [00:10<01:50,  1.21it/s]
Training 1/40:  13%|█▎        | 19/143 [00:10<00:39,  3.14it/s]
Training 1/40: 100%|██████████| 143/143 [00:10<00:00, 13.13it/s]


Epoch 0: train_loss=0.4857, val_loss=0.1688



Training 2/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 2/40:   1%|          | 1/143 [00:09<23:24,  9.89s/it]
Training 2/40:   8%|▊         | 11/143 [00:10<01:26,  1.52it/s]
Training 2/40:  15%|█▍        | 21/143 [00:10<00:35,  3.48it/s]
Training 2/40: 100%|██████████| 143/143 [00:10<00:00, 13.36it/s]


Epoch 1: train_loss=0.1621, val_loss=0.1592



Training 3/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 3/40:   1%|          | 1/143 [00:09<21:32,  9.10s/it]
Training 3/40:   8%|▊         | 11/143 [00:09<01:19,  1.65it/s]
Training 3/40:  15%|█▍        | 21/143 [00:09<00:32,  3.77it/s]
Training 3/40: 100%|██████████| 143/143 [00:09<00:00, 14.53it/s]


Epoch 2: train_loss=0.1603, val_loss=0.1573



Training 4/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 4/40:   1%|          | 1/143 [00:09<22:40,  9.58s/it]
Training 4/40:   8%|▊         | 11/143 [00:09<01:24,  1.57it/s]
Training 4/40:  17%|█▋        | 24/143 [00:09<00:28,  4.20it/s]
Training 4/40: 100%|██████████| 143/143 [00:10<00:00, 13.78it/s]


Epoch 3: train_loss=0.1572, val_loss=0.1531



Training 5/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 5/40:   1%|          | 1/143 [00:09<21:58,  9.28s/it]
Training 5/40:   7%|▋         | 10/143 [00:09<01:30,  1.47it/s]
Training 5/40:  15%|█▍        | 21/143 [00:09<00:32,  3.76it/s]
Training 5/40:  24%|██▍       | 34/143 [00:09<00:14,  7.45it/s]
Training 5/40:  37%|███▋      | 53/143 [00:09<00:06, 14.76it/s]
Training 5/40:  55%|█████▍    | 78/143 [00:09<00:02, 27.45it/s]
Training 5/40:  72%|███████▏  | 103/143 [00:09<00:00, 43.36it/s]
Training 5/40: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 4: train_loss=0.1530, val_loss=0.1482



Training 6/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 6/40:   1%|          | 1/143 [00:09<21:58,  9.29s/it]
Training 6/40:   8%|▊         | 12/143 [00:09<01:13,  1.77it/s]
Training 6/40:  16%|█▌        | 23/143 [00:09<00:29,  4.06it/s]
Training 6/40: 100%|██████████| 143/143 [00:10<00:00, 14.24it/s]


Epoch 5: train_loss=0.1459, val_loss=0.1515



Training 7/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 7/40:   1%|          | 1/143 [00:09<21:27,  9.06s/it]
Training 7/40:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 7/40:  16%|█▌        | 23/143 [00:09<00:28,  4.22it/s]
Training 7/40:  25%|██▌       | 36/143 [00:09<00:13,  7.97it/s]
Training 7/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 6: train_loss=0.1421, val_loss=0.1347



Training 8/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 8/40:   1%|          | 1/143 [00:09<21:19,  9.01s/it]
Training 8/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 8/40:  17%|█▋        | 25/143 [00:09<00:25,  4.61it/s]
Training 8/40:  29%|██▉       | 42/143 [00:09<00:10,  9.59it/s]
Training 8/40: 100%|██████████| 143/143 [00:09<00:00, 14.56it/s]


Epoch 7: train_loss=0.1355, val_loss=0.1360



Training 9/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 9/40:   1%|          | 1/143 [00:09<21:49,  9.22s/it]
Training 9/40:   8%|▊         | 12/143 [00:09<01:13,  1.78it/s]
Training 9/40:  17%|█▋        | 25/143 [00:09<00:26,  4.51it/s]
Training 9/40:  28%|██▊       | 40/143 [00:09<00:11,  8.79it/s]
Training 9/40:  40%|███▉      | 57/143 [00:09<00:05, 15.26it/s]
Training 9/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.28it/s]
Training 9/40:  69%|██████▊   | 98/143 [00:09<00:01, 38.59it/s]
Training 9/40: 100%|██████████| 143/143 [00:10<00:00, 13.79it/s]


Epoch 8: train_loss=0.1343, val_loss=0.1306



Training 10/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 10/40:   1%|          | 1/143 [00:08<21:02,  8.89s/it]
Training 10/40:   8%|▊         | 11/143 [00:08<01:18,  1.69it/s]
Training 10/40:  15%|█▌        | 22/143 [00:09<00:29,  4.08it/s]
Training 10/40: 100%|██████████| 143/143 [00:09<00:00, 14.80it/s]


Epoch 9: train_loss=0.1316, val_loss=0.1332



Training 11/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 11/40:   1%|          | 1/143 [00:09<22:06,  9.34s/it]
Training 11/40:   7%|▋         | 10/143 [00:09<01:31,  1.46it/s]
Training 11/40:  13%|█▎        | 19/143 [00:09<00:37,  3.32it/s]
Training 11/40: 100%|██████████| 143/143 [00:10<00:00, 14.15it/s]


Epoch 10: train_loss=0.1326, val_loss=0.1284



Training 12/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 12/40:   1%|          | 1/143 [00:09<21:45,  9.19s/it]
Training 12/40:   9%|▉         | 13/143 [00:09<01:07,  1.94it/s]
Training 12/40:  16%|█▌        | 23/143 [00:09<00:29,  4.03it/s]
Training 12/40:  34%|███▍      | 49/143 [00:09<00:08, 11.59it/s]
Training 12/40: 100%|██████████| 143/143 [00:10<00:00, 14.28it/s]


Epoch 11: train_loss=0.1287, val_loss=0.1242



Training 13/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 13/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 13/40:   6%|▌         | 8/143 [00:09<01:52,  1.20it/s]
Training 13/40:  13%|█▎        | 18/143 [00:09<00:37,  3.34it/s]
Training 13/40:  20%|██        | 29/143 [00:09<00:17,  6.53it/s]
Training 13/40:  31%|███▏      | 45/143 [00:09<00:07, 12.83it/s]
Training 13/40:  45%|████▍     | 64/143 [00:09<00:03, 22.60it/s]
Training 13/40:  57%|█████▋    | 82/143 [00:09<00:01, 34.01it/s]
Training 13/40:  76%|███████▌  | 109/143 [00:09<00:00, 56.18it/s]
Training 13/40: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 12: train_loss=0.1252, val_loss=0.1241



Training 14/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 14/40:   1%|          | 1/143 [00:09<21:45,  9.20s/it]
Training 14/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 14/40:  17%|█▋        | 25/143 [00:09<00:26,  4.52it/s]
Training 14/40:  27%|██▋       | 38/143 [00:09<00:12,  8.22it/s]
Training 14/40:  39%|███▉      | 56/143 [00:09<00:05, 15.15it/s]
Training 14/40:  54%|█████▍    | 77/143 [00:09<00:02, 25.78it/s]
Training 14/40:  72%|███████▏  | 103/143 [00:09<00:00, 42.63it/s]
Training 14/40: 100%|██████████| 143/143 [00:10<00:00, 13.75it/s]


Epoch 13: train_loss=0.1277, val_loss=0.1327



Training 15/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 15/40:   1%|          | 1/143 [00:09<21:42,  9.17s/it]
Training 15/40:   8%|▊         | 12/143 [00:09<01:13,  1.79it/s]
Training 15/40:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 15/40:  26%|██▌       | 37/143 [00:09<00:13,  8.12it/s]
Training 15/40:  41%|████▏     | 59/143 [00:09<00:05, 16.71it/s]
Training 15/40:  56%|█████▌    | 80/143 [00:09<00:02, 27.29it/s]
Training 15/40:  71%|███████▏  | 102/143 [00:09<00:00, 41.16it/s]
Training 15/40: 100%|██████████| 143/143 [00:10<00:00, 13.85it/s]


Epoch 14: train_loss=0.1247, val_loss=0.1238



Training 16/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 16/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 16/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 16/40:  16%|█▌        | 23/143 [00:09<00:28,  4.17it/s]
Training 16/40:  35%|███▍      | 50/143 [00:09<00:07, 12.17it/s]
Training 16/40: 100%|██████████| 143/143 [00:09<00:00, 14.55it/s]


Epoch 15: train_loss=0.1222, val_loss=0.1199



Training 17/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 17/40:   1%|          | 1/143 [00:09<21:41,  9.17s/it]
Training 17/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 17/40:  16%|█▌        | 23/143 [00:09<00:29,  4.11it/s]
Training 17/40: 100%|██████████| 143/143 [00:09<00:00, 14.38it/s]


Epoch 16: train_loss=0.1222, val_loss=0.1273



Training 18/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 18/40:   1%|          | 1/143 [00:08<21:04,  8.91s/it]
Training 18/40:  10%|▉         | 14/143 [00:09<00:59,  2.16it/s]
Training 18/40:  18%|█▊        | 26/143 [00:09<00:24,  4.74it/s]
Training 18/40: 100%|██████████| 143/143 [00:09<00:00, 14.73it/s]


Epoch 17: train_loss=0.1223, val_loss=0.1208



Training 19/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 19/40:   1%|          | 1/143 [00:08<21:07,  8.93s/it]
Training 19/40:   7%|▋         | 10/143 [00:09<01:27,  1.53it/s]
Training 19/40:  16%|█▌        | 23/143 [00:09<00:27,  4.34it/s]
Training 19/40:  24%|██▍       | 35/143 [00:09<00:13,  7.85it/s]
Training 19/40:  36%|███▌      | 51/143 [00:09<00:06, 14.16it/s]
Training 19/40:  48%|████▊     | 69/143 [00:09<00:03, 23.39it/s]
Training 19/40:  62%|██████▏   | 89/143 [00:09<00:01, 36.39it/s]
Training 19/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 18: train_loss=0.1232, val_loss=0.1238



Training 20/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 20/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 20/40:   8%|▊         | 11/143 [00:09<01:19,  1.66it/s]
Training 20/40:  20%|██        | 29/143 [00:09<00:20,  5.53it/s]
Training 20/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 19: train_loss=0.1231, val_loss=0.1210



Training 21/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 21/40:   1%|          | 1/143 [00:08<21:12,  8.96s/it]
Training 21/40:   9%|▉         | 13/143 [00:09<01:05,  1.99it/s]
Training 21/40:  17%|█▋        | 24/143 [00:09<00:27,  4.35it/s]
Training 21/40:  27%|██▋       | 38/143 [00:09<00:12,  8.45it/s]
Training 21/40: 100%|██████████| 143/143 [00:09<00:00, 14.61it/s]


Epoch 20: train_loss=0.1194, val_loss=0.1182



Training 22/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 22/40:   1%|          | 1/143 [00:09<22:11,  9.38s/it]
Training 22/40:   8%|▊         | 12/143 [00:09<01:14,  1.75it/s]
Training 22/40:  16%|█▌        | 23/143 [00:09<00:29,  4.02it/s]
Training 22/40: 100%|██████████| 143/143 [00:10<00:00, 14.07it/s]


Epoch 21: train_loss=0.1167, val_loss=0.1156



Training 23/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 23/40:   1%|          | 1/143 [00:09<23:35,  9.97s/it]
Training 23/40:   7%|▋         | 10/143 [00:10<01:37,  1.37it/s]
Training 23/40:  15%|█▍        | 21/143 [00:10<00:34,  3.52it/s]
Training 23/40: 100%|██████████| 143/143 [00:10<00:00, 13.34it/s]


Epoch 22: train_loss=0.1189, val_loss=0.1160



Training 24/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 24/40:   1%|          | 1/143 [00:09<21:26,  9.06s/it]
Training 24/40:   7%|▋         | 10/143 [00:09<01:28,  1.50it/s]
Training 24/40:  15%|█▌        | 22/143 [00:09<00:29,  4.07it/s]
Training 24/40:  25%|██▌       | 36/143 [00:09<00:13,  8.13it/s]
Training 24/40:  38%|███▊      | 54/143 [00:09<00:05, 15.17it/s]
Training 24/40:  52%|█████▏    | 74/143 [00:09<00:02, 25.29it/s]
Training 24/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.64it/s]
Training 24/40: 100%|██████████| 143/143 [00:10<00:00, 13.99it/s]


Epoch 23: train_loss=0.1179, val_loss=0.1304



Training 25/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 25/40:   1%|          | 1/143 [00:09<21:32,  9.11s/it]
Training 25/40:   9%|▉         | 13/143 [00:09<01:06,  1.96it/s]
Training 25/40:  17%|█▋        | 24/143 [00:09<00:27,  4.28it/s]
Training 25/40: 100%|██████████| 143/143 [00:09<00:00, 14.46it/s]


Epoch 24: train_loss=0.1186, val_loss=0.1340



Training 26/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 26/40:   1%|          | 1/143 [00:09<21:48,  9.22s/it]
Training 26/40:   8%|▊         | 11/143 [00:09<01:20,  1.63it/s]
Training 26/40:  15%|█▍        | 21/143 [00:09<00:32,  3.72it/s]
Training 26/40:  23%|██▎       | 33/143 [00:09<00:15,  7.11it/s]
Training 26/40:  32%|███▏      | 46/143 [00:09<00:08, 12.03it/s]
Training 26/40:  43%|████▎     | 62/143 [00:09<00:04, 20.03it/s]
Training 26/40:  59%|█████▊    | 84/143 [00:09<00:01, 34.34it/s]
Training 26/40:  75%|███████▍  | 107/143 [00:09<00:00, 52.35it/s]
Training 26/40: 100%|██████████| 143/143 [00:10<00:00, 13.60it/s]


Epoch 25: train_loss=0.1171, val_loss=0.1313



Training 27/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 27/40:   1%|          | 1/143 [00:09<21:20,  9.02s/it]
Training 27/40:   7%|▋         | 10/143 [00:09<01:28,  1.51it/s]
Training 27/40:  14%|█▍        | 20/143 [00:09<00:33,  3.65it/s]
Training 27/40:  22%|██▏       | 32/143 [00:09<00:15,  7.15it/s]
Training 27/40:  36%|███▋      | 52/143 [00:09<00:06, 15.09it/s]
Training 27/40:  51%|█████     | 73/143 [00:09<00:02, 25.91it/s]
Training 27/40:  69%|██████▊   | 98/143 [00:09<00:01, 42.21it/s]
Training 27/40: 100%|██████████| 143/143 [00:10<00:00, 14.09it/s]


Epoch 26: train_loss=0.1189, val_loss=0.1186



Training 28/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 28/40:   1%|          | 1/143 [00:09<21:35,  9.12s/it]
Training 28/40:   8%|▊         | 11/143 [00:09<01:20,  1.65it/s]
Training 28/40:  15%|█▌        | 22/143 [00:09<00:30,  3.97it/s]
Training 28/40:  25%|██▌       | 36/143 [00:09<00:13,  8.03it/s]
Training 28/40:  37%|███▋      | 53/143 [00:09<00:06, 14.63it/s]
Training 28/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.44it/s]
Training 28/40:  74%|███████▍  | 106/143 [00:09<00:00, 46.24it/s]
Training 28/40: 100%|██████████| 143/143 [00:10<00:00, 13.93it/s]


Epoch 27: train_loss=0.1156, val_loss=0.1153



Training 29/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 29/40:   1%|          | 1/143 [00:08<20:59,  8.87s/it]
Training 29/40:   8%|▊         | 12/143 [00:08<01:10,  1.85it/s]
Training 29/40:  15%|█▌        | 22/143 [00:09<00:30,  4.01it/s]
Training 29/40: 100%|██████████| 143/143 [00:09<00:00, 14.75it/s]


Epoch 28: train_loss=0.1166, val_loss=0.1178



Training 30/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 30/40:   1%|          | 1/143 [00:09<22:08,  9.35s/it]
Training 30/40:   8%|▊         | 12/143 [00:09<01:14,  1.76it/s]
Training 30/40:  16%|█▌        | 23/143 [00:09<00:29,  4.03it/s]
Training 30/40:  25%|██▌       | 36/143 [00:09<00:13,  7.67it/s]
Training 30/40:  41%|████      | 58/143 [00:09<00:05, 16.11it/s]
Training 30/40:  55%|█████▍    | 78/143 [00:09<00:02, 25.98it/s]
Training 30/40:  69%|██████▉   | 99/143 [00:09<00:01, 38.99it/s]
Training 30/40: 100%|██████████| 143/143 [00:10<00:00, 13.62it/s]


Epoch 29: train_loss=0.1186, val_loss=0.1369



Training 31/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 31/40:   1%|          | 1/143 [00:09<21:22,  9.03s/it]
Training 31/40:   8%|▊         | 12/143 [00:09<01:11,  1.82it/s]
Training 31/40:  17%|█▋        | 24/143 [00:09<00:27,  4.38it/s]
Training 31/40:  25%|██▌       | 36/143 [00:09<00:13,  7.85it/s]
Training 31/40:  41%|████▏     | 59/143 [00:09<00:04, 16.96it/s]
Training 31/40:  55%|█████▍    | 78/143 [00:09<00:02, 26.52it/s]
Training 31/40:  76%|███████▌  | 109/143 [00:09<00:00, 47.14it/s]
Training 31/40: 100%|██████████| 143/143 [00:10<00:00, 14.08it/s]


Epoch 30: train_loss=0.1209, val_loss=0.1145



Training 32/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 32/40:   1%|          | 1/143 [00:09<21:34,  9.12s/it]
Training 32/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 32/40:  15%|█▌        | 22/143 [00:09<00:30,  3.91it/s]
Training 32/40:  24%|██▍       | 35/143 [00:09<00:14,  7.67it/s]
Training 32/40:  36%|███▌      | 51/143 [00:09<00:06, 13.86it/s]
Training 32/40:  50%|█████     | 72/143 [00:09<00:02, 24.61it/s]
Training 32/40:  67%|██████▋   | 96/143 [00:09<00:01, 40.17it/s]
Training 32/40: 100%|██████████| 143/143 [00:10<00:00, 13.84it/s]


Epoch 31: train_loss=0.1190, val_loss=0.1160



Training 33/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 33/40:   1%|          | 1/143 [00:09<22:12,  9.38s/it]
Training 33/40:   8%|▊         | 11/143 [00:09<01:22,  1.60it/s]
Training 33/40:  16%|█▌        | 23/143 [00:09<00:29,  4.09it/s]
Training 33/40:  25%|██▌       | 36/143 [00:09<00:13,  7.74it/s]
Training 33/40:  37%|███▋      | 53/143 [00:09<00:06, 14.20it/s]
Training 33/40:  52%|█████▏    | 74/143 [00:09<00:02, 24.65it/s]
Training 33/40:  68%|██████▊   | 97/143 [00:09<00:01, 39.27it/s]
Training 33/40: 100%|██████████| 143/143 [00:10<00:00, 13.55it/s]


Epoch 32: train_loss=0.1166, val_loss=0.1227



Training 34/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 34/40:   1%|          | 1/143 [00:08<20:57,  8.86s/it]
Training 34/40:   7%|▋         | 10/143 [00:08<01:26,  1.54it/s]
Training 34/40:  15%|█▍        | 21/143 [00:09<00:30,  3.94it/s]
Training 34/40: 100%|██████████| 143/143 [00:09<00:00, 14.78it/s]


Epoch 33: train_loss=0.1173, val_loss=0.1146



Training 35/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 35/40:   1%|          | 1/143 [00:08<20:53,  8.83s/it]
Training 35/40:   7%|▋         | 10/143 [00:08<01:26,  1.54it/s]
Training 35/40:  15%|█▍        | 21/143 [00:09<00:30,  3.95it/s]
Training 35/40:  26%|██▌       | 37/143 [00:09<00:12,  8.74it/s]
Training 35/40:  38%|███▊      | 55/143 [00:09<00:05, 15.92it/s]
Training 35/40:  54%|█████▍    | 77/143 [00:09<00:02, 27.45it/s]
Training 35/40:  72%|███████▏  | 103/143 [00:09<00:00, 44.81it/s]
Training 35/40: 100%|██████████| 143/143 [00:09<00:00, 14.37it/s]


Epoch 34: train_loss=0.1167, val_loss=0.1149



Training 36/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 36/40:   1%|          | 1/143 [00:08<21:10,  8.95s/it]
Training 36/40:   8%|▊         | 11/143 [00:09<01:18,  1.68it/s]
Training 36/40:  15%|█▌        | 22/143 [00:09<00:29,  4.06it/s]
Training 36/40:  34%|███▍      | 49/143 [00:09<00:07, 12.14it/s]
Training 36/40: 100%|██████████| 143/143 [00:09<00:00, 14.64it/s]


Epoch 35: train_loss=0.1172, val_loss=0.1221



Training 37/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 37/40:   1%|          | 1/143 [00:09<21:30,  9.09s/it]
Training 37/40:   8%|▊         | 12/143 [00:09<01:12,  1.81it/s]
Training 37/40:  15%|█▍        | 21/143 [00:09<00:32,  3.71it/s]
Training 37/40:  25%|██▌       | 36/143 [00:09<00:13,  8.08it/s]
Training 37/40:  36%|███▋      | 52/143 [00:09<00:06, 14.28it/s]
Training 37/40:  52%|█████▏    | 75/143 [00:09<00:02, 26.15it/s]
Training 37/40:  69%|██████▉   | 99/143 [00:09<00:01, 41.71it/s]
Training 37/40: 100%|██████████| 143/143 [00:10<00:00, 13.92it/s]


Epoch 36: train_loss=0.1135, val_loss=0.1145



Training 38/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 38/40:   1%|          | 1/143 [00:08<21:05,  8.91s/it]
Training 38/40:   8%|▊         | 11/143 [00:09<01:18,  1.69it/s]
Training 38/40:  15%|█▌        | 22/143 [00:09<00:29,  4.07it/s]
Training 38/40:  24%|██▍       | 35/143 [00:09<00:13,  7.89it/s]
Training 38/40:  38%|███▊      | 54/143 [00:09<00:05, 15.44it/s]
Training 38/40:  53%|█████▎    | 76/143 [00:09<00:02, 26.83it/s]
Training 38/40:  73%|███████▎  | 104/143 [00:09<00:00, 45.54it/s]
Training 38/40: 100%|██████████| 143/143 [00:10<00:00, 14.18it/s]


Epoch 37: train_loss=0.1168, val_loss=0.1147



Training 39/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 39/40:   1%|          | 1/143 [00:09<21:35,  9.13s/it]
Training 39/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 39/40:  17%|█▋        | 24/143 [00:09<00:27,  4.34it/s]
Training 39/40: 100%|██████████| 143/143 [00:09<00:00, 14.43it/s]


Epoch 38: train_loss=0.1146, val_loss=0.1129



Training 40/40:   0%|          | 0/143 [00:00<?, ?it/s]
Training 40/40:   1%|          | 1/143 [00:09<21:36,  9.13s/it]
Training 40/40:   8%|▊         | 12/143 [00:09<01:12,  1.80it/s]
Training 40/40:  16%|█▌        | 23/143 [00:09<00:29,  4.13it/s]
Training 40/40:  26%|██▌       | 37/143 [00:09<00:12,  8.17it/s]
Training 40/40:  38%|███▊      | 54/143 [00:09<00:06, 14.76it/s]
Training 40/40:  51%|█████     | 73/143 [00:09<00:02, 24.39it/s]
Training 40/40:  69%|██████▊   | 98/143 [00:09<00:01, 40.78it/s]
Training 40/40: 100%|██████████| 143/143 [00:10<00:00, 13.88it/s]


Epoch 39: train_loss=0.1138, val_loss=0.1138


2025-06-02 11:30:49,755 - __main__ - INFO - Saved mlp probe for layer 11 to cache\probes\phase1_ijepa_viewpoint_probing\mlp_layer_11_probe.pth
100%|██████████| 6/6 [4:30:24<00:00, 2704.14s/it]


In [8]:
# Persist the results of the probing run via the experiment object.
# The value returned by save_results() is kept in `result_path` because the
# analysis cell further down passes it to analyze_experiment_results() and uses
# `result_path.parent` as the output directory.
logger.info("Saving results...")
result_path = experiment.save_results(results)

2025-06-02 11:31:10,400 - __main__ - INFO - Saving results...
2025-06-02 11:31:10,418 - __main__ - INFO - Results saved to results\phase1_ijepa_viewpoint_probing\results.json


In [9]:
# Cell-local import: the analysis entry point is only needed for this step.
from src.analysis.layer_analysis import analyze_experiment_results

# Run the post-hoc analysis on the saved results. Outputs are directed to the
# directory that contains the results file (result_path.parent).
logger.info("Creating analysis and visualizations...")
analyze_experiment_results(result_path, output_dir=result_path.parent)

# NOTE(review): the message below mentions an "analysis_results" folder, but
# output_dir above is result_path.parent -- confirm the message matches where
# analyze_experiment_results actually writes its files.
logger.info("Results analyzed! Please see the results and analysis_results folders for the outcomes.")

2025-06-02 11:31:10,434 - __main__ - INFO - Creating analysis and visualizations...
2025-06-02 11:31:12,821 - src.analysis.layer_analysis - INFO - Analysis report saved to results\phase1_ijepa_viewpoint_probing\layer_analysis_report.json
2025-06-02 11:31:12,821 - __main__ - INFO - Results analyzed! Please see the results and analysis_results folders for the outcomes.
