In [1]:
import os
import time
import wandb
import torch
import os
import pickle
import wandb
import pandas as pd
import numpy as np
import random
import timm
import ultralytics
from typing import Dict, List, Tuple, Any

# Torch imports
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models

from torchvision.models import (
    vgg16, vgg19, mobilenet_v2, densenet121, inception_v3, 
    efficientnet_b0, efficientnet_b3, efficientnet_b7,
    resnet50,  
    VGG16_Weights, VGG19_Weights, MobileNet_V2_Weights, 
    DenseNet121_Weights, Inception_V3_Weights, 
    EfficientNet_B0_Weights, EfficientNet_B3_Weights, 
    EfficientNet_B7_Weights,
    ResNet50_Weights  # Add ResNet50 weights
)
SAMPLE_FRACTION = 1
PROJECT_NAME = "MosquitoLarvae-Classification-All3"
EPOCH1 = 50
EPOCH2 = 200
PATIENCE1 = 10
PATIENCE2 = 50


üîÑ Cell started at: 2025-07-30 21:33:45
‚úÖ Cell finished in 2.91 seconds



In [2]:
class WandbLogger:
    def __init__(self, local_dir: str = './wandb_logs'):
        """
        Initialize a robust wandb logger with local backup
        
        Args:
            local_dir (str): Directory to store local logs
        """
        self.local_dir = local_dir
        
        # Create local logging directory
        os.makedirs(self.local_dir, exist_ok=True)
        
        # Tracking for local logs
        self.current_run_name = None
        self.logs = []
    
    def _save_local_log(self, log_data: Dict[str, Any]):
        """
        Save log data locally
        
        Args:
            log_data (Dict): Log data to save
        """
        try:
            # Generate unique filename
            import pandas as pd
            timestamp = pd.Timestamp.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{self.current_run_name}_{timestamp}_log.pkl"
            filepath = os.path.join(self.local_dir, filename)
            
            # Save log data
            with open(filepath, 'wb') as f:
                pickle.dump(log_data, f)
            
            print(f"Local log saved: {filename}")
        except Exception as e:
            print(f"Error saving local log: {e}")
    
    def _check_and_sync_logs(self, operation: str):
        """
        Attempt to sync local logs before a wandb operation
        
        Args:
            operation (str): The wandb operation being attempted
        """
        try:
            # List local log files
            local_logs = [f for f in os.listdir(self.local_dir) if f.endswith('.pkl')]
            
            if local_logs:
                print(f"Attempting to sync {len(local_logs)} local logs before {operation}")
                
                # Ensure login
                wandb.login(timeout=10)
                
                for log_file in local_logs:
                    filepath = os.path.join(self.local_dir, log_file)
                    
                    try:
                        # Load local log
                        with open(filepath, 'rb') as f:
                            log_data = pickle.load(f)
                        
                        # Extract run name from filename
                        run_name = '_'.join(log_file.split('_')[:4])  # Capture full run name
                        
                        # Initialize run if not already initialized
                        if not wandb.run:
                            wandb.init(
                                project=PROJECT_NAME,
                                name=run_name
                            )
                        
                        # Log data
                        wandb.log(log_data)
                        
                        # Remove successfully synced log
                        os.remove(filepath)
                        print(f"Synced and removed: {log_file}")
                    
                    except Exception as sync_error:
                        print(f"Error syncing {log_file}: {sync_error}")
                
                # Finish the run if it was temporarily initialized
                if wandb.run:
                    wandb.finish()
        
        except Exception as e:
            print(f"Sync attempt before {operation} failed: {e}")
    
    def init(self, run_name: str, config: Dict[str, Any] = None):
        """
        Initialize wandb run with network disconnection handling and log syncing
        
        Args:
            run_name (str): Name of the run
            config (Dict, optional): Configuration dictionary
        
        Returns:
            bool: Whether initialization was successful
        """
        # Attempt to sync logs before initialization
        self._check_and_sync_logs("init")
        
        self.current_run_name = run_name
        
        try:
            # Attempt wandb login
            wandb.login(timeout=10)
            
            # Initialize run
            wandb.init(
                project=PROJECT_NAME,
                name=run_name,
                config=config or {}
            )
            return True
        
        except Exception as e:
            print(f"Wandb initialization failed: {e}")
            print("Falling back to local logging")
            return False
    
    def log(self, log_data: Dict[str, Any], force_local: bool = False):
        """
        Log data with network disconnection handling
        
        Args:
            log_data (Dict): Data to log
            force_local (bool, optional): Force local logging
        
        Returns:
            bool: Whether logging was successful
        """
        try:
            # If not force_local, try wandb logging
            if not force_local:
                wandb.log(log_data)
                return True
            
            # Fallback to local logging
            raise Exception("Forced local logging")
        
        except Exception as e:
            print(f"Wandb logging failed: {e}")
            print("Saving log locally")
            
            # Save to local storage
            self._save_local_log(log_data)
            return False
    
    def finish(self):
        """
        Finish the current run with network disconnection handling
        """
        try:
            wandb.finish()
        except Exception as e:
            print(f"Wandb finish failed: {e}")
    
    def sync_local_logs(self):
        """
        Sync local logs to wandb when network is available
        """
        try:
            # Ensure login
            wandb.login()
            
            # Find all local log files
            local_logs = [f for f in os.listdir(self.local_dir) if f.endswith('.pkl')]
            
            for log_file in local_logs:
                filepath = os.path.join(self.local_dir, log_file)
                
                try:
                    # Load local log
                    with open(filepath, 'rb') as f:
                        log_data = pickle.load(f)
                    
                    # Extract full run name
                    run_name = '_'.join(log_file.split('_')[:4])
                    
                    # Initialize run if needed
                    if not wandb.run:
                        wandb.init(
                            project=PROJECT_NAME,
                            name=run_name
                        )
                    
                    # Log data
                    wandb.log(log_data)
                    
                    # Remove successfully synced log
                    os.remove(filepath)
                    print(f"Synced and removed: {log_file}")
                
                except Exception as sync_error:
                    print(f"Error syncing {log_file}: {sync_error}")
        
        except Exception as e:
            print(f"Sync failed: {e}")


üîÑ Cell started at: 2025-07-30 21:33:50
‚úÖ Cell finished in 0.00 seconds



In [3]:
# Configuration and Setup
def configure_device():
    """
    Detect and configure the available computing device.
    
    Returns:
        torch.device: CUDA device if available, else CPU
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    if device.type == 'cuda':
        # Print GPU details
        print(f"GPU Name: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
    
    return device




üîÑ Cell started at: 2025-07-30 21:34:01
‚úÖ Cell finished in 0.00 seconds



In [4]:
def get_data_transforms(input_size: int):
    """
    Create data transformations for training and validation.
    
    Args:
        input_size (int): Size to resize images
    
    Returns:
        tuple: Training and validation transforms
    """
    train_transforms = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.RandomRotation(45),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    val_transforms = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    return train_transforms, val_transforms




üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [5]:
def create_data_loaders(
    base_dir: str, 
    dataset_type: str, 
    split_num: int, 
    input_size: int, 
    batch_size: int = 32,
    sample_fraction: float = 0.01  # default onl 1% but SAMPLE_FRACTION overrules this
):
    """
    Create data loaders with optional dataset sampling.
    
    Args:
        base_dir (str): Base directory of dataset
        dataset_type (str): 'Original' or 'Augmented'
        split_num (int): Dataset split number
        input_size (int): Image input size
        batch_size (int, optional): Batch size. Defaults to 32.
        sample_fraction (float, optional): Fraction of dataset to use. Defaults to 0.01 (1%).
    
    Returns:
        tuple: Train, validation, test, and blurred test data loaders
    """
    # Construct directory paths
    split_dir = os.path.join(base_dir, f"{dataset_type}_Larvae_Split{split_num}")
    train_dir = os.path.join(split_dir, 'train')
    val_dir = os.path.join(split_dir, 'val')
    test_dir = os.path.join(split_dir, 'test')
    blurred_test_dir = os.path.join(split_dir, 'test-blurred')
    
    # Get transforms
    train_transforms, val_transforms = get_data_transforms(input_size)
    
    # Create full datasets
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
    val_dataset = datasets.ImageFolder(val_dir, transform=val_transforms)
    test_dataset = datasets.ImageFolder(test_dir, transform=val_transforms)
    blurred_test_dataset = datasets.ImageFolder(blurred_test_dir, transform=val_transforms)
    
    # Sample datasets if sample_fraction < 1
    def sample_dataset(dataset, fraction):
        """
        Sample a fraction of the dataset while maintaining class balance
        """
        if fraction >= 1:
            return dataset
        
        # Get indices for each class
        class_indices = {}
        for idx, (_, label) in enumerate(dataset.samples):
            if label not in class_indices:
                class_indices[label] = []
            class_indices[label].append(idx)
        
        # Sample balanced subset
        sampled_indices = []
        for label, indices in class_indices.items():
            num_samples = max(1, int(len(indices) * fraction))
            sampled_indices.extend(random.sample(indices, num_samples))
        
        return Subset(dataset, sampled_indices)
    
    # Sample datasets
    train_dataset = sample_dataset(train_dataset, sample_fraction)
    val_dataset = sample_dataset(val_dataset, sample_fraction)
    test_dataset = sample_dataset(test_dataset, sample_fraction)
    blurred_test_dataset = sample_dataset(blurred_test_dataset, sample_fraction)
    
    # Print dataset sizes
    print(f"Dataset Sampling for {dataset_type} Split {split_num}:")
    print(f"Train dataset size: {len(train_dataset)} samples")
    print(f"Validation dataset size: {len(val_dataset)} samples")
    print(f"Test dataset size: {len(test_dataset)} samples")
    print(f"Blurred test dataset size: {len(blurred_test_dataset)} samples")
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    blurred_test_loader = DataLoader(blurred_test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    
    return train_loader, val_loader, test_loader, blurred_test_loader



üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [6]:
def create_model(
    model_name: str, 
    num_classes: int, 
    input_size: int, 
    device: torch.device, 
    freeze_backbone: bool = True,
    custom_path: str = None
):
    """
    Create a transfer learning model with modified classifier.
    
    Args:
        model_name (str): Name of the model
        num_classes (int): Number of output classes
        input_size (int): Input image size
        device (torch.device): Device to load model on
        freeze_backbone (bool, optional): Freeze backbone layers. Defaults to True.
        custom_path (str, optional): Path to custom trained model
    
    Returns:
        nn.Module: Modified pre-trained model
    """
    # Existing model dictionary, InceptionV3, InceptionResNetV2, and the YOLOs need their customize code
    model_dict = {
        'VGG16': (models.vgg16, models.VGG16_Weights.IMAGENET1K_V1),
        'VGG19': (models.vgg19, models.VGG19_Weights.IMAGENET1K_V1),
        'MobileNet': (models.mobilenet_v2, models.MobileNet_V2_Weights.IMAGENET1K_V2),
        'DenseNet121': (models.densenet121, models.DenseNet121_Weights.IMAGENET1K_V1),
        'ResNet50': (models.resnet50, models.ResNet50_Weights.IMAGENET1K_V1),
        'EfficientNetB0': (models.efficientnet_b0, models.EfficientNet_B0_Weights.IMAGENET1K_V1),
        'EfficientNetB3': (models.efficientnet_b3, models.EfficientNet_B3_Weights.IMAGENET1K_V1),
    }
    
    # YOLO model handling
    if model_name in ['YOLOv8n-cls', 'Dilated-YOLOv8n-cls', 'YOLO11n-cls'] :
        from ultralytics import YOLO
        
        # Load YOLO model
        model = YOLO(custom_path)
        
        # Convert to PyTorch model
        yolo_model = model.model
        
        # Create a wrapper to handle YOLO's classification model
        class YOLOClassificationWrapper(nn.Module):
            def __init__(self, yolo_model, num_classes, model_name):
                super().__init__()
                
                # Store the original YOLO model
                self.yolo_model = yolo_model
                
                # Feature extraction layer is all layers except the last classifier
                self.feature_layer = self.yolo_model.model[:-1]
                
                # Original classifier is always the last layer
                self.original_classifier = self.yolo_model.model[-1]
                
                # Create a new classification head that mimics the original structure
                self.head = nn.Sequential(
                    # Convolutional layer matching the original
                    nn.Conv2d(256, 1280, kernel_size=1, bias=False),
                    nn.BatchNorm2d(1280, eps=1e-05, momentum=0.1),
                    nn.SiLU(inplace=True),
                    
                    # Adaptive Average Pooling
                    nn.AdaptiveAvgPool2d(1),
                    
                    # Dropout (optional, can be configured)
                    nn.Dropout(p=0.0, inplace=True),
                    
                    # Flatten
                    nn.Flatten(),
                    
                    # Linear layer for classification
                    nn.Linear(1280, num_classes)
                )
            
            def forward(self, x):
                # Extract features from the YOLO model
                try:
                    # Extract features using all layers except the last classifier
                    features = self.feature_layer(x)
                    
                    # Apply the new classification head
                    return self.head(features)
                
                except Exception as e:
                    print(f"Feature extraction error: {e}")
                    # If extraction fails, try a different approach
                    features = self.yolo_model(x)
                    
                    # If features are still not right, raise the error
                    if not isinstance(features, torch.Tensor):
                        raise ValueError("Unable to extract features from YOLO model")
                    
                    return self.head(features)
        
        # Wrap the YOLO model
        model = YOLOClassificationWrapper(yolo_model, num_classes, model_name)    
            
        # Freeze backbone if required
        if freeze_backbone:
            for param in model.feature_layer.parameters():
                param.requires_grad = False
            
            # Ensure head is trainable
            for param in model.head.parameters():
                param.requires_grad = True            
               
               
    elif model_name == 'InceptionV3':
        # Load InceptionV3 with pretrained weights
        model = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)
        
        # Modify the model to handle training vs. evaluation differently
        original_forward = model.forward
        
        def custom_forward(x):
            # During training, InceptionV3 returns a special output type
            if model.training:
                outputs = original_forward(x)
                # Return the main logits during training
                return outputs.logits
            else:
                # During evaluation, return standard output
                return original_forward(x)
        
        # Replace the forward method
        model.forward = custom_forward
        
        # Modify the classification head
        num_features = model.fc.in_features
        model.fc = nn.Linear(num_features, num_classes)
        
        # Freeze backbone if required
        if freeze_backbone:
            for param in model.parameters():
                param.requires_grad = False
            
            # Unfreeze classification head
            for param in model.fc.parameters():
                param.requires_grad = True
    
    elif model_name == 'InceptionResNetV2':
        import timm
        
        # Load TIMM model
        model = timm.create_model('inception_resnet_v2', pretrained=True, num_classes=0)
        
        # Modify the model to ensure proper feature extraction and classification
        class InceptionResNetV2Wrapper(nn.Module):
            def __init__(self, base_model, num_classes, input_size):
                super().__init__()
                self.features = base_model
                
                # Remove any existing classification head
                if hasattr(self.features, 'head'):
                    delattr(self.features, 'head')
                
                # Global average pooling to reduce feature dimensions
                self.global_pool = nn.AdaptiveAvgPool2d(1)
                
                # Get number of features
                num_features = self.features.num_features
                
                # Create new classification head
                self.head = nn.Sequential(
                    nn.Flatten(),
                    nn.Linear(num_features, 512),
                    nn.ReLU(),
                    nn.Dropout(0.5),
                    nn.Linear(512, num_classes)
                )
            
            def forward(self, x):
                # Extract features
                features = self.features.forward_features(x)
                
                # Apply global pooling
                pooled_features = self.global_pool(features)
                
                # Classification
                return self.head(pooled_features)
        
        # Wrap the model
        model = InceptionResNetV2Wrapper(model, num_classes, input_size)
        
        # Freeze backbone if required
        if freeze_backbone:
            for param in model.features.parameters():
                param.requires_grad = False
        
        # Ensure head parameters are trainable
        for param in model.head.parameters():
            param.requires_grad = True                
    else:
        # Existing model creation logic
        if model_name not in model_dict:
            raise ValueError(f"Unsupported model: {model_name}")
        
        model_func, model_weights = model_dict[model_name]
        
        # Load model with specific weights
        model = model_func(weights=model_weights)
        
        # Freeze backbone if required
        if freeze_backbone:
            for param in model.parameters():
                param.requires_grad = False
        
        # Modify classifier based on model architecture
        if model_name.startswith('VGG'):
            num_features = model.classifier[0].in_features
            model.classifier = nn.Sequential(
                nn.Linear(num_features, 512),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(512, num_classes)
            )
        
        elif model_name.startswith(('MobileNet', 'EfficientNet')):
            num_features = model.classifier[1].in_features
            model.classifier = nn.Linear(num_features, num_classes)
        
        elif model_name == 'DenseNet121':
            num_features = model.classifier.in_features
            model.classifier = nn.Linear(num_features, num_classes)
                
        elif model_name == 'ResNet50':
            num_features = model.fc.in_features
            model.fc = nn.Sequential(
                nn.Linear(num_features, 512),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(512, num_classes)
            )
    
    
    
    # Print model information
    print(f"Created {model_name} model:")
    print(f"Backbone frozen: {freeze_backbone}")
    print(f"Input size: {input_size}")
    print(f"Number of classes: {num_classes}")
    
    # Count trainable parameters
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Trainable parameters: {trainable_params}")
    print(f"Total parameters: {total_params}")
    print(f"Trainable percentage: {100 * trainable_params / total_params:.2f}%")
    
    return model.to(device)



üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [7]:
def evaluate_model(
    model: nn.Module, 
    test_loader: DataLoader, 
    blurred_test_loader: DataLoader, 
    device: torch.device,
    model_name: str,
    dataset_type: str,
    split_num: int,
    wandb_logger: WandbLogger = None
):
    """
    Evaluate model performance on test and blurred test sets.
    """
    # Prepare run name with Evaluation phase
    run_name = f"{dataset_type}_Split{split_num}_{model_name}_Evaluation"
    
    # Prepare configuration for logging
    config = {
        "architecture": model_name,
        "dataset": dataset_type,
        "split": split_num,
        "stage": "Evaluation"
    }
    
    # Initialize wandb run with specific name
    if wandb_logger:
        wandb.init(
            project=PROJECT_NAME,
            name=run_name,
            config=config
        )
    
    try:
        model.eval()
        
        def compute_detailed_metrics(loader, set_name):
            """
            Compute comprehensive metrics for a given dataset
            """
            correct = 0
            total = 0
            class_correct = [0] * 4  # Assuming 4 classes
            class_total = [0] * 4
            
            # Use torch.int to ensure integer type
            confusion_matrix = torch.zeros(4, 4, dtype=torch.int)
            
            with torch.no_grad():
                for inputs, labels in loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    
                    # Get predictions
                    _, predicted = torch.max(outputs.data, 1)
                    
                    # Update metrics
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    
                    # Class-wise accuracy
                    for t, p in zip(labels, predicted):
                        confusion_matrix[t.long(), p.long()] += 1
                    
                    for i in range(4):
                        class_mask = (labels == i)
                        class_correct[i] += (predicted[class_mask] == labels[class_mask]).sum().item()
                        class_total[i] += class_mask.sum().item()
            
            # Compute metrics
            accuracy = 100 * correct / total
            class_accuracies = [
                100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0 
                for i in range(4)
            ]
            
            # Log metrics
            if wandb_logger:
                wandb.log({
                    f'{set_name}_overall_accuracy': accuracy,
                    **{f'{set_name}_class{i}_accuracy': acc for i, acc in enumerate(class_accuracies)}
                })
            
            return {
                'accuracy': accuracy,
                'class_accuracies': class_accuracies,
                'confusion_matrix': confusion_matrix.numpy()
            }
        
        # Compute metrics for test and blurred test sets
        test_metrics = compute_detailed_metrics(test_loader, 'test')
        blurred_test_metrics = compute_detailed_metrics(blurred_test_loader, 'blurred_test')
        
        # Final logging
        if wandb_logger:
            wandb.log({
                'test_accuracy': test_metrics['accuracy'],
                'blurred_test_accuracy': blurred_test_metrics['accuracy']
            })
            
            # Finish the wandb run
            wandb.finish()
    
    except Exception as e:
        print(f"Error in evaluation: {e}")
        if wandb_logger:
            wandb.log({"evaluation_error": str(e)})
            wandb.finish()
        raise
    
    # Prepare return dictionary
    result_dict = {
        'test_accuracy': test_metrics['accuracy'],
        'blurred_test_accuracy': blurred_test_metrics['accuracy'],
        'test_class_accuracies': test_metrics['class_accuracies'],
        'blurred_test_class_accuracies': blurred_test_metrics['class_accuracies']
    }
    
    return result_dict


üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [8]:
def generate_summary_report(results_df):
    """
    Generate a comprehensive summary report of model performances.
    
    Args:
        results_df (pd.DataFrame): DataFrame containing training results
    """
    # Ensure matplotlib uses a backend that doesn't require a display
    import matplotlib
    matplotlib.use('Agg')
    
    import matplotlib.pyplot as plt
    import seaborn as sns
    
    # Create a directory for reports if it doesn't exist
    os.makedirs('reports', exist_ok=True)
    
    # 1. Basic Statistical Summary
    summary = results_df.groupby(['Model', 'Dataset']).agg({
        'test_accuracy': ['mean', 'std'],
        'blurred_test_accuracy': ['mean', 'std'],
        'Phase2_Training_Time': 'mean'
    }).reset_index()
    
    # Flatten multi-level column names
    summary.columns = [
        'Model', 'Dataset', 
        'Test_Accuracy_Mean', 'Test_Accuracy_Std', 
        'Blurred_Test_Accuracy_Mean', 'Blurred_Test_Accuracy_Std', 
        'Avg_Training_Time'
    ]
    
    # Save summary to CSV
    summary.to_csv('reports/model_performance_summary.csv', index=False)
    
    # 2. Detailed Visualization
    try:
        # Prepare data for plotting
        plt.figure(figsize=(15, 10))
        
        # Subplot 1: Test Accuracy Comparison
        plt.subplot(2, 2, 1)
        sns.barplot(
            x='Model', 
            y='test_accuracy', 
            hue='Dataset', 
            data=results_df,
            errorbar='sd'  # Show standard deviation
        )
        plt.title('Test Accuracy Across Models and Datasets')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        
        # Subplot 2: Blurred Test Accuracy Comparison
        plt.subplot(2, 2, 2)
        sns.barplot(
            x='Model', 
            y='blurred_test_accuracy', 
            hue='Dataset', 
            data=results_df,
            errorbar='sd'
        )
        plt.title('Blurred Test Accuracy Across Models and Datasets')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        
        # Subplot 3: Training Time Comparison
        plt.subplot(2, 2, 3)
        sns.boxplot(
            x='Model', 
            y='Phase2_Training_Time', 
            hue='Dataset', 
            data=results_df
        )
        plt.title('Training Time Across Models and Datasets')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        
        # Subplot 4: Accuracy Difference (Test vs Blurred)
        plt.subplot(2, 2, 4)
        results_df['Accuracy_Difference'] = results_df['test_accuracy'] - results_df['blurred_test_accuracy']
        sns.boxplot(
            x='Model', 
            y='Accuracy_Difference', 
            hue='Dataset', 
            data=results_df
        )
        plt.title('Accuracy Drop (Test vs Blurred)')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        
        # Save the comprehensive plot
        plt.savefig('reports/model_performance_comparison.png', dpi=300, bbox_inches='tight')
        plt.close()
    
    except Exception as e:
        print(f"Error generating visualizations: {e}")
    
    # 3. Detailed Performance Analysis
    try:
        # Performance Ranking
        performance_ranking = results_df.groupby('Model').agg({
            'test_accuracy': 'mean',
            'blurred_test_accuracy': 'mean'
        }).sort_values('test_accuracy', ascending=False)
        
        # Save ranking
        performance_ranking.to_csv('reports/model_performance_ranking.csv')
        
        # Print ranking to console
        print("\nModel Performance Ranking:")
        print(performance_ranking)
    
    except Exception as e:
        print(f"Error generating performance ranking: {e}")
    
    # 4. Detailed LaTeX Summary Table
    try:
        # Generate LaTeX table
        latex_summary = summary.to_latex(
            index=False, 
            float_format="{:.2f}".format,
            caption="Model Performance Summary"
        )
        
        with open('reports/latex_summary_table.tex', 'w') as f:
            f.write(latex_summary)
    
    except Exception as e:
        print(f"Error generating LaTeX summary: {e}")



üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [9]:
def train_model(
    model: nn.Module, 
    train_loader: DataLoader, 
    val_loader: DataLoader, 
    device: torch.device, 
    model_name: str, 
    dataset_type: str, 
    split_num: int,
    phase: str,
    num_epochs: int = 50, 
    patience: int = 10, 
    learning_rate: float = 1e-4,
    wandb_logger: WandbLogger = None
):
    """
    Train the model with comprehensive wandb logging.
    """
    # Prepare run name with phase
    run_name = f"{dataset_type}_Split{split_num}_{model_name}_{phase}"
    
    # Prepare configuration for logging
    config = {
        "architecture": model_name,
        "dataset": dataset_type,
        "split": split_num,
        "phase": phase,
        "epochs": num_epochs,
        "patience": patience,
        "initial_learning_rate": learning_rate
    }
    
    # Use WandbLogger for initialization
    if wandb_logger:
        # Attempt to initialize with the specific run name
        wandb_logger.current_run_name = run_name
        init_success = wandb_logger.init(run_name, config)
    
    try:
        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        
        # Learning rate scheduler
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 
            mode='min', 
            factor=0.1, 
            patience=max(5, num_epochs // 10)
        )
        
        # Training tracking
        best_val_loss = float('inf')
        patience_counter = 0
        start_time = time.time()
        
        # Metrics tracking
        train_losses = []
        val_losses = []
        train_accuracies = []
        val_accuracies = []
        learning_rates = []
        
        # Actual epochs might be less due to early stopping
        actual_epochs = 0
        
        for epoch in range(num_epochs):
            # Set model to training mode
            model.train()
            
            # Training phase
            epoch_train_loss = 0.0
            correct_train = 0
            total_train = 0
            
            for inputs, labels in train_loader:
                # Move data to device
                inputs, labels = inputs.to(device), labels.to(device)
                
                # Zero the parameter gradients
                optimizer.zero_grad()
                
                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # Backward pass and optimize
                loss.backward()
                optimizer.step()
                
                # Tracking training metrics
                epoch_train_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()
            
            # Calculate training metrics
            train_loss = epoch_train_loss / len(train_loader)
            train_accuracy = 100 * correct_train / total_train
            
            # Validation phase
            model.eval()
            epoch_val_loss = 0.0
            correct_val = 0
            total_val = 0
            
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    
                    epoch_val_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total_val += labels.size(0)
                    correct_val += (predicted == labels).sum().item()
            
            # Calculate validation metrics
            val_loss = epoch_val_loss / len(val_loader)
            val_accuracy = 100 * correct_val / total_val
            
            # Update learning rate scheduler
            scheduler.step(val_loss)
            current_lr = optimizer.param_groups[0]['lr']
            
            # Store metrics
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_accuracy)
            val_accuracies.append(val_accuracy)
            learning_rates.append(current_lr)
            
            # Wandb logging
            if wandb_logger:
                wandb_logger.log({
                    'epoch': epoch,
                    'train_loss': train_loss,
                    'train_accuracy': train_accuracy,
                    'val_loss': val_loss,
                    'val_accuracy': val_accuracy,
                    'learning_rate': current_lr
                })
            
            # Early stopping logic
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
                # Save the best model
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_val_loss
                }, f'{dataset_type}_Split{split_num}_{model_name}_{phase}_best_model.pth')
            else:
                patience_counter += 1
            
            actual_epochs += 1
            
            # Break if no improvement
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break
        
        # Calculate total training time
        training_time = time.time() - start_time
        
        # Final logging
        if wandb_logger:
            wandb_logger.log({
                'total_training_time': training_time,
                'actual_epochs': actual_epochs,
                'best_val_loss': best_val_loss
            })
            
            # Finish the wandb run
            wandb_logger.finish()
    
    except Exception as e:
        print(f"Error in training: {e}")
        if wandb_logger:
            wandb_logger.log({"training_error": str(e)})
            wandb_logger.finish()
        raise
    
    return {
        'model': model,
        'training_time': training_time,
        'actual_epochs': actual_epochs,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accuracies': train_accuracies,
        'val_accuracies': val_accuracies,
        'learning_rates': learning_rates
    }



üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [10]:
def set_seed(seed: int = 42):
    """
    Set random seeds for reproducibility across various libraries.
    
    Args:
        seed (int, optional): Random seed value. Defaults to 42.
    """
    # Python's built-in random module
    import random
    random.seed(seed)
    
    # Numpy
    import numpy as np
    np.random.seed(seed)
    
    # PyTorch
    import torch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # For multi-GPU setups
    
    # PyTorch reproducibility settings
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
    # Optional: Set environment variable for further reproducibility
    import os
    os.environ['PYTHONHASHSEED'] = str(seed)


üîÑ Cell started at: 2025-07-30 21:34:39
‚úÖ Cell finished in 0.00 seconds



In [11]:
import json

device = configure_device()

def main():
    # Ensure reproducibility
    set_seed()

    # Configuration
    base_dir = './datasets'
    dataset_types = ['Original', 'Augmented']
    splits = range(1, 6)
    num_classes = 4
    completed_file = 'completed_runs.json'

    # Load or initialize completed runs
    if os.path.exists(completed_file):
        with open(completed_file, 'r') as f:
            completed_runs = set(json.load(f))
    else:
        completed_runs = set()

    # Initialize wandb
    wandb_logger = WandbLogger()

    # Results tracking
    results = []

    # Define models (make sure this matches other cells or consolidate)
    models_config = [
 #       {'name': 'YOLOv8n-cls', 'input_size': 224, 'learning_rate': 0.01, 'weights': 'yolov8n-cls.pt'},
 #       {'name': 'Dilated-YOLOv8n-cls', 'input_size': 224, 'learning_rate': 0.01, 'weights': 'dilated-yolov8n-cls.pt'},
 #       {'name': 'YOLO11n-cls', 'input_size': 224, 'learning_rate': 0.01, 'weights': 'yolo11n-cls.pt'},
        {'name': 'VGG16', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'VGG19', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'MobileNet', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'DenseNet121', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'InceptionV3', 'input_size': 299, 'learning_rate': 0.001},
        {'name': 'ResNet50', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'InceptionResNetV2', 'input_size': 299, 'learning_rate': 0.001},
        {'name': 'EfficientNetB0', 'input_size': 224, 'learning_rate': 0.001},
        {'name': 'EfficientNetB3', 'input_size': 300, 'learning_rate': 0.001},
    ]
    
    for dataset_type in dataset_types:
        for split in splits:
            for model_config in models_config:
                run_id = f"{dataset_type}_Split{split}_{model_config['name']}"
                if run_id in completed_runs:
                    print(f"Skipping completed run: {run_id}")
                    continue

                try:
                    # Create data loaders
                    train_loader, val_loader, test_loader, blurred_test_loader = create_data_loaders(
                        base_dir,
                        dataset_type,
                        split,
                        model_config['input_size'],
                        sample_fraction=SAMPLE_FRACTION
                    )

                    custom_path = model_config['weights'] if 'YOLO' in model_config['name'] else None

                    # Phase 1
                    model_phase1 = create_model(
                        model_config['name'],
                        num_classes,
                        model_config['input_size'],
                        device,
                        freeze_backbone=True,
                        custom_path=custom_path
                    )
                    phase1_result = train_model(
                        model_phase1,
                        train_loader,
                        val_loader,
                        device,
                        model_config['name'],
                        dataset_type,
                        split,
                        phase='Transfer',
                        num_epochs=EPOCH1,
                        patience=PATIENCE1,
                        learning_rate=model_config['learning_rate'],
                        wandb_logger=wandb_logger
                    )

                    # Unfreeze all layers for Phase 2
                    for param in model_phase1.parameters():
                        param.requires_grad = True

                    # Train the same model (fine-tune it)
                    phase2_result = train_model(
                        model_phase1,  # continue with the trained model
                        train_loader,
                        val_loader,
                        device,
                        model_config['name'],
                        dataset_type,
                        split,
                        phase='Retrain',
                        num_epochs=EPOCH2,
                        patience=PATIENCE2,
                        learning_rate=model_config['learning_rate']/10,
                        wandb_logger=wandb_logger
                    )

                    # Evaluation
                    eval_metrics = evaluate_model(
                        model_phase1,
                        test_loader,
                        blurred_test_loader,
                        device,
                        model_config['name'],
                        dataset_type,
                        split,
                        wandb_logger=wandb_logger
                    )

                    # Record results
                    result_entry = {
                        'Dataset': dataset_type,
                        'Split': split,
                        'Model': model_config['name'],
                        'Phase1_Epochs': phase1_result['actual_epochs'],
                        'Phase1_Training_Time': phase1_result['training_time'],
                        'Phase2_Epochs': phase2_result['actual_epochs'],
                        'Phase2_Training_Time': phase2_result['training_time'],
                        **eval_metrics
                    }
                    results.append(result_entry)

                    # Mark run as completed
                    completed_runs.add(run_id)
                    with open(completed_file, 'w') as f:
                        json.dump(sorted(completed_runs), f, indent=2)

                except Exception as e:
                    print(f"‚ùå Error processing {run_id}: {e}")
                    import traceback
                    traceback.print_exc()



üîÑ Cell started at: 2025-07-30 21:34:39
Using device: cuda
GPU Name: NVIDIA GeForce RTX 4090
GPU Memory: 25.262096384 GB
‚úÖ Cell finished in 0.06 seconds



In [12]:
# Ensure reproducibility when running the script
if __name__ == '__main__':
    
    # Run main training pipeline
    main()
    
    


üîÑ Cell started at: 2025-07-30 21:34:39
Skipping completed run: Original_Split1_VGG16
Skipping completed run: Original_Split1_VGG19
Skipping completed run: Original_Split1_MobileNet
Skipping completed run: Original_Split1_DenseNet121
Skipping completed run: Original_Split1_InceptionV3
Skipping completed run: Original_Split1_ResNet50
Skipping completed run: Original_Split1_InceptionResNetV2
Skipping completed run: Original_Split1_EfficientNetB0
Skipping completed run: Original_Split1_EfficientNetB3
Skipping completed run: Original_Split2_VGG16
Skipping completed run: Original_Split2_VGG19
Skipping completed run: Original_Split2_MobileNet
Skipping completed run: Original_Split2_DenseNet121
Skipping completed run: Original_Split2_InceptionV3
Skipping completed run: Original_Split2_ResNet50
Skipping completed run: Original_Split2_InceptionResNetV2
Skipping completed run: Original_Split2_EfficientNetB0
Skipping completed run: Original_Split2_EfficientNetB3
Skipping completed run: Origina

[34m[1mwandb[0m: Currently logged in as: [33mwanchp[0m ([33mwanchp-chulalongkorn-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Early stopping at epoch 12


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñá‚ñá‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÑ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÖ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÅ‚ñÑ‚ñÖ‚ñÉ‚ñÅ‚ñÑ‚ñÇ‚ñÉ‚ñÉ‚ñá‚ñÉ‚ñá‚ñà
val_loss,‚ñÅ‚ñÇ‚ñÅ‚ñÉ‚ñÉ‚ñÉ‚ñÖ‚ñÑ‚ñÑ‚ñÇ‚ñà‚ñÅ‚ñÅ

0,1
actual_epochs,13.0
best_val_loss,1.255
epoch,12.0
learning_rate,0.0001
total_training_time,676.05559
train_accuracy,81.68677
train_loss,0.46306
val_accuracy,75.69864
val_loss,1.58693


Early stopping at epoch 68


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÜ‚ñÉ‚ñÅ‚ñà‚ñá‚ñá‚ñà‚ñà‚ñá‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
val_loss,‚ñÅ‚ñÇ‚ñà‚ñá‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ

0,1
actual_epochs,69.0
best_val_loss,0.00769
epoch,68.0
learning_rate,0.0
total_training_time,10167.03213
train_accuracy,99.99681
train_loss,7e-05
val_accuracy,99.80997
val_loss,0.02567


0,1
blurred_test_accuracy,‚ñÅ
blurred_test_class0_accuracy,‚ñÅ
blurred_test_class1_accuracy,‚ñÅ
blurred_test_class2_accuracy,‚ñÅ
blurred_test_class3_accuracy,‚ñÅ
blurred_test_overall_accuracy,‚ñÅ
test_accuracy,‚ñÅ
test_class0_accuracy,‚ñÅ
test_class1_accuracy,‚ñÅ
test_class2_accuracy,‚ñÅ

0,1
blurred_test_accuracy,72.18514
blurred_test_class0_accuracy,43.29897
blurred_test_class1_accuracy,83.96396
blurred_test_class2_accuracy,65.41219
blurred_test_class3_accuracy,97.64493
blurred_test_overall_accuracy,72.18514
test_accuracy,99.9555
test_class0_accuracy,99.91409
test_class1_accuracy,99.90991
test_class2_accuracy,100.0


Dataset Sampling for Augmented Split 5:
Train dataset size: 31338 samples
Validation dataset size: 8946 samples
Test dataset size: 4494 samples
Blurred test dataset size: 4494 samples
Created EfficientNetB0 model:
Backbone frozen: True
Input size: 224
Number of classes: 4
Trainable parameters: 5124
Total parameters: 4012672
Trainable percentage: 0.13%


Early stopping at epoch 30


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñà‚ñá‚ñá‚ñà‚ñá‚ñà
train_loss,‚ñà‚ñÑ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÅ‚ñÅ‚ñÇ‚ñÑ‚ñÑ‚ñÖ‚ñá‚ñÜ‚ñÑ‚ñÇ‚ñá‚ñÜ‚ñá‚ñÖ‚ñÖ‚ñÜ‚ñá‚ñÑ‚ñá‚ñá‚ñà‚ñá‚ñà‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñÖ‚ñÜ‚ñÜ
val_loss,‚ñà‚ñá‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÖ‚ñÇ‚ñÉ‚ñÇ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ

0,1
actual_epochs,31.0
best_val_loss,0.39859
epoch,30.0
learning_rate,1e-05
total_training_time,429.39334
train_accuracy,79.48178
train_loss,0.51079
val_accuracy,83.13213
val_loss,0.41314


Early stopping at epoch 72


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÖ‚ñá‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÅ‚ñÑ‚ñÖ‚ñÖ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
val_loss,‚ñà‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÅ‚ñÉ‚ñÇ‚ñÇ‚ñÉ‚ñÅ‚ñÇ‚ñÇ‚ñÉ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ

0,1
actual_epochs,73.0
best_val_loss,0.01007
epoch,72.0
learning_rate,0.0
total_training_time,2143.15134
train_accuracy,99.99043
train_loss,0.00036
val_accuracy,99.66465
val_loss,0.01694


0,1
blurred_test_accuracy,‚ñÅ
blurred_test_class0_accuracy,‚ñÅ
blurred_test_class1_accuracy,‚ñÅ
blurred_test_class2_accuracy,‚ñÅ
blurred_test_class3_accuracy,‚ñÅ
blurred_test_overall_accuracy,‚ñÅ
test_accuracy,‚ñÅ
test_class0_accuracy,‚ñÅ
test_class1_accuracy,‚ñÅ
test_class2_accuracy,‚ñÅ

0,1
blurred_test_accuracy,90.74321
blurred_test_class0_accuracy,86.2543
blurred_test_class1_accuracy,89.0991
blurred_test_class2_accuracy,98.83513
blurred_test_class3_accuracy,88.94928
blurred_test_overall_accuracy,90.74321
test_accuracy,99.71073
test_class0_accuracy,99.82818
test_class1_accuracy,99.0991
test_class2_accuracy,100.0


Dataset Sampling for Augmented Split 5:
Train dataset size: 31338 samples
Validation dataset size: 8946 samples
Test dataset size: 4494 samples
Blurred test dataset size: 4494 samples
Created EfficientNetB3 model:
Backbone frozen: True
Input size: 300
Number of classes: 4
Trainable parameters: 6148
Total parameters: 10702380
Trainable percentage: 0.06%


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÅ‚ñÇ‚ñÉ‚ñÖ‚ñÑ‚ñÖ‚ñÜ‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñÜ‚ñá‚ñá‚ñà‚ñá‚ñá‚ñá‚ñà‚ñá‚ñá‚ñà‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñá‚ñá‚ñá
val_loss,‚ñà‚ñá‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ

0,1
actual_epochs,50.0
best_val_loss,0.32322
epoch,49.0
learning_rate,0.0
total_training_time,1912.14667
train_accuracy,83.97473
train_loss,0.39951
val_accuracy,86.57501
val_loss,0.33577


Early stopping at epoch 76


0,1
actual_epochs,‚ñÅ
best_val_loss,‚ñÅ
epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
learning_rate,‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
total_training_time,‚ñÅ
train_accuracy,‚ñÅ‚ñÑ‚ñÜ‚ñá‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
train_loss,‚ñà‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
val_accuracy,‚ñÅ‚ñÑ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñá‚ñá‚ñà‚ñá‚ñá‚ñà‚ñá‚ñá‚ñà‚ñá‚ñà‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñá‚ñá‚ñà‚ñà‚ñá‚ñá‚ñà‚ñá‚ñà‚ñá‚ñà
val_loss,‚ñà‚ñÑ‚ñÇ‚ñÑ‚ñÇ‚ñÅ‚ñÇ‚ñÅ‚ñÇ‚ñÉ‚ñÇ‚ñÅ‚ñÇ‚ñÅ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÉ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ

0,1
actual_epochs,77.0
best_val_loss,0.0066
epoch,76.0
learning_rate,0.0
total_training_time,7920.74207
train_accuracy,100.0
train_loss,3e-05
val_accuracy,99.77644
val_loss,0.01606


0,1
blurred_test_accuracy,‚ñÅ
blurred_test_class0_accuracy,‚ñÅ
blurred_test_class1_accuracy,‚ñÅ
blurred_test_class2_accuracy,‚ñÅ
blurred_test_class3_accuracy,‚ñÅ
blurred_test_overall_accuracy,‚ñÅ
test_accuracy,‚ñÅ
test_class0_accuracy,‚ñÅ
test_class1_accuracy,‚ñÅ
test_class2_accuracy,‚ñÅ

0,1
blurred_test_accuracy,91.67779
blurred_test_class0_accuracy,82.13058
blurred_test_class1_accuracy,95.94595
blurred_test_class2_accuracy,95.07168
blurred_test_class3_accuracy,94.02174
blurred_test_overall_accuracy,91.67779
test_accuracy,99.86649
test_class0_accuracy,100.0
test_class1_accuracy,99.54955
test_class2_accuracy,100.0


‚úÖ Cell finished in 6h 28m 20.13s



In [13]:
# Save results to CSV
results_df = pd.DataFrame(results)
results_df.to_csv('results_1percent.csv', index=False)

# Generate summary report
generate_summary_report(results_df)



üîÑ Cell started at: 2025-07-31 04:02:59


NameError: name 'results' is not defined

‚úÖ Cell finished in 0.07 seconds



In [16]:
import torch
print(torch.__version__)
import ultralytics
print(ultralytics.__version__)
import wandb
print(wandb.__version__)


üîÑ Cell started at: 2025-08-22 15:28:35
2.7.0+cu128
8.3.166
0.21.0
‚úÖ Cell finished in 0.00 seconds

