# 10 - Common Utilities

This notebook contains shared utility functions used across the project.

**Usage in other notebooks:**
```python
%run ../notebooks/10_common_utils.ipynb
```

**Provides:**
- Seed setting for reproducibility
- Standard image transforms
- System information writer
- Timing utilities
- Resource monitoring thread
- Logging helpers
- General helpers (device selection, byte formatting, parameter counting)

In [None]:
import os
import sys
import json
import time
import random
import platform
import subprocess
import threading
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, Any

import numpy as np
import pandas as pd
import psutil
import torch
import torchvision.transforms as transforms
from PIL import Image

## 1. Seed Setting for Reproducibility

In [None]:
def set_seed(seed: int = 42):
    """
    Seed every random number generator used by the project.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), puts cuDNN into deterministic mode, and
    stores the seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Random seed value
    """
    # Record the seed in the environment variable used for Python hash seeding
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Apply the same seed to each generator in turn
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic cuDNN kernels, no auto-tuner (may impact performance)
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

    print(f"✓ Random seed set to {seed}")

## 2. Standard Image Transforms

In [None]:
def get_transforms(augment: bool = False):
    """
    Build the project's standard image preprocessing pipeline.

    Both variants produce a 224-pixel crop, convert it to a tensor and
    normalize it with the ImageNet channel statistics; they differ only in
    how the crop is obtained.

    Args:
        augment: Whether to include data augmentation (for training)

    Returns:
        torchvision.transforms.Compose object
    """
    if augment:
        # Training: random crop plus random horizontal flip
        spatial_steps = [
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        # No augmentation (fair comparison): fixed resize and center crop
        spatial_steps = [
            transforms.Resize(256),
            transforms.CenterCrop(224),
        ]

    # ImageNet normalization stats
    imagenet_norm = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )

    return transforms.Compose(spatial_steps + [transforms.ToTensor(), imagenet_norm])


# Shared non-augmented transform, built once when this cell is executed
STANDARD_TRANSFORM = get_transforms(augment=False)

## 3. System Information Writer

In [None]:
def get_gpu_info():
    """
    Collect information about the first CUDA device.

    Device name, memory and library versions come from PyTorch; the driver
    version is queried from ``nvidia-smi`` on a best-effort basis and falls
    back to ``'unknown'`` when the tool is missing, fails, times out or
    prints nothing.

    Returns:
        Dict with GPU info or None if CUDA is not available
    """
    if not torch.cuda.is_available():
        return None

    driver_version = 'unknown'
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'],
            capture_output=True, text=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError):
        # nvidia-smi not installed / not executable / timed out: keep 'unknown'
        pass
    else:
        if result.returncode == 0:
            # nvidia-smi prints one line per GPU; keep only the first line so
            # the value stays a single version string on multi-GPU hosts
            lines = result.stdout.strip().splitlines()
            if lines and lines[0].strip():
                driver_version = lines[0].strip()

    properties = torch.cuda.get_device_properties(0)
    return {
        'name': torch.cuda.get_device_name(0),
        'count': torch.cuda.device_count(),
        'memory_gb': round(properties.total_memory / (1024**3), 2),
        'cuda_version': torch.version.cuda,
        'cudnn_version': torch.backends.cudnn.version(),
        'driver_version': driver_version,
    }


def write_sysinfo(output_path: Path):
    """
    Write system information to a JSON file.

    The report covers the host name, OS, CPU, memory, root-disk usage,
    Python and PyTorch versions, and the result of ``get_gpu_info()``.
    Parent directories of ``output_path`` are created if needed.

    Args:
        output_path: Path to output JSON file
    """
    bytes_per_gib = 1024 ** 3

    # Query each psutil source once so paired values (total/available,
    # total/free) come from the same snapshot
    cpu_freq = psutil.cpu_freq()
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    sysinfo = {
        'timestamp': datetime.now().isoformat(),
        'hostname': platform.node(),
        'os': {
            'system': platform.system(),
            'release': platform.release(),
            'version': platform.version(),
            'platform': platform.platform(),
        },
        'cpu': {
            'processor': platform.processor(),
            'physical_cores': psutil.cpu_count(logical=False),
            'logical_cores': psutil.cpu_count(logical=True),
            # cpu_freq() may return a falsy value; record None in that case
            'frequency_mhz': cpu_freq.current if cpu_freq else None,
        },
        'memory': {
            'total_gb': round(memory.total / bytes_per_gib, 2),
            'available_gb': round(memory.available / bytes_per_gib, 2),
        },
        'disk': {
            'total_gb': round(disk.total / bytes_per_gib, 2),
            'free_gb': round(disk.free / bytes_per_gib, 2),
        },
        'python': {
            'version': platform.python_version(),
            'implementation': platform.python_implementation(),
        },
        'pytorch': {
            'version': torch.__version__,
            'cuda_available': torch.cuda.is_available(),
        },
        'gpu': get_gpu_info(),
    }

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(sysinfo, indent=2), encoding='utf-8')
    print(f"✓ System info written to {output_path}")

## 4. Timing Utilities

In [None]:
def time_first_batch(dataloader, device='cuda'):
    """
    Measure time to load and transfer first batch to device.

    The measurement covers creating the iterator, fetching the first batch,
    moving its tensors to the target device and, on CUDA, waiting for the
    device to finish. Tensors are moved when the batch is a single tensor,
    a list/tuple, or a dict; non-tensor items are left untouched.

    Args:
        dataloader: PyTorch DataLoader (any iterable of batches works)
        device: Target device ('cuda' or 'cpu'). When CUDA is not available
            the CPU is used regardless of this value.

    Returns:
        Time in seconds

    Raises:
        StopIteration: If the dataloader yields no batches.
    """
    device = torch.device(device if torch.cuda.is_available() else 'cpu')

    def _to_device(item):
        # Only tensors can be moved; labels, paths, etc. pass through
        return item.to(device) if isinstance(item, torch.Tensor) else item

    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments
    start = time.perf_counter()
    batch = next(iter(dataloader))

    # Transfer to device if needed
    if isinstance(batch, dict):
        batch = {key: _to_device(value) for key, value in batch.items()}
    elif isinstance(batch, (list, tuple)):
        batch = [_to_device(item) for item in batch]
    else:
        batch = _to_device(batch)

    # CUDA work is asynchronous; wait for it so the copy is included
    if device.type == 'cuda':
        torch.cuda.synchronize()

    return time.perf_counter() - start


class Timer:
    """
    Simple context manager for timing code blocks.

    Timing uses ``time.perf_counter()``, a monotonic clock, so ``elapsed``
    is not affected by system clock adjustments. ``start`` holds the
    ``perf_counter`` reading taken on entry (``None`` before entry).
    Exceptions raised inside the block are not suppressed; ``elapsed`` is
    still recorded.

    Usage:
        with Timer("operation") as t:
            # code to time
            pass
        print(f"Took {t.elapsed:.2f}s")
    """
    def __init__(self, name: str = "Operation", verbose: bool = True):
        self.name = name        # label used in the printed message
        self.verbose = verbose  # print the duration on exit when True
        self.elapsed = 0        # seconds; set when the block exits
        self.start = None       # perf_counter reading taken on entry

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        self.elapsed = time.perf_counter() - self.start
        if self.verbose:
            print(f"{self.name} took {self.elapsed:.2f}s")

## 5. Resource Monitoring Thread

In [None]:
def get_gpu_utilization():
    """
    Get current GPU utilization and memory using nvidia-smi.

    Only the first GPU reported by ``nvidia-smi`` is used; on multi-GPU
    hosts the tool prints one line per device. Any failure (CUDA missing,
    tool missing, non-zero exit, timeout, unparsable output) yields
    ``(None, None)`` instead of raising.

    Returns:
        Tuple of (utilization_percent, memory_used_mb) or (None, None)
    """
    if not torch.cuda.is_available():
        return None, None

    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=utilization.gpu,memory.used',
             '--format=csv,noheader,nounits'],
            capture_output=True, text=True, timeout=2
        )
    except (OSError, subprocess.SubprocessError):
        # nvidia-smi not installed / not executable / timed out
        return None, None

    if result.returncode != 0:
        return None, None

    lines = result.stdout.strip().splitlines()
    if not lines:
        return None, None

    try:
        # Parse the first device's line only; a wrong field count or a
        # non-numeric field raises ValueError
        util, mem = (field.strip() for field in lines[0].split(','))
        return float(util), float(mem)
    except ValueError:
        return None, None


def monitor_resources(log_path: Path, interval: float, stop_event: threading.Event):
    """
    Monitor system resources in a background thread.

    Overwrites ``log_path`` with a CSV header, then appends one row per
    sample until ``stop_event`` is set. Columns: ``ts`` (``time.time()``),
    GPU utilization and memory from ``get_gpu_utilization()``, CPU percent
    and RSS of the current process, and MB read/written by the process
    since monitoring started. Missing GPU values are written as empty
    fields so the columns stay numeric when the file is parsed.

    Args:
        log_path: Path to CSV log file
        interval: Sampling interval in seconds
        stop_event: Threading event to signal stop
    """
    log_path = Path(log_path)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Initialize CSV with header
    with open(log_path, 'w') as f:
        f.write('ts,gpu_util,gpu_mem_mb,cpu_pct,rss_mb,read_mb,write_mb\n')

    process = psutil.Process()
    # io_counters is not provided on every platform
    io_start = process.io_counters() if hasattr(process, 'io_counters') else None

    # The first cpu_percent() call only establishes a baseline and returns
    # a meaningless 0.0, so discard it before sampling starts
    process.cpu_percent()

    def _field(value):
        # Empty CSV field for missing values instead of the text "None"
        return '' if value is None else value

    while not stop_event.is_set():
        timestamp = time.time()

        # GPU metrics
        gpu_util, gpu_mem = get_gpu_utilization()

        # CPU metrics
        cpu_pct = process.cpu_percent()

        # Memory metrics
        rss_mb = process.memory_info().rss / (1024 * 1024)

        # Disk I/O metrics (cumulative since monitoring started)
        if io_start is not None:
            io_now = process.io_counters()
            read_mb = (io_now.read_bytes - io_start.read_bytes) / (1024 * 1024)
            write_mb = (io_now.write_bytes - io_start.write_bytes) / (1024 * 1024)
        else:
            read_mb, write_mb = 0, 0

        # Reopen per sample so every row is on disk as soon as it is written
        with open(log_path, 'a') as f:
            f.write(
                f'{timestamp},{_field(gpu_util)},{_field(gpu_mem)},{cpu_pct},'
                f'{rss_mb:.1f},{read_mb:.2f},{write_mb:.2f}\n'
            )

        # Wait on the event rather than sleeping so a stop request ends the
        # loop immediately instead of after a full interval
        stop_event.wait(interval)


def start_monitor(log_path: Path, interval: float = 0.5) -> tuple:
    """
    Launch ``monitor_resources`` in a daemon thread.

    Args:
        log_path: Path to CSV log file
        interval: Sampling interval in seconds

    Returns:
        Tuple of (thread, stop_event); pass both to ``stop_monitor`` to
        end the monitoring.
    """
    stop_signal = threading.Event()

    # Daemon thread: it does not keep the interpreter alive on exit
    worker = threading.Thread(
        target=monitor_resources,
        daemon=True,
        args=(log_path, interval, stop_signal),
    )
    worker.start()

    print(f"✓ Resource monitoring started (logging to {log_path})")
    return worker, stop_signal


def stop_monitor(thread: threading.Thread, stop_event: threading.Event):
    """
    Stop resource monitoring thread.

    Sets the stop event and waits up to two seconds for the thread to
    finish. If the thread is still running after that, a warning is
    printed instead of the success message.

    Args:
        thread: Monitoring thread
        stop_event: Stop event to signal
    """
    stop_event.set()
    thread.join(timeout=2)

    # join() returns silently on timeout, so check before reporting success
    if thread.is_alive():
        print("⚠ Resource monitoring thread did not stop within 2s")
    else:
        print("✓ Resource monitoring stopped")

## 6. Logging Helpers

In [None]:
def append_to_summary(summary_path: Path, row_dict: Dict[str, Any]):
    """
    Append a row to summary CSV file.

    A ``timestamp`` column (current local time, ISO format) is added when
    the row has none; the caller's dictionary is not modified. Parent
    directories are created if needed. When the file already has a header,
    the row is aligned to it by column name. If the row introduces columns
    the file does not have, the file is rewritten with the extra columns
    added at the end and existing rows left blank for them.

    Args:
        summary_path: Path to summary CSV
        row_dict: Dictionary of column:value pairs
    """
    summary_path = Path(summary_path)
    summary_path.parent.mkdir(parents=True, exist_ok=True)

    # Work on a copy so the caller's dict is left untouched
    row = dict(row_dict)
    if 'timestamp' not in row:
        row['timestamp'] = datetime.now().isoformat()

    new_row = pd.DataFrame([row])

    # New or empty file: write header plus the row
    if not (summary_path.exists() and summary_path.stat().st_size > 0):
        new_row.to_csv(summary_path, mode='w', header=True, index=False)
        return

    existing_columns = list(pd.read_csv(summary_path, nrows=0).columns)

    if set(new_row.columns) <= set(existing_columns):
        # Append by name, not position: reorder to the file's header and
        # leave columns the row does not provide empty
        aligned = new_row.reindex(columns=existing_columns)
        aligned.to_csv(summary_path, mode='a', header=False, index=False)
        return

    # The row has columns the file lacks: rewrite the file with a widened
    # header. Existing cells are read as plain text so they are written
    # back unchanged.
    existing = pd.read_csv(summary_path, dtype=str, keep_default_na=False)
    combined = pd.concat([existing, new_row], ignore_index=True)
    combined.to_csv(summary_path, mode='w', header=True, index=False)


def compute_metrics_from_logs(log_path: Path) -> Dict[str, Optional[float]]:
    """
    Compute summary metrics from resource monitoring logs.

    Expects the CSV columns ``ts``, ``gpu_util``, ``gpu_mem_mb``,
    ``cpu_pct``, ``rss_mb``, ``read_mb`` and ``write_mb``. GPU cells that
    are empty or not numeric (for example the text ``None``) are treated
    as missing samples. ``read_mb``/``write_mb`` are taken to be cumulative
    totals, so rates are the difference between the last and first sample
    divided by the elapsed time.

    Args:
        log_path: Path to logs_metrics.csv

    Returns:
        Dictionary of computed metrics. The three GPU metrics are None
        when the log has no valid GPU samples.
    """
    df = pd.read_csv(log_path)

    metrics = {}

    # GPU metrics: coerce to numbers first so non-numeric placeholders
    # become NaN instead of turning the column into strings
    if 'gpu_util' in df.columns:
        gpu_util = pd.to_numeric(df['gpu_util'], errors='coerce').dropna()
    else:
        gpu_util = pd.Series(dtype=float)

    if not gpu_util.empty:
        gpu_mem_peak = pd.to_numeric(df['gpu_mem_mb'], errors='coerce').max()
        metrics['gpu_util_mean'] = float(gpu_util.mean())
        # Share of valid samples below 10% utilization
        metrics['gpu_idle_pct'] = float((gpu_util < 10).mean() * 100)
        metrics['gpu_mem_mb_peak'] = None if pd.isna(gpu_mem_peak) else float(gpu_mem_peak)
    else:
        metrics['gpu_util_mean'] = None
        metrics['gpu_idle_pct'] = None
        metrics['gpu_mem_mb_peak'] = None

    # CPU metrics
    metrics['cpu_util_mean'] = float(df['cpu_pct'].mean())

    # Memory metrics
    metrics['rss_mb_peak'] = float(df['rss_mb'].max())

    # Disk I/O metrics (rate per second over the logged time span)
    duration = df['ts'].iloc[-1] - df['ts'].iloc[0] if len(df) > 1 else 0
    if duration > 0:
        read_delta = df['read_mb'].iloc[-1] - df['read_mb'].iloc[0]
        write_delta = df['write_mb'].iloc[-1] - df['write_mb'].iloc[0]
        metrics['disk_read_mb_s_mean'] = float(read_delta / duration)
        metrics['disk_write_mb_s_mean'] = float(write_delta / duration)
    else:
        metrics['disk_read_mb_s_mean'] = 0
        metrics['disk_write_mb_s_mean'] = 0

    return metrics

## 7. Utility Functions

In [None]:
def get_device():
    """
    Get the best available device (CUDA > MPS > CPU).

    The MPS check is skipped when the installed PyTorch build has no
    ``torch.backends.mps`` module, so such builds fall back to CPU instead
    of raising AttributeError.

    Returns:
        torch.device
    """
    if torch.cuda.is_available():
        return torch.device('cuda')

    # Look the backend up defensively: not every torch build exposes it
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')

    return torch.device('cpu')


def format_bytes(bytes_val: int) -> str:
    """
    Format bytes as human-readable string.

    Uses 1024-based units from B up to PB with one decimal place. Negative
    values are scaled by their magnitude and keep their sign.

    Args:
        bytes_val: Number of bytes

    Returns:
        Formatted string (e.g., "1.5 GB")
    """
    value = float(bytes_val)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        # Compare the magnitude so negative sizes are scaled too
        if abs(value) < 1024.0:
            return f"{value:.1f} {unit}"
        value /= 1024.0
    # Anything that is still >= 1024 TB is reported in PB
    return f"{value:.1f} PB"


def count_parameters(model: torch.nn.Module) -> int:
    """
    Return the total element count of all parameters that require gradients.

    Args:
        model: PyTorch model

    Returns:
        Number of trainable parameters
    """
    trainable_total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False (frozen) are not counted
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

## ✅ Utilities Loaded

All utility functions are now available. Use `%run ../notebooks/10_common_utils.ipynb` in other notebooks to import these functions.

In [None]:
# Print a summary of the provided helpers when run with __name__ == "__main__"
if __name__ == "__main__":
    summary_lines = (
        "✓ Common utilities loaded successfully",
        "\nAvailable functions:",
        "  - set_seed(seed)",
        "  - get_transforms(augment)",
        "  - write_sysinfo(path)",
        "  - time_first_batch(dataloader, device)",
        "  - start_monitor(log_path, interval)",
        "  - stop_monitor(thread, stop_event)",
        "  - append_to_summary(path, row_dict)",
        "  - compute_metrics_from_logs(log_path)",
        "  - get_device()",
        "  - format_bytes(bytes)",
        "  - count_parameters(model)",
        "\nConstants:",
        "  - STANDARD_TRANSFORM",
    )
    # One print of the joined lines emits the same text as line-by-line prints
    print("\n".join(summary_lines))