# Quick Setup and Verification

Run this notebook first to verify your setup is correct before training.

In [None]:
# Make the project importable from this notebook by putting the project root
# and its src/ directory at the front of sys.path.
import sys
import os


def find_project_root(current_dir):
    """Return the project root for the given working directory.

    Args:
        current_dir: Directory the notebook kernel is running in.

    Returns:
        The parent of ``current_dir`` when its final path component is exactly
        ``notebooks``; otherwise ``current_dir`` itself (normalised).
    """
    current_dir = os.path.normpath(current_dir)
    # Compare the final component exactly: a suffix test would also match
    # directories such as "my_notebooks".
    if os.path.basename(current_dir) == 'notebooks':
        return os.path.dirname(current_dir)
    return current_dir


# Get current directory (should be notebooks/)
current_dir = os.getcwd()
project_root = find_project_root(current_dir)

# Resulting order is [project_root, src, ...]. Existing copies are dropped
# first so re-running this cell does not accumulate duplicate entries.
for entry in (os.path.join(project_root, 'src'), project_root):
    sys.path[:] = [p for p in sys.path if p != entry]
    sys.path.insert(0, entry)

print(f"Project root: {project_root}")
print(f"Python path includes: {sys.path[:3]}")

## 1. Check Dependencies

In [None]:
# Try to import every required package and report which ones are unavailable.
import importlib

# Import name -> human-readable name shown in the report.
required = {
    'torch': 'PyTorch',
    'torchvision': 'TorchVision',
    'insightface': 'InsightFace',
    'numpy': 'NumPy',
    'yaml': 'PyYAML',
    'cv2': 'OpenCV',
    'PIL': 'Pillow',
}

# Import name -> distribution name that pip understands. Several of these
# differ from both the import name and the display name (cv2 -> opencv-python).
pip_names = {
    'torch': 'torch',
    'torchvision': 'torchvision',
    'insightface': 'insightface',
    'numpy': 'numpy',
    'yaml': 'pyyaml',
    'cv2': 'opencv-python',
    'PIL': 'pillow',
}

missing = []      # display names, used for the summary line
missing_pip = []  # pip distribution names, used for the install hint
for pkg, name in required.items():
    try:
        importlib.import_module(pkg)
        print(f"✓ {name}")
    except ImportError:
        print(f"✗ {name} - NOT INSTALLED")
        missing.append(name)
        # Fall back to the import name if a package has no explicit mapping.
        missing_pip.append(pip_names.get(pkg, pkg))

if missing:
    print(f"\n⚠ Missing: {', '.join(missing)}")
    # The hint must use pip distribution names; the display names are not installable.
    print("Install with: pip install " + " ".join(missing_pip))
else:
    print("\n✓ All dependencies installed!")

## 2. Check GPU

In [None]:
# Pick the compute device: 'cuda' when a CUDA device is usable, otherwise 'cpu'.
import torch

if not torch.cuda.is_available():
    print("⚠ CUDA not available, will use CPU (slower)")
    device = 'cpu'
else:
    print("✓ CUDA available")
    print(f"  Device: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 to report the figure in GB.
    print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    device = 'cuda'

## 3. Check InsightFace Model

In [None]:
# Instantiate and prepare the InsightFace 'buffalo_l' analysis app; any failure
# (including a missing insightface install) is reported instead of raised.
try:
    from insightface.app import FaceAnalysis

    app = FaceAnalysis(name='buffalo_l')
    app.prepare(ctx_id=0, det_size=(640, 640))
except Exception as err:
    print(f"✗ Failed to load InsightFace model: {err}")
    print("  Try: insightface.model_zoo.download('buffalo_l')")
else:
    print("✓ InsightFace buffalo_l model loaded successfully!")

## 4. Load and Check Configuration

In [None]:
# Load config.yaml from the project root and print a few key settings.
# Leaves `config` as the parsed mapping, or None when it could not be loaded.
import yaml

config_path = os.path.join(project_root, 'config.yaml')


def config_value(cfg, *keys, default='N/A'):
    """Look up a value in nested mappings.

    Args:
        cfg: Top-level mapping to start from.
        *keys: Keys to follow, outermost first.
        default: Returned when a key is missing or an intermediate value is
            not a mapping (for example a section left empty in the YAML).

    Returns:
        The value found at the end of the key path, or ``default``.
    """
    node = cfg
    for key in keys:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node


if os.path.exists(config_path):
    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as exc:
        print(f"✗ Config at {config_path} is not valid YAML: {exc}")
        config = None
    else:
        # safe_load returns None for an empty file and may return a list or
        # scalar; only a mapping is usable by the cells below.
        if isinstance(config, dict):
            print(f"✓ Config loaded from: {config_path}")

            # Show key settings
            print("\nKey settings:")
            print(f"  Model: {config_value(config, 'model', 'insightface_model')}")
            print(f"  Dataset path: {config_value(config, 'data', 'ms1mv2', 'path')}")
            print(f"  Latent dim: {config_value(config, 'linearizer', 'latent_dim')}")
            print(f"  Batch size: {config_value(config, 'linearizer', 'batch_size')}")
        else:
            print(f"✗ Config at {config_path} is empty or not a mapping")
            config = None
else:
    print(f"✗ Config not found at: {config_path}")
    config = None

## 5. Check Dataset Path

In [None]:
# Verify that the dataset directory named in the config exists and can be opened.
if config:
    # `or {}` / `or ''` also covers keys that are present but left empty in the YAML.
    data_cfg = config.get('data') or {}
    ms1mv2_cfg = data_cfg.get('ms1mv2') or {}
    dataset_path = ms1mv2_cfg.get('path') or ''

    # Resolve a relative path against the project root rather than the kernel's
    # working directory, which is notebooks/ when this notebook is run in place.
    # NOTE(review): assumes config paths are relative to the project root — confirm.
    if dataset_path and not os.path.isabs(dataset_path):
        dataset_path = os.path.join(project_root, dataset_path)

    if dataset_path and os.path.exists(dataset_path):
        print(f"✓ Dataset found at: {dataset_path}")

        # Check structure
        images_dir = os.path.join(dataset_path, 'images')
        if os.path.exists(images_dir):
            print(f"✓ Images directory found")
            # Try loading a sample
            try:
                from src.data.dataset import MS1MV2Dataset
                dataset = MS1MV2Dataset(dataset_path, is_training=False)
                print(f"✓ Dataset loaded: {len(dataset)} samples")
            except Exception as e:
                print(f"⚠ Could not load dataset: {e}")
        else:
            print(f"⚠ Images directory not found: {images_dir}")
    else:
        print(f"✗ Dataset NOT found at: {dataset_path}")
        print("  Please update config.yaml with correct path")
else:
    print("⚠ Config not loaded, skipping dataset check")

## 6. Test Model Loading

In [None]:
# Build the model described by the config, move it to the selected device and
# switch it to eval mode. Failures are reported with a traceback, not raised.
if not config:
    print("⚠ Config not loaded, skipping model test")
else:
    try:
        from utils.model_loader import load_model_from_config

        print("Loading model...")
        model = load_model_from_config(config)
        model = model.to(device)
        model.eval()

        print("✓ Model loaded successfully!")
        # The attribute is optional on the model object, hence the 'N/A' fallback.
        print(f"  Embedding size: {getattr(model, 'embedding_size', 'N/A')}")

    except Exception as err:
        print(f"✗ Failed to load model: {err}")
        import traceback
        traceback.print_exc()

## 7. Test Linearizer Creation

In [None]:
# Construct a Linearizer around the loaded model using the 'linearizer' section
# of the config, then report its parameter count. Skipped unless both the
# config and a `model` variable are available.
if not config or 'model' not in locals():
    print("⚠ Prerequisites not met, skipping Linearizer test")
else:
    try:
        from linearizer.linearizer import Linearizer

        linearizer_config = config['linearizer']
        embedding_size = config['model'].get('embedding_size', 512)

        print("Creating Linearizer...")
        # Every hyperparameter falls back to the literal default given here
        # when the config does not provide it.
        linearizer = Linearizer(
            model=model,
            embedding_size=embedding_size,
            latent_dim=linearizer_config.get('latent_dim', 512),
            num_blocks=linearizer_config.get('num_blocks', 4),
            hidden_dim=linearizer_config.get('hidden_dim', 1024),
            num_layers=linearizer_config.get('num_layers', 3),
            image_size=(112, 112),
        )

        print("✓ Linearizer created successfully!")

        # Count parameters
        total_params = sum(param.numel() for param in linearizer.parameters())
        print(f"  Total parameters: {total_params:,}")

    except Exception as err:
        print(f"✗ Failed to create Linearizer: {err}")
        import traceback
        traceback.print_exc()

## Summary

If all checks passed, you're ready to run training!

**Next steps:**
1. Open `notebooks/03_linearization.ipynb` for interactive training
2. Or run: `python scripts/train_linearizer.py --config config.yaml`