# Comprehensive Evaluation

This notebook evaluates models using:
- Standard face recognition benchmarks: LFW, CFP-FP, AgeDB-30, CALFW, CPLFW
- Unlearning-specific metrics

## Objectives
1. Evaluate face verification performance
2. Compute unlearning metrics
3. Compare original vs unlearned models


In [None]:
import sys
import os
# Add "<parent of the current working directory>/src" to the module search
# path. This must run before the project imports further down
# (`utils`, `linearizer`, `evaluation`), which presumably live in that folder.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

import torch
import numpy as np
import matplotlib.pyplot as plt
import yaml
import pandas as pd

from utils.model_loader import load_model_from_config
from linearizer.linearizer import Linearizer
from evaluation.benchmark import BenchmarkRunner
from evaluation.verification import evaluate_lfw

# Load the experiment configuration from the YAML file located one level
# above the current working directory. The encoding is pinned to UTF-8 so
# that decoding does not depend on the platform's locale default.
with open('../config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")


## 1. Evaluate Original Model

In [None]:
# Build the baseline model from the config, move it to the selected device,
# and switch it to eval mode.
original_model = load_model_from_config(config).to(device)
original_model.eval()

# Resolve the LFW directory from the config and the pairs list inside it.
lfw_path = config['data']['evaluation']['lfw']
lfw_pairs_file = os.path.join(lfw_path, 'pairs.txt')

try:
    if not os.path.exists(lfw_path):
        print(f"LFW dataset not found at {lfw_path}")
    else:
        print("Evaluating on LFW...")
        lfw_results = evaluate_lfw(original_model, lfw_path, lfw_pairs_file, device)
        print("\nLFW Results:")
        for metric, score in lfw_results.items():
            if not isinstance(score, dict):
                # Flat entry: print it on a single line.
                print(f"{metric}: {score:.4f}")
                continue
            # Nested entry: print the group name, then each member indented.
            print(f"{metric}:")
            for sub_metric, sub_score in score.items():
                print(f"  {sub_metric}: {sub_score:.4f}")
except Exception as err:
    # Best-effort cell: any failure is reported and the notebook continues.
    print(f"Error evaluating LFW: {err}")


## 2. Evaluate Unlearned Model

In [None]:
# Wrap the original model in a Linearizer, sized from the config
# (embedding_size defaults to 512, num_blocks defaults to 4).
# NOTE(review): this cell constructs a fresh Linearizer and does not load any
# checkpoint. The results below only describe an unlearned model once the
# trained/unlearned weights have been loaded here.
linearizer_config = config['linearizer']
linearizer = Linearizer(
    model=original_model,
    embedding_size=config['model'].get('embedding_size', 512),
    num_blocks=linearizer_config.get('num_blocks', 4)
)
linearizer = linearizer.to(device)
linearizer.eval()

# Evaluate unlearned model on the same LFW path and pairs file that were
# resolved for the original model.
try:
    if os.path.exists(lfw_path):
        print("Evaluating unlearned model on LFW...")
        lfw_results_unlearned = evaluate_lfw(linearizer, lfw_path, lfw_pairs_file, device)
        print("\nLFW Results (Unlearned):")
        for key, value in lfw_results_unlearned.items():
            if isinstance(value, dict):
                # Nested entry: print the group name, then each member indented.
                print(f"{key}:")
                for k, v in value.items():
                    print(f"  {k}: {v:.4f}")
            else:
                print(f"{key}: {value:.4f}")
    else:
        # Report the missing dataset instead of skipping silently, matching
        # the message printed for the original-model evaluation.
        print(f"LFW dataset not found at {lfw_path}")
except Exception as e:
    # Best-effort cell: any failure is reported and the notebook continues.
    print(f"Error evaluating unlearned model: {e}")
