# Face Detection Model Comparison
## So sánh các model Object Detection cho dataset 300 folders khuôn mặt sinh viên

### Models được so sánh:
1. Vanilla CNN
2. AdaBoost + Haar Features  
3. ResNet50-based Detection
4. Modern YOLO-style approach

### Metrics đánh giá:
- Accuracy
- Precision/Recall
- Training time
- Inference time
- Model size


In [None]:
# Install required packages for Google Colab
# Each "!" line is an IPython shell escape (not Python); "-q" keeps pip's output quiet.
!pip install -q torch torchvision
!pip install -q opencv-python-headless
!pip install -q scikit-learn
!pip install -q seaborn
!pip install -q pandas

# Reached only after every pip command above has been issued.
print("[OK] All packages installed successfully!")


 All packages installed successfully!


In [None]:
# Mount Google Drive
# The dataset and results directories configured below both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import time
import pickle
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Setup paths with error handling
DATASET_DIR_DRIVE = "/content/drive/MyDrive/aligned_faces"
RESULTS_DIR = "/content/drive/MyDrive/face_detection_results"

# Check if dataset directory exists
if not os.path.exists(DATASET_DIR_DRIVE):
    print(f" Warning: Dataset directory not found at {DATASET_DIR_DRIVE}")
    print("Please ensure your dataset is uploaded to Google Drive at the correct path.")
    print("Alternative paths to check:")
    print("  - /content/drive/My Drive/aligned_faces")
    print("  - /content/drive/MyDrive/your_dataset_folder")
    # Try to list available directories in Google Drive to help locate the dataset.
    drive_path = "/content/drive/MyDrive/"
    if os.path.exists(drive_path):
        print(f"\nAvailable folders in {drive_path}:")
        try:
            folders = [f for f in os.listdir(drive_path) if os.path.isdir(os.path.join(drive_path, f))]
        except OSError as exc:
            # Listing is only a convenience; report the failure instead of hiding it.
            print(f"  (could not list folders: {exc})")
        else:
            for folder in folders[:10]:  # Show first 10 folders
                print(f"  - {folder}")
else:
    print(f"[OK] Dataset found at: {DATASET_DIR_DRIVE}")
    # Count number of folders (one folder per student)
    try:
        num_folders = len([f for f in os.listdir(DATASET_DIR_DRIVE) if os.path.isdir(os.path.join(DATASET_DIR_DRIVE, f))])
    except OSError as exc:
        # Counting is informational only; keep going but say why it failed.
        print(f"  (could not count student folders: {exc})")
    else:
        print(f"  Number of student folders found: {num_folders}")

os.makedirs(RESULTS_DIR, exist_ok=True)
print(f"[OK] Results will be saved to: {RESULTS_DIR}")


Mounted at /content/drive
 Dataset found at: /content/drive/MyDrive/aligned_faces
  Number of student folders found: 294
 Results will be saved to: /content/drive/MyDrive/face_detection_results


In [None]:
# Check system info and GPU availability
print("="*50)
print("SYSTEM INFORMATION")
print("="*50)

# Check GPU
if torch.cuda.is_available():
    print(f"[GPU] Available: {torch.cuda.get_device_name(0)}")
    print(f"  - GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    print(f"  - CUDA Version: {torch.version.cuda}")
else:
    print("[WARNING] No GPU detected. Training will be slower on CPU.")
    print("  To enable GPU in Colab: Runtime -> Change runtime type -> GPU")

# Check RAM (psutil is optional; this report is best-effort)
try:
    import psutil
    ram = psutil.virtual_memory()
    print(f"\n[RAM] {ram.total / 1024**3:.2f} GB (Available: {ram.available / 1024**3:.2f} GB)")
except Exception:
    # `Exception` rather than a bare `except`, so Ctrl-C / SystemExit still propagate.
    print("\n[INFO] RAM info not available")

# PyTorch version
print(f"\n[INFO] PyTorch Version: {torch.__version__}")
print(f"[INFO] OpenCV Version: {cv2.__version__}")

print("="*50)


SYSTEM INFORMATION
 GPU Available: NVIDIA L4
  - GPU Memory: 22.16 GB
  - CUDA Version: 12.6

 RAM: 52.96 GB (Available: 50.60 GB)

 PyTorch Version: 2.8.0+cu126
 OpenCV Version: 4.12.0


## 1. Dataset Preparation


In [None]:
class FaceDataset(Dataset):
    """Image-folder dataset: one sub-folder of ``root_dir`` per student (class).

    Folder names become class names; class indices are assigned in sorted
    folder order, skipping folders without images so indices stay contiguous.
    Image files inside each folder are sorted as well, so ``self.samples`` has a
    deterministic order (``os.listdir`` order is arbitrary) and a seeded
    ``random_split`` over this dataset is reproducible.

    Args:
        root_dir: Directory containing one sub-folder per class.
        transform: Optional callable applied to each PIL image.
        max_samples_per_class: If truthy, keep at most this many images
            (first in sorted order) per class.
        max_classes: Maximum number of classes to load (default 300).
            ``None`` disables the cap.

    Raises:
        ValueError: If ``root_dir`` does not exist.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None, max_samples_per_class=None, max_classes=300):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = []
        self.class_to_idx = {}
        self.samples = []

        # Check if root directory exists
        if not os.path.exists(root_dir):
            raise ValueError(f"Dataset directory not found: {root_dir}")

        # Get all student folders
        try:
            folders = sorted(
                f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f))
            )
        except OSError as e:
            print(f"Error reading dataset directory: {e}")
            folders = []

        print(f"Found {len(folders)} student folders")

        for folder in folders:
            if max_classes is not None and len(self.classes) >= max_classes:
                break

            folder_path = os.path.join(root_dir, folder)

            # Listing can fail on a flaky Drive mount; skip that folder and carry on.
            try:
                entries = os.listdir(folder_path)
            except OSError as e:
                print(f"Warning: Skipping folder '{folder}' due to error: {e}")
                continue

            # Sort so truncation and sample order do not depend on filesystem order.
            images = sorted(f for f in entries if f.lower().endswith(self.IMAGE_EXTENSIONS))

            # Skip empty folders
            if not images:
                print(f"Skipping empty folder: {folder}")
                continue

            # Class index is the position in self.classes, not the folder position,
            # so skipped folders leave no gaps in the label range.
            current_idx = len(self.classes)
            self.classes.append(folder)
            self.class_to_idx[folder] = current_idx

            if max_samples_per_class:
                images = images[:max_samples_per_class]

            self.samples.extend(
                (os.path.join(folder_path, img_name), current_idx) for img_name in images
            )

        print(f"Total samples: {len(self.samples)}")
        print(f"Number of classes: {len(self.classes)}")

    def __len__(self):
        """Return the number of (image path, label) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        If the image cannot be opened or transformed, the error is printed and
        a black 224x224 RGB placeholder (with the same transform applied) is
        returned together with the original label, so one bad file does not
        abort a whole epoch.
        """
        img_path, label = self.samples[idx]

        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # Return a dummy image
            dummy_image = Image.new('RGB', (224, 224), (0, 0, 0))
            if self.transform:
                dummy_image = self.transform(dummy_image)
            return dummy_image, label

# Data transforms
# Evaluation pipeline: deterministic resize + ImageNet normalisation only.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Training pipeline: same resize/normalisation with light augmentation in between
# (horizontal flip, small rotation, colour jitter).
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Create datasets
print("Creating datasets...")

try:
    import copy

    # Load ALL images from each student folder (no limit)
    full_dataset = FaceDataset(DATASET_DIR_DRIVE, transform=train_transform, max_samples_per_class=None)

    if len(full_dataset) == 0:
        raise ValueError("Dataset is empty. Please check your dataset directory.")

    # Split dataset 70 / 15 / 15 (the test split absorbs the rounding remainder)
    train_size = int(0.7 * len(full_dataset))
    val_size = int(0.15 * len(full_dataset))
    test_size = len(full_dataset) - train_size - val_size

    train_dataset, val_split, test_split = torch.utils.data.random_split(
        full_dataset, [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(42)
    )

    # All three splits returned by random_split wrap the SAME underlying dataset
    # object, so assigning `split.dataset.transform` would also replace the
    # training transform and silently turn augmentation off. Instead, give
    # val/test their own shallow copy (shares the sample list, no re-scan of
    # Drive) that carries the evaluation transform.
    eval_dataset = copy.copy(full_dataset)
    eval_dataset.transform = test_transform
    val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
    test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

    print(f"[OK] Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")

    # Create data loaders with reduced num_workers for Colab
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=1)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=1)

    num_classes = len(full_dataset.classes)
    print(f"[OK] Number of classes: {num_classes}")

except Exception as e:
    # Print setup guidance, then re-raise so the notebook stops at this cell.
    print(f"\n ERROR: Failed to create dataset: {e}")
    print("\nPLEASE FOLLOW THESE STEPS:")
    print("1. Upload your face dataset to Google Drive")
    print("2. The dataset should be organized as:")
    print("   /content/drive/MyDrive/aligned_faces/")
    print("     ├── student_001/")
    print("     │   ├── image1.jpg")
    print("     │   ├── image2.jpg")
    print("     │   └── ...")
    print("     ├── student_002/")
    print("     └── ... (up to 300 folders)")
    print("\n3. Update DATASET_DIR_DRIVE variable if your path is different")
    raise


Creating datasets...
Found 294 student folders
Total samples: 5880
Number of classes: 294
 Train: 4116, Val: 882, Test: 882
 Number of classes: 294


## 2. Model Definitions


In [None]:
# 1. Vanilla CNN Model
class VanillaCNN(nn.Module):
    """Plain four-stage convolutional classifier.

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> 2x2 max-pool, with channel
    widths 32, 64, 128 and 256. The feature map is adaptively average-pooled to
    7x7 and passed through a three-layer MLP head with dropout.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Build the stages in a loop; layer order (and therefore the
        # state_dict keys features.0 ... features.15) is unchanged.
        stage_layers = []
        channels_in = 3
        for channels_out in (32, 64, 128, 256):
            stage_layers.append(
                nn.Conv2d(channels_in, channels_out, kernel_size=3, stride=1, padding=1)
            )
            stage_layers.append(nn.BatchNorm2d(channels_out))
            stage_layers.append(nn.ReLU(inplace=True))
            stage_layers.append(nn.MaxPool2d(2, 2))
            channels_in = channels_out
        self.features = nn.Sequential(*stage_layers)

        pooled_dim = 256 * 7 * 7
        head_layers = [
            nn.AdaptiveAvgPool2d((7, 7)),
            nn.Flatten(),
            nn.Linear(pooled_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for an image batch ``x``."""
        return self.classifier(self.features(x))

# 2. ResNet50-based Model
class ResNet50Face(nn.Module):
    """ResNet-50 with ImageNet (IMAGENET1K_V1) weights and a new classification head.

    Args:
        num_classes: Output size of the replacement fully connected layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Use weights parameter instead of deprecated pretrained
        network = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
        feature_dim = network.fc.in_features
        # Swap the stock final layer for one sized to our label set.
        network.fc = nn.Linear(feature_dim, num_classes)
        self.backbone = network

    def forward(self, x):
        """Return class logits produced by the backbone."""
        logits = self.backbone(x)
        return logits

# 3. Improved CNN with Attention
class AttentionCNN(nn.Module):
    """Three-stage CNN with a spatial attention gate ahead of the classifier.

    Each stage is two Conv3x3 -> BatchNorm -> ReLU pairs followed by a 2x2
    max-pool (widths 64, 128, 256). A 1x1-conv branch ending in a sigmoid
    produces a single-channel map that rescales the features element-wise
    before global average pooling and a two-layer head.

    Args:
        num_classes: Size of the output logit vector.
    """

    def __init__(self, num_classes):
        super().__init__()

        def conv_bn_relu(c_in, c_out):
            # One Conv3x3 + BatchNorm + ReLU triple.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        # Layer order is unchanged, so state_dict keys stay features.0 ... features.20.
        backbone_layers = []
        width_in = 3
        for width in (64, 128, 256):
            backbone_layers += conv_bn_relu(width_in, width)
            backbone_layers += conv_bn_relu(width, width)
            backbone_layers.append(nn.MaxPool2d(2, 2))
            width_in = width
        self.features = nn.Sequential(*backbone_layers)

        # Attention mechanism: one weight in (0, 1) per spatial location
        attention_layers = [
            nn.Conv2d(256, 128, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, 1),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*attention_layers)

        head_layers = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits computed from attention-weighted features."""
        feature_map = self.features(x)
        gate = self.attention(feature_map)
        # The single-channel gate broadcasts across all 256 feature channels.
        return self.classifier(feature_map * gate)

print("Models defined successfully!")


Models defined successfully!


## 3. Configuration for 200 Epochs with Early Stopping

**Training Configuration:**
- **Max Epochs**: 200 (có Early Stopping)
- **Early Stopping Patience**: 15 epochs
- **Min Improvement**: 0.001 điểm phần trăm (`min_delta` được cộng trực tiếp vào validation accuracy tính theo thang 0–100%)
- **Learning Rate Scheduler**: ReduceLROnPlateau
  - Initial LR: 0.001
  - Factor: 0.5 (giảm LR xuống 50%)
  - Patience: 7 epochs
- **Optimizer**: Adam
- **Batch Size**: 32

**Benefits:**
1. Tự động dừng khi model không cải thiện -> tiết kiệm thời gian
2. Lưu best model checkpoint -> đảm bảo không mất kết quả tốt nhất
3. Learning rate tự động giảm -> fine-tuning tốt hơn
4. Tracking đầy đủ metrics -> phân tích cho nghiên cứu khoa học


## 3. Training and Visualization Utilities (All-in-One)


In [None]:
# ===== EARLY STOPPING CLASS =====
class EarlyStopping:
    """Early stopping on validation accuracy, used to limit overfitting."""

    def __init__(self, patience=15, min_delta=0.001, verbose=True):
        """
        Args:
            patience (int): Number of consecutive non-improving calls tolerated
                before the stop flag is raised.
            min_delta (float): Minimum increase over the best score that counts
                as an improvement.
            verbose (bool): Whether to print progress messages.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_epoch = 0

    def __call__(self, val_acc, epoch):
        """Record one epoch's validation accuracy; return True once training should stop."""
        # The first observation just establishes the baseline.
        if self.best_score is None:
            self.best_score = val_acc
            self.best_epoch = epoch
            return self.early_stop

        below_threshold = val_acc < self.best_score + self.min_delta
        if not below_threshold:
            if self.verbose:
                print(f'[IMPROVED] Validation accuracy improved: {self.best_score:.2f}% -> {val_acc:.2f}%')
            self.best_score = val_acc
            self.best_epoch = epoch
            self.counter = 0
            return self.early_stop

        # No sufficient improvement: burn one unit of patience.
        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter}/{self.patience} (Best: {self.best_score:.2f}% at epoch {self.best_epoch})')
        if self.counter >= self.patience:
            self.early_stop = True
        return self.early_stop

# ===== TRAINING UTILITIES =====
def train_model(model, train_loader, val_loader, num_epochs=200, model_name="model", results_dir="./results", patience=15):
    """
    Train a PyTorch classifier with Adam, ReduceLROnPlateau and early stopping.

    The weights of the epoch with the highest validation accuracy are
    snapshotted (deep-copied), saved to
    ``<results_dir>/<model_name>_best_model.pth`` and loaded back into the
    model before returning.

    Args:
        model: PyTorch model producing class logits.
        train_loader: Training data loader yielding ``(inputs, targets)``.
        val_loader: Validation data loader yielding ``(inputs, targets)``.
        num_epochs (int): Maximum number of epochs.
        model_name (str): Name used in log output and in the checkpoint file name.
        results_dir (str): Directory for the checkpoint (created if missing).
        patience (int): Early stopping patience in epochs.

    Returns:
        dict with keys ``model``, ``training_time`` (seconds), per-epoch lists
        ``train_losses``, ``val_losses``, ``train_accuracies``,
        ``val_accuracies``, ``learning_rates``, plus ``best_val_acc``,
        ``total_epochs``, ``early_stopped`` and ``best_epoch`` (1-based epoch of
        the saved checkpoint; 0 if no epoch ran).
    """
    import copy

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # ReduceLROnPlateau: halve the LR when validation accuracy stops improving
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=7)

    # Early stopping
    early_stopping = EarlyStopping(patience=patience, min_delta=0.001, verbose=True)

    train_losses = []
    val_losses = []
    val_accuracies = []
    train_accuracies = []
    learning_rates = []

    best_val_acc = 0.0
    best_epoch = 0
    best_model_state = None

    start_time = time.time()

    print(f"\n{'='*80}")
    print(f"[TRAINING] {model_name}")
    print(f"{'='*80}")
    print(f"Max Epochs: {num_epochs} | Early Stopping Patience: {patience}")
    print(f"Device: {device}")
    print(f"{'='*80}\n")

    for epoch in range(num_epochs):
        # ===== TRAINING PHASE =====
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = output.detach().argmax(dim=1)
            total_train += target.size(0)
            correct_train += (predicted == target).sum().item()

        # ===== VALIDATION PHASE =====
        model.eval()
        correct_val = 0
        total_val = 0
        val_running_loss = 0.0

        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                val_running_loss += loss.item()

                predicted = output.argmax(dim=1)
                total_val += target.size(0)
                correct_val += (predicted == target).sum().item()

        # Calculate metrics (losses are averaged per batch, accuracies are in percent)
        epoch_loss = running_loss / len(train_loader)
        val_loss = val_running_loss / len(val_loader)
        train_acc = 100. * correct_train / total_train
        val_acc = 100. * correct_val / total_val
        current_lr = optimizer.param_groups[0]['lr']

        train_losses.append(epoch_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)
        learning_rates.append(current_lr)

        # Save best model. state_dict() returns references to the live
        # parameter tensors, so a shallow .copy() would keep changing as
        # training continues; deepcopy freezes this epoch's weights. The
        # `is None` test guarantees a snapshot even if accuracy stays at 0%.
        if best_model_state is None or val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch + 1
            best_model_state = copy.deepcopy(model.state_dict())

        # Print progress every 5 epochs, plus the first and last epoch
        if (epoch + 1) % 5 == 0 or epoch == 0 or epoch == num_epochs - 1:
            print(f'Epoch [{epoch+1:3d}/{num_epochs}] | '
                  f'Train Loss: {epoch_loss:.4f} | Train Acc: {train_acc:.2f}% | '
                  f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}% | '
                  f'LR: {current_lr:.6f}')

        # Learning rate scheduler
        scheduler.step(val_acc)

        # Early stopping check
        if early_stopping(val_acc, epoch + 1):
            print(f'\n[EARLY STOP] Triggered at epoch {epoch+1}')
            print(f'Best Val Accuracy: {early_stopping.best_score:.2f}% at epoch {early_stopping.best_epoch}')
            break

    training_time = time.time() - start_time

    print(f"\n{'='*80}")
    print(f"[COMPLETED] Training {model_name} completed!")
    print(f"{'='*80}")
    print(f"Total Training Time: {training_time/60:.2f} minutes")
    print(f"Best Val Accuracy: {best_val_acc:.2f}%")
    print(f"Total Epochs Trained: {len(train_losses)}")
    print(f"{'='*80}\n")

    # No epoch ran (num_epochs <= 0): fall back to the current weights so the
    # save/load below still has a valid state dict.
    if best_model_state is None:
        best_model_state = copy.deepcopy(model.state_dict())

    # Save best model
    os.makedirs(results_dir, exist_ok=True)
    best_model_path = os.path.join(results_dir, f'{model_name}_best_model.pth')
    torch.save(best_model_state, best_model_path)
    print(f"[SAVED] Best model saved to: {best_model_path}")

    # Load best model for final evaluation
    model.load_state_dict(best_model_state)

    return {
        'model': model,
        'training_time': training_time,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accuracies': train_accuracies,
        'val_accuracies': val_accuracies,
        'learning_rates': learning_rates,
        'best_val_acc': best_val_acc,
        'total_epochs': len(train_losses),
        'early_stopped': early_stopping.early_stop,
        # Epoch of the checkpoint actually saved (the early-stopping tracker
        # ignores gains below min_delta, so its epoch can lag behind).
        'best_epoch': best_epoch
    }

def evaluate_model(model, test_loader, model_name="model"):
    """Run ``model`` over ``test_loader`` and compute classification metrics.

    Returns a dict with ``accuracy``, ``precision``, ``recall`` and
    ``f1_score`` (all in percent; precision/recall/F1 are support-weighted
    averages), ``inference_time`` (wall-clock seconds for the whole loop), and
    the raw ``predictions`` and ``targets`` lists. ``model_name`` is accepted
    but not used.
    """
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)
    model = model.to(device)
    model.eval()

    predicted_labels = []
    true_labels = []

    tic = time.time()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            # torch.max over dim 1 returns (values, indices); keep the indices.
            batch_predictions = torch.max(logits, 1)[1]

            predicted_labels.extend(batch_predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    inference_time = time.time() - tic

    accuracy = accuracy_score(true_labels, predicted_labels)
    prf = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    precision, recall, f1 = prf[0], prf[1], prf[2]

    metrics = {}
    metrics['accuracy'] = accuracy * 100
    metrics['precision'] = precision * 100
    metrics['recall'] = recall * 100
    metrics['f1_score'] = f1 * 100
    metrics['inference_time'] = inference_time
    metrics['predictions'] = predicted_labels
    metrics['targets'] = true_labels
    return metrics

def calculate_model_size(model):
    """Return the in-memory size of ``model`` in MB.

    Counts the bytes of every parameter and every registered buffer
    (element count times element size) and divides by 1024**2.
    """
    def tensor_bytes(tensors):
        # Total storage, in bytes, of an iterable of tensors.
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = tensor_bytes(model.parameters()) + tensor_bytes(model.buffers())
    return total_bytes / 1024**2

class AdaBoostFaceClassifier:
    """AdaBoost (decision stumps) over hand-crafted grayscale image features.

    Batches are expected to come from loaders whose images were normalised
    with the ImageNet mean/std below; they are de-normalised before feature
    extraction.

    Args:
        n_estimators: Number of boosting rounds.
        max_train_samples: Stop collecting training batches once more than
            this many samples have been gathered (default 2000, to keep
            training fast). ``None`` uses the whole loader.
    """

    # Normalisation constants undone before feature extraction.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self, n_estimators=50, max_train_samples=2000):
        self.n_estimators = n_estimators
        self.max_train_samples = max_train_samples
        self.model = None
        self.training_time = None  # seconds spent in the last fit(); None before fitting

    @staticmethod
    def _lbp_features(gray, max_features=100):
        """Return a sub-sampled, simplified 4-neighbour LBP code per interior pixel.

        For every interior pixel, bits 1/2/4/8 are set when the upper-left,
        upper, upper-right and right neighbour (respectively) is brighter than
        the pixel. Codes are flattened row by row and, if there are more than
        ``max_features`` of them, sampled at a fixed stride and truncated to
        ``max_features``.
        """
        center = gray[1:-1, 1:-1]
        codes = (
            (gray[:-2, :-2] > center) * 1
            + (gray[:-2, 1:-1] > center) * 2
            + (gray[:-2, 2:] > center) * 4
            + (gray[1:-1, 2:] > center) * 8
        ).flatten()
        if len(codes) > max_features:
            codes = codes[::len(codes) // max_features][:max_features]
        return codes

    def extract_features(self, images):
        """Extract features from images for AdaBoost.

        Each image (a CHW tensor with values in [0, 1], or an HxW / HxWx3
        array) is converted to a 64x64 grayscale image and described by:
        sub-sampled raw pixels (every 4th row/column), a 16-bin intensity
        histogram, and up to 100 simplified LBP codes.

        Returns:
            ``np.ndarray`` with one feature row per image.
        """
        features = []

        for img in images:
            # Convert tensor to numpy if needed
            if torch.is_tensor(img):
                img = img.detach().cpu().permute(1, 2, 0).numpy()
                # Clip first: values slightly outside [0, 1] would otherwise
                # wrap around when cast to uint8.
                img = np.ascontiguousarray(np.clip(img * 255, 0, 255).astype(np.uint8))

            # Convert to grayscale
            if len(img.shape) == 3:
                gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            else:
                gray = img

            # Resize to fixed size
            gray = cv2.resize(gray, (64, 64))

            # 1. Raw pixel values (subsampled)
            pixel_features = gray[::4, ::4].flatten()

            # 2. Histogram features
            hist = cv2.calcHist([gray], [0], None, [16], [0, 256]).flatten()

            # 3. LBP-like features (simplified), computed without a per-pixel Python loop
            lbp_features = self._lbp_features(gray)

            features.append(
                np.concatenate([pixel_features, hist, lbp_features]).astype(np.float64)
            )

        return np.array(features)

    def _collect(self, loader, max_samples=None, log_every=None):
        """De-normalise every batch of ``loader`` and return ``(X, y)`` arrays.

        Stops early once more than ``max_samples`` rows were gathered (if
        given) and prints progress every ``log_every`` batches (if given).
        """
        mean = torch.tensor(self._MEAN).view(3, 1, 1)
        std = torch.tensor(self._STD).view(3, 1, 1)

        X, y = [], []
        for batch_idx, (images, labels) in enumerate(loader):
            if log_every and batch_idx % log_every == 0:
                print(f"Processing batch {batch_idx}/{len(loader)}")

            # Denormalize images
            images = images * std + mean

            X.extend(self.extract_features(images))
            y.extend(labels.numpy())

            # Limit data for faster training
            if max_samples is not None and len(X) > max_samples:
                break

        return np.array(X), np.array(y)

    def fit(self, train_loader, val_loader=None):
        """Train AdaBoost classifier.

        Args:
            train_loader: Loader yielding ``(normalised image batch, labels)``.
            val_loader: Accepted but not used.

        Returns:
            Training time in seconds (also stored in ``self.training_time``).

        Raises:
            ValueError: If ``train_loader`` yields no samples.
        """
        print("Extracting features for AdaBoost training...")

        X_train, y_train = self._collect(
            train_loader, max_samples=self.max_train_samples, log_every=10
        )
        if len(X_train) == 0:
            raise ValueError("train_loader produced no samples")

        print(f"Training AdaBoost with {X_train.shape[0]} samples, {X_train.shape[1]} features")

        # Train AdaBoost
        # Use 'estimator' instead of deprecated 'base_estimator' (sklearn 1.2+)
        self.model = AdaBoostClassifier(
            estimator=DecisionTreeClassifier(max_depth=1),
            n_estimators=self.n_estimators,
            random_state=42
        )

        start_time = time.time()
        self.model.fit(X_train, y_train)
        self.training_time = time.time() - start_time

        print(f"AdaBoost training completed in {self.training_time:.2f} seconds")

        return self.training_time

    def predict(self, test_loader):
        """Make predictions on test data.

        Returns:
            ``(predictions, y_test, inference_time)`` where ``inference_time``
            covers only the classifier's ``predict`` call, not feature
            extraction.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.model is None:
            raise RuntimeError("AdaBoostFaceClassifier.predict() called before fit()")

        print("Extracting features for AdaBoost prediction...")

        X_test, y_test = self._collect(test_loader)

        start_time = time.time()
        predictions = self.model.predict(X_test)
        inference_time = time.time() - start_time

        return predictions, y_test, inference_time

# ===== VISUALIZATION UTILITIES =====
def create_individual_training_plots(training_curves, results_dir):
    """Create one detailed 2x2 training figure per model (for the research write-up).

    For every entry of ``training_curves`` except ``'AdaBoost'`` the figure
    shows: train/val accuracy (with the best epoch marked when available),
    train/val loss, the learning-rate schedule (log scale) and the
    train-minus-val accuracy gap. Each figure is saved as
    ``<results_dir>/<model_name>_training_analysis.png`` (name lower-cased,
    spaces replaced by underscores), shown, and then closed.

    Args:
        training_curves: Mapping of model name to a dict with
            ``train_accuracies``, ``val_accuracies``, ``train_losses`` and
            optionally ``val_losses``, ``learning_rates``, ``best_epoch``.
        results_dir: Output directory (created if missing).
    """
    os.makedirs(results_dir, exist_ok=True)

    for model_name, curves in training_curves.items():
        # AdaBoost is not trained in epochs, so it has no curves to draw.
        if model_name == 'AdaBoost':
            continue

        epochs = range(1, len(curves['train_accuracies']) + 1)
        # Roughly 20 markers per curve regardless of the number of epochs.
        marker_step = max(1, len(epochs)//20)

        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle(f'{model_name} - Detailed Training Analysis', fontsize=16, fontweight='bold')

        # 1. Accuracy Curves
        ax = axes[0, 0]
        ax.plot(epochs, curves['train_accuracies'], 'b-', linewidth=2, label='Train Accuracy', marker='o', markersize=3, markevery=marker_step)
        ax.plot(epochs, curves['val_accuracies'], 'r-', linewidth=2, label='Validation Accuracy', marker='s', markersize=3, markevery=marker_step)
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel('Accuracy (%)', fontsize=12)
        ax.set_title('Training vs Validation Accuracy', fontsize=13, fontweight='bold')
        ax.grid(True, alpha=0.3, linestyle='--')

        # Mark the best epoch, but only when it lies on the plotted axis
        # (0 means "no epoch recorded").
        best_epoch = curves.get('best_epoch')
        if best_epoch is not None and 1 <= best_epoch <= len(epochs):
            ax.axvline(x=best_epoch, color='green', linestyle='--', linewidth=2, alpha=0.7, label=f'Best Epoch: {best_epoch}')
        ax.legend(loc='lower right', fontsize=10)

        # 2. Loss Curves
        ax = axes[0, 1]
        ax.plot(epochs, curves['train_losses'], 'b-', linewidth=2, label='Train Loss', marker='o', markersize=3, markevery=marker_step)
        if 'val_losses' in curves:
            ax.plot(epochs, curves['val_losses'], 'r-', linewidth=2, label='Validation Loss', marker='s', markersize=3, markevery=marker_step)
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel('Loss', fontsize=12)
        ax.set_title('Training vs Validation Loss', fontsize=13, fontweight='bold')
        ax.legend(loc='upper right', fontsize=10)
        ax.grid(True, alpha=0.3, linestyle='--')

        # 3. Learning Rate Schedule
        ax = axes[1, 0]
        if 'learning_rates' in curves:
            ax.plot(epochs, curves['learning_rates'], 'g-', linewidth=2, marker='o', markersize=3, markevery=marker_step)
            ax.set_xlabel('Epoch', fontsize=12)
            ax.set_ylabel('Learning Rate', fontsize=12)
            ax.set_title('Learning Rate Schedule', fontsize=13, fontweight='bold')
            ax.set_yscale('log')
            ax.grid(True, alpha=0.3, linestyle='--')
        else:
            ax.text(0.5, 0.5, 'Learning Rate data not available', ha='center', va='center', transform=ax.transAxes, fontsize=12)

        # 4. Overfitting Analysis (Train-Val Gap)
        ax = axes[1, 1]
        gap = np.array(curves['train_accuracies']) - np.array(curves['val_accuracies'])
        ax.plot(epochs, gap, 'purple', linewidth=2, marker='o', markersize=3, markevery=marker_step)
        ax.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.5)
        # Shade only where train accuracy is at or above validation accuracy.
        ax.fill_between(epochs, 0, gap, where=(gap >= 0), alpha=0.3, color='red', label='Overfitting')
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel('Accuracy Gap (Train - Val) %', fontsize=12)
        ax.set_title('Overfitting Analysis', fontsize=13, fontweight='bold')
        ax.legend(loc='upper right', fontsize=10)
        ax.grid(True, alpha=0.3, linestyle='--')

        plt.tight_layout()
        save_path = os.path.join(results_dir, f'{model_name.lower().replace(" ", "_")}_training_analysis.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.show()
        # Release the figure; otherwise one figure per model stays alive in pyplot.
        plt.close(fig)
        print(f"[SAVED] {save_path}")

def create_comparison_charts(all_results, training_curves, results_dir):
    """Draw a 3x3 overview figure comparing all models and save it as a PNG.

    Panels 1-6 are bar charts built from ``all_results`` (test accuracy,
    training time, model size, precision/recall/F1, inference time and total
    epochs).  Panels 7-9 plot train/validation accuracy per epoch from
    ``training_curves`` for 'Vanilla CNN', 'ResNet50' and 'Attention CNN';
    a panel stays empty when its model has no recorded curves.

    Args:
        all_results: Mapping of model name -> metrics dict.  The keys
            'test_accuracy', 'training_time', 'model_size_mb',
            'test_precision', 'test_recall', 'test_f1' and 'inference_time'
            are required; 'total_epochs' is optional and defaults to 0.
        training_curves: Mapping of model name -> dict holding
            'train_accuracies' and 'val_accuracies' sequences and,
            optionally, 'best_epoch'.
        results_dir: Directory that receives 'model_comparison_summary.png'.

    Returns:
        The path of the saved figure.
    """
    plt.style.use('default')
    plt.figure(figsize=(20, 15))

    models = list(all_results.keys())
    palette = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']  # Modern colors

    def bar_panel(position, values, title, ylabel, make_label, label_offset,
                  ylim=None, positive_only=False):
        """Render one single-series bar chart with a value label above each bar."""
        plt.subplot(3, 3, position)
        rects = plt.bar(models, values, color=palette, edgecolor='black', linewidth=1.5)
        plt.title(title, fontsize=14, fontweight='bold')
        plt.ylabel(ylabel, fontsize=11)
        plt.xticks(rotation=45, ha='right')
        if ylim is not None:
            plt.ylim(ylim)
        for rect in rects:
            top = rect.get_height()
            if positive_only and not top > 0:
                # Skip the label for bars that carry no value.
                continue
            plt.text(rect.get_x() + rect.get_width()/2., top + label_offset,
                     make_label(top), ha='center', va='bottom', fontweight='bold', fontsize=9)
        plt.grid(axis='y', alpha=0.3, linestyle='--')

    # 1. Test Accuracy Comparison (y-axis starts just below the weakest model)
    accuracies = [all_results[name]['test_accuracy'] for name in models]
    bar_panel(1, accuracies, 'Test Accuracy Comparison', 'Accuracy (%)',
              lambda h: f'{h:.2f}%', 0.5, ylim=[min(accuracies) - 5, 100])

    # 2. Training Time Comparison (stored value divided by 60, shown as minutes)
    bar_panel(2, [all_results[name]['training_time']/60 for name in models],
              'Training Time Comparison', 'Time (minutes)',
              lambda h: f'{h:.1f}m', 0.5)

    # 3. Model Size Comparison
    bar_panel(3, [all_results[name]['model_size_mb'] for name in models],
              'Model Size Comparison', 'Size (MB)',
              lambda h: f'{h:.1f}MB', 0.5)

    # 4. Precision-Recall-F1 Comparison (three bars per model)
    plt.subplot(3, 3, 4)
    x = np.arange(len(models))
    width = 0.25
    grouped_metrics = [
        ('test_precision', 'Precision'),
        ('test_recall', 'Recall'),
        ('test_f1', 'F1-Score'),
    ]
    for slot, (metric_key, legend_label) in enumerate(grouped_metrics):
        series = [all_results[name][metric_key] for name in models]
        plt.bar(x + slot*width, series, width, label=legend_label, edgecolor='black', linewidth=1)

    plt.title('Precision, Recall, F1-Score Comparison', fontsize=14, fontweight='bold')
    plt.ylabel('Score (%)', fontsize=11)
    plt.xlabel('Models', fontsize=11)
    # Ticks sit under the middle bar of each three-bar group.
    plt.xticks(x + width, models, rotation=45, ha='right')
    plt.legend(fontsize=10)
    plt.grid(axis='y', alpha=0.3, linestyle='--')
    plt.ylim([0, 105])

    # 5. Inference Time Comparison
    bar_panel(5, [all_results[name]['inference_time'] for name in models],
              'Inference Time Comparison', 'Time (seconds)',
              lambda h: f'{h:.3f}s', 0.01)

    # 6. Total Epochs Trained (models without the key count as 0 and get no label)
    bar_panel(6, [all_results[name].get('total_epochs', 0) for name in models],
              'Total Epochs Trained', 'Epochs',
              lambda h: f'{int(h)}', 1, positive_only=True)

    # 7-9. Training Curves for CNN models
    for offset, model_name in enumerate(['Vanilla CNN', 'ResNet50', 'Attention CNN']):
        plt.subplot(3, 3, 7 + offset)
        if model_name not in training_curves:
            continue

        curve = training_curves[model_name]
        epochs = range(1, len(curve['train_accuracies']) + 1)
        plt.plot(epochs, curve['train_accuracies'], 'b-', linewidth=2, label='Train', alpha=0.8)
        plt.plot(epochs, curve['val_accuracies'], 'r-', linewidth=2, label='Validation', alpha=0.8)

        # Mark best epoch, but only when it lies inside the plotted range
        if 'best_epoch' in curve:
            best_epoch = curve['best_epoch']
            if best_epoch <= len(epochs):
                plt.axvline(x=best_epoch, color='green', linestyle='--', linewidth=1.5, alpha=0.7)

        plt.title(f'{model_name} - Learning Curves', fontsize=12, fontweight='bold')
        plt.xlabel('Epoch', fontsize=10)
        plt.ylabel('Accuracy (%)', fontsize=10)
        plt.legend(fontsize=9, loc='lower right')
        plt.grid(True, alpha=0.3, linestyle='--')

    plt.tight_layout()
    chart_path = os.path.join(results_dir, 'model_comparison_summary.png')
    plt.savefig(chart_path, dpi=300, bbox_inches='tight')
    plt.show()
    print(f"[SAVED] {chart_path}")

    return chart_path

def _to_jsonable(obj):
    """Recursively turn NumPy arrays/scalars (and tuples) into plain JSON-friendly Python objects."""
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # NumPy scalars such as np.int64 / np.float32 / np.bool_ are rejected by json.
        return obj.item()
    if isinstance(obj, dict):
        return {key: _to_jsonable(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_to_jsonable(item) for item in obj]
    return obj


def create_detailed_analysis(all_results, training_curves, results_dir, dataset_info):
    """Write the CSV, JSON and Markdown reports and pick the recommended models.

    Three files are written into ``results_dir``:
    'model_comparison_results.csv' (metrics table rounded to 2 decimals),
    'complete_analysis.json' (experiment info, raw results and training
    curves) and 'SUMMARY_REPORT.md' (human-readable summary).

    Args:
        all_results: Mapping of model name -> metrics dict.  Must contain
            'test_accuracy', 'training_time', 'inference_time',
            'model_size_mb' and 'test_f1' for every model.
        training_curves: Per-model training history; stored verbatim in the
            JSON report (NumPy values are converted to plain Python types).
        results_dir: Existing directory that receives the report files.
        dataset_info: Dict with optional keys 'dataset_path', 'num_classes',
            'train_samples', 'val_samples', 'test_samples' and 'device'.

    Returns:
        Dict with 'summary_path', 'csv_path', 'json_path' and 'best_models'
        (keys 'accuracy', 'speed', 'efficiency', 'balanced').

    Raises:
        ValueError: If ``all_results`` is empty.
    """
    if not all_results:
        raise ValueError("all_results is empty; nothing to analyze")

    # One timestamp shared by the JSON and Markdown reports so they always agree.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Create results dataframe
    results_df = pd.DataFrame(all_results).T
    results_df = results_df.round(2)

    # Save results to CSV
    csv_path = os.path.join(results_dir, 'model_comparison_results.csv')
    results_df.to_csv(csv_path)

    # Generate recommendations
    best_accuracy_model = max(all_results, key=lambda name: all_results[name]['test_accuracy'])
    best_speed_model = min(all_results, key=lambda name: all_results[name]['inference_time'])
    most_efficient_model = min(all_results, key=lambda name: all_results[name]['model_size_mb'])

    # Balanced score: accuracy per unit of "cost", where cost mixes model size
    # with inference time weighted by 100.
    balanced_scores = {}
    for model, metrics in all_results.items():
        cost = metrics['model_size_mb'] + metrics['inference_time'] * 100
        # A non-positive cost means size and latency were not measured; score it
        # 0 instead of dividing by zero.
        balanced_scores[model] = metrics['test_accuracy'] / cost if cost > 0 else 0.0

    best_balanced = max(balanced_scores, key=balanced_scores.get)

    # Save complete analysis as JSON
    analysis_report = {
        'experiment_info': {
            'date': timestamp,
            'dataset_path': dataset_info.get('dataset_path', ''),
            'num_classes': dataset_info.get('num_classes', 0),
            'train_samples': dataset_info.get('train_samples', 0),
            'val_samples': dataset_info.get('val_samples', 0),
            'test_samples': dataset_info.get('test_samples', 0),
            'device': dataset_info.get('device', 'CPU')
        },
        'model_results': all_results,
        'training_curves': training_curves
    }

    json_path = os.path.join(results_dir, 'complete_analysis.json')
    # Serialise before opening the file so a failure cannot leave a truncated report behind.
    json_text = json.dumps(_to_jsonable(analysis_report), indent=2)
    with open(json_path, 'w', encoding='utf-8') as f:
        f.write(json_text)

    # Create summary report
    summary_report = f"""# FACE DETECTION MODEL COMPARISON REPORT
Date: {timestamp}

## EXPERIMENT SETUP
- Dataset: {dataset_info.get('dataset_path', 'N/A')}
- Number of Classes: {dataset_info.get('num_classes', 'N/A')}
- Training Samples: {dataset_info.get('train_samples', 'N/A')}
- Validation Samples: {dataset_info.get('val_samples', 'N/A')}
- Test Samples: {dataset_info.get('test_samples', 'N/A')}
- Device: {dataset_info.get('device', 'N/A')}

## MODELS COMPARED
1. Vanilla CNN - Custom lightweight CNN architecture
2. ResNet50 - Pre-trained ResNet50 with transfer learning
3. Attention CNN - CNN with attention mechanism
4. AdaBoost - Classical machine learning with hand-crafted features

## RESULTS SUMMARY

| Model | Test Accuracy | Training Time | Inference Time | Model Size | F1-Score |
|-------|---------------|---------------|----------------|------------|----------|
"""

    for model_name, results in all_results.items():
        summary_report += f"| {model_name} | {results['test_accuracy']:.2f}% | {results['training_time']/60:.1f}m | {results['inference_time']:.3f}s | {results['model_size_mb']:.1f}MB | {results['test_f1']:.2f}% |\n"

    summary_report += f"""

## KEY FINDINGS
- Best Accuracy: {best_accuracy_model} ({all_results[best_accuracy_model]['test_accuracy']:.2f}%)
- Fastest Inference: {best_speed_model} ({all_results[best_speed_model]['inference_time']:.3f}s)
- Most Compact: {most_efficient_model} ({all_results[most_efficient_model]['model_size_mb']:.1f}MB)
- Best Balanced: {best_balanced}

## RECOMMENDATIONS
1. For highest accuracy: Use {best_accuracy_model}
2. For real-time applications: Use {best_speed_model}
3. For mobile deployment: Use {most_efficient_model}
4. For balanced performance: Use {best_balanced}

Generated by Face Detection Comparison Pipeline
"""

    # Save summary report (explicit UTF-8: model names and dataset paths may be non-ASCII)
    summary_path = os.path.join(results_dir, 'SUMMARY_REPORT.md')
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(summary_report)

    return {
        'summary_path': summary_path,
        'csv_path': csv_path,
        'json_path': json_path,
        'best_models': {
            'accuracy': best_accuracy_model,
            'speed': best_speed_model,
            'efficiency': most_efficient_model,
            'balanced': best_balanced
        }
    }

def print_results_summary(all_results):
    """Print the per-model metrics as a table framed by 80-character rules.

    Args:
        all_results: Mapping of model name -> metrics dict; each model becomes
            one row and numeric values are rounded to two decimals.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("FINAL RESULTS COMPARISON")
    print(rule)

    # Models as rows, metrics as columns.
    table = pd.DataFrame(all_results).T.round(2)
    print(table)
    print(rule)

# Confirmation message: reaching this line means the definitions above executed without error.
print("[OK] All utility functions loaded successfully!")


[OK] All utility functions loaded successfully!


## 4. Run All Experiments


In [7]:
# Containers filled by the per-model cells: metrics and training history keyed by model name
all_results = dict()
training_curves = dict()

# Recap of the dataset splits before training starts
print(
    "Starting model training and evaluation...",
    f"Dataset: {num_classes} classes",
    f"Train size: {len(train_dataset)}",
    f"Validation size: {len(val_dataset)}",
    f"Test size: {len(test_dataset)}",
    "="*50,
    sep="\n",
)

# Prefer the GPU whenever CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Metadata describing this run, consumed later by the analysis/report step
dataset_info = dict(
    dataset_path=DATASET_DIR_DRIVE,
    num_classes=num_classes,
    train_samples=len(train_dataset),
    val_samples=len(val_dataset),
    test_samples=len(test_dataset),
    device='GPU' if torch.cuda.is_available() else 'CPU',
)


Starting model training and evaluation...
Dataset: 294 classes
Train size: 4116
Validation size: 882
Test size: 882
Using device: cuda


In [None]:
# ===== 1. TRAIN VANILLA CNN =====
print("\n" + "="*80, "MODEL 1/4: VANILLA CNN", "="*80, sep="\n")

# Train with early stopping (up to 200 epochs, patience of 15 epochs)
vanilla_cnn = VanillaCNN(num_classes)
vanilla_results = train_model(
    model=vanilla_cnn,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=200,
    model_name="vanilla_cnn",
    results_dir=RESULTS_DIR,
    patience=15,
)

# Evaluate the model returned by training on the held-out test set
print("\n[EVAL] Evaluating Vanilla CNN on test set...")
vanilla_eval = evaluate_model(vanilla_results['model'], test_loader, "vanilla_cnn")

# Headline metrics used by the comparison charts and reports
all_results['Vanilla CNN'] = dict(
    training_time=vanilla_results['training_time'],
    best_val_accuracy=vanilla_results['best_val_acc'],
    test_accuracy=vanilla_eval['accuracy'],
    test_precision=vanilla_eval['precision'],
    test_recall=vanilla_eval['recall'],
    test_f1=vanilla_eval['f1_score'],
    inference_time=vanilla_eval['inference_time'],
    model_size_mb=calculate_model_size(vanilla_results['model']),
    total_epochs=vanilla_results['total_epochs'],
    early_stopped=vanilla_results['early_stopped'],
    best_epoch=vanilla_results['best_epoch'],
)

# Per-epoch history kept for the learning-curve plots
training_curves['Vanilla CNN'] = {
    history_key: vanilla_results[history_key]
    for history_key in (
        'train_losses',
        'val_losses',
        'train_accuracies',
        'val_accuracies',
        'learning_rates',
        'best_epoch',
    )
}

print(
    f"\n{'='*80}",
    f"[RESULTS] VANILLA CNN:",
    f"{'='*80}",
    f"Test Accuracy: {vanilla_eval['accuracy']:.2f}%",
    f"Test Precision: {vanilla_eval['precision']:.2f}%",
    f"Test Recall: {vanilla_eval['recall']:.2f}%",
    f"Test F1-Score: {vanilla_eval['f1_score']:.2f}%",
    f"Training Time: {vanilla_results['training_time']/60:.2f} minutes",
    f"Total Epochs: {vanilla_results['total_epochs']}",
    f"Model Size: {calculate_model_size(vanilla_results['model']):.2f} MB",
    f"{'='*80}\n",
    sep="\n",
)

# Drop references to the model and release cached GPU memory before the next experiment
del vanilla_cnn
del vanilla_results
torch.cuda.empty_cache()



1. Training Vanilla CNN...


In [None]:
# ===== 2. TRAIN RESNET50 =====
print("\n" + "="*80, "MODEL 2/4: RESNET50 (Transfer Learning)", "="*80, sep="\n")

# Train with early stopping (up to 200 epochs, patience of 15 epochs)
resnet_model = ResNet50Face(num_classes)
resnet_results = train_model(
    model=resnet_model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=200,
    model_name="resnet50",
    results_dir=RESULTS_DIR,
    patience=15,
)

# Evaluate the model returned by training on the held-out test set
print("\n[EVAL] Evaluating ResNet50 on test set...")
resnet_eval = evaluate_model(resnet_results['model'], test_loader, "resnet50")

# Headline metrics used by the comparison charts and reports
all_results['ResNet50'] = dict(
    training_time=resnet_results['training_time'],
    best_val_accuracy=resnet_results['best_val_acc'],
    test_accuracy=resnet_eval['accuracy'],
    test_precision=resnet_eval['precision'],
    test_recall=resnet_eval['recall'],
    test_f1=resnet_eval['f1_score'],
    inference_time=resnet_eval['inference_time'],
    model_size_mb=calculate_model_size(resnet_results['model']),
    total_epochs=resnet_results['total_epochs'],
    early_stopped=resnet_results['early_stopped'],
    best_epoch=resnet_results['best_epoch'],
)

# Per-epoch history kept for the learning-curve plots
training_curves['ResNet50'] = {
    history_key: resnet_results[history_key]
    for history_key in (
        'train_losses',
        'val_losses',
        'train_accuracies',
        'val_accuracies',
        'learning_rates',
        'best_epoch',
    )
}

print(
    f"\n{'='*80}",
    f"[RESULTS] RESNET50:",
    f"{'='*80}",
    f"Test Accuracy: {resnet_eval['accuracy']:.2f}%",
    f"Test Precision: {resnet_eval['precision']:.2f}%",
    f"Test Recall: {resnet_eval['recall']:.2f}%",
    f"Test F1-Score: {resnet_eval['f1_score']:.2f}%",
    f"Training Time: {resnet_results['training_time']/60:.2f} minutes",
    f"Total Epochs: {resnet_results['total_epochs']}",
    f"Model Size: {calculate_model_size(resnet_results['model']):.2f} MB",
    f"{'='*80}\n",
    sep="\n",
)

# Drop references to the model and release cached GPU memory before the next experiment
del resnet_model
del resnet_results
torch.cuda.empty_cache()


In [None]:
# ===== 3. TRAIN ATTENTION CNN =====
print("\n" + "="*80, "MODEL 3/4: ATTENTION CNN", "="*80, sep="\n")

# Train with early stopping (up to 200 epochs, patience of 15 epochs)
attention_cnn = AttentionCNN(num_classes)
attention_results = train_model(
    model=attention_cnn,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=200,
    model_name="attention_cnn",
    results_dir=RESULTS_DIR,
    patience=15,
)

# Evaluate the model returned by training on the held-out test set
print("\n[EVAL] Evaluating Attention CNN on test set...")
attention_eval = evaluate_model(attention_results['model'], test_loader, "attention_cnn")

# Headline metrics used by the comparison charts and reports
all_results['Attention CNN'] = dict(
    training_time=attention_results['training_time'],
    best_val_accuracy=attention_results['best_val_acc'],
    test_accuracy=attention_eval['accuracy'],
    test_precision=attention_eval['precision'],
    test_recall=attention_eval['recall'],
    test_f1=attention_eval['f1_score'],
    inference_time=attention_eval['inference_time'],
    model_size_mb=calculate_model_size(attention_results['model']),
    total_epochs=attention_results['total_epochs'],
    early_stopped=attention_results['early_stopped'],
    best_epoch=attention_results['best_epoch'],
)

# Per-epoch history kept for the learning-curve plots
training_curves['Attention CNN'] = {
    history_key: attention_results[history_key]
    for history_key in (
        'train_losses',
        'val_losses',
        'train_accuracies',
        'val_accuracies',
        'learning_rates',
        'best_epoch',
    )
}

print(
    f"\n{'='*80}",
    f"[RESULTS] ATTENTION CNN:",
    f"{'='*80}",
    f"Test Accuracy: {attention_eval['accuracy']:.2f}%",
    f"Test Precision: {attention_eval['precision']:.2f}%",
    f"Test Recall: {attention_eval['recall']:.2f}%",
    f"Test F1-Score: {attention_eval['f1_score']:.2f}%",
    f"Training Time: {attention_results['training_time']/60:.2f} minutes",
    f"Total Epochs: {attention_results['total_epochs']}",
    f"Model Size: {calculate_model_size(attention_results['model']):.2f} MB",
    f"{'='*80}\n",
    sep="\n",
)

# Drop references to the model and release cached GPU memory before the next experiment
del attention_cnn
del attention_results
torch.cuda.empty_cache()


In [None]:
# ===== 4. TRAIN ADABOOST =====
print("\n" + "="*80, "MODEL 4/4: ADABOOST (Classical ML)", "="*80, sep="\n")

# fit() returns the training time, which is stored with the other metrics below
adaboost_model = AdaBoostFaceClassifier(n_estimators=100)
ada_training_time = adaboost_model.fit(train_loader, val_loader)

# Predict on the test set; predict() also reports the inference time
print("\n[EVAL] Evaluating AdaBoost on test set...")
ada_predictions, ada_targets, ada_inference_time = adaboost_model.predict(test_loader)

# Metrics come back as fractions and are scaled to percentages when stored/printed
ada_accuracy = accuracy_score(ada_targets, ada_predictions)
ada_precision, ada_recall, ada_f1, _ = precision_recall_fscore_support(
    ada_targets,
    ada_predictions,
    average='weighted',
)

# Pickle the classifier so its on-disk footprint can serve as the model size
adaboost_path = os.path.join(RESULTS_DIR, 'adaboost_best_model.pkl')
with open(adaboost_path, 'wb') as f:
    pickle.dump(adaboost_model, f)

# File size converted from bytes to MB
adaboost_size_mb = os.path.getsize(adaboost_path) / (1024 * 1024)

# Same schema as the neural models; epoch-related fields are placeholders
all_results['AdaBoost'] = dict(
    training_time=ada_training_time,
    best_val_accuracy=0,  # Not applicable for AdaBoost
    test_accuracy=ada_accuracy * 100,
    test_precision=ada_precision * 100,
    test_recall=ada_recall * 100,
    test_f1=ada_f1 * 100,
    inference_time=ada_inference_time,
    model_size_mb=adaboost_size_mb,
    total_epochs=100,  # n_estimators
    early_stopped=False,
    best_epoch=100,
)

print(
    f"\n{'='*80}",
    f"[RESULTS] ADABOOST:",
    f"{'='*80}",
    f"Test Accuracy: {ada_accuracy * 100:.2f}%",
    f"Test Precision: {ada_precision * 100:.2f}%",
    f"Test Recall: {ada_recall * 100:.2f}%",
    f"Test F1-Score: {ada_f1 * 100:.2f}%",
    f"Training Time: {ada_training_time/60:.2f} minutes",
    f"Model Size: {adaboost_size_mb:.2f} MB",
    f"{'='*80}\n",
    sep="\n",
)

print("\n" + "="*80, "ALL 4 MODELS TRAINED SUCCESSFULLY!", "="*80 + "\n", sep="\n")


## 5. Results Visualization and Analysis


In [None]:
# Runs the reporting pipeline: summary table, per-model plots, comparison
# charts and the CSV/JSON/Markdown reports, then prints deployment
# recommendations and the list of expected output files.
print("\n" + "="*80)
print("[ANALYSIS] GENERATING COMPREHENSIVE ANALYSIS & VISUALIZATIONS")
print("="*80)

# 1. Print results summary
print("\n[1] Results Summary Table:")
print("-" * 80)
print_results_summary(all_results)

# 2. Create individual training plots (one detailed figure per model)
print("\n[2] Creating Individual Training Analysis Plots...")
print("-" * 80)
create_individual_training_plots(training_curves, RESULTS_DIR)

# 3. Create comparison charts
print("\n[3] Creating Model Comparison Charts...")
print("-" * 80)
chart_path = create_comparison_charts(all_results, training_curves, RESULTS_DIR)

# 4. Create detailed analysis report
print("\n[4] Generating Detailed Analysis Report...")
print("-" * 80)
analysis_results = create_detailed_analysis(all_results, training_curves, RESULTS_DIR, dataset_info)

print(f"\n{'='*80}")
print("[COMPLETED] EXPERIMENT COMPLETED SUCCESSFULLY!")
print(f"{'='*80}")
print(f"\n[FILES] All results saved to: {RESULTS_DIR}")
print(f"\n[REPORT] Summary report: {analysis_results['summary_path']}")
print(f"[CSV] CSV results: {analysis_results['csv_path']}")
print(f"[JSON] JSON analysis: {analysis_results['json_path']}")

# Print recommendations for research paper
best_models = analysis_results['best_models']
print(f"\n{'='*80}")
print("[RECOMMENDATIONS] FOR STUDENT ATTENDANCE SYSTEM")
print(f"{'='*80}")
print(f"\n[1] FOR HIGHEST ACCURACY:")
print(f"   Model: {best_models['accuracy']}")
print(f"   Accuracy: {all_results[best_models['accuracy']]['test_accuracy']:.2f}%")
print(f"   Use case: Khi cần độ chính xác cao nhất, ít quan tâm tốc độ")

print(f"\n[2] FOR REAL-TIME APPLICATIONS:")
print(f"   Model: {best_models['speed']}")
print(f"   Inference Time: {all_results[best_models['speed']]['inference_time']:.3f}s")
print(f"   Use case: Điểm danh thời gian thực, nhiều sinh viên cùng lúc")

print(f"\n[3] FOR MOBILE/EDGE DEPLOYMENT:")
print(f"   Model: {best_models['efficiency']}")
print(f"   Model Size: {all_results[best_models['efficiency']]['model_size_mb']:.2f} MB")
print(f"   Use case: Chạy trên thiết bị di động, Raspberry Pi")

print(f"\n[4] FOR BALANCED PERFORMANCE:")
print(f"   Model: {best_models['balanced']}")
print(f"   Use case: Cân bằng giữa accuracy, speed và model size")

print(f"\n{'='*80}")
print("[FILES] CREATED FOR RESEARCH PAPER:")
print(f"{'='*80}")

# Expected artefacts, grouped by category.  'detailed_analysis.png' is written
# by the follow-up analysis cell, so it is normally absent the first time this
# cell runs.
files_list = [
    ("Model Weights", [
        "vanilla_cnn_best_model.pth",
        "resnet50_best_model.pth",
        "attention_cnn_best_model.pth",
        "adaboost_best_model.pkl"
    ]),
    ("Analysis Reports", [
        "SUMMARY_REPORT.md",
        "model_comparison_results.csv",
        "complete_analysis.json"
    ]),
    ("Visualizations", [
        "model_comparison_summary.png",
        "vanilla_cnn_training_analysis.png",
        "resnet50_training_analysis.png",
        "attention_cnn_training_analysis.png",
        "detailed_analysis.png"
    ])
]

for category, files in files_list:
    print(f"\n[{category}]:")
    for file in files:
        # Flag anything not on disk instead of claiming it was created.
        print(f"   - {file}" + ("" if os.path.exists(os.path.join(RESULTS_DIR, file)) else "  (not found yet)"))

print(f"\n{'='*80}")
print("[TIP] Use these files for your research paper!")
print(f"{'='*80}")
print("\n[SECTIONS] Suggested sections:")
print("   - ABSTRACT: Use summary statistics from CSV")
print("   - METHODOLOGY: Refer to model architectures and training configs")
print("   - RESULTS: Use comparison charts and individual training plots")
print("   - DISCUSSION: Analyze overfitting, convergence, early stopping behavior")
print("   - CONCLUSION: Use recommendations for different deployment scenarios")

print(f"\n{'='*80}")
print("READY FOR RESEARCH PAPER WRITING!")
print(f"{'='*80}\n")


## 6. Analysis


In [None]:
# Optional: confusion matrix for the most accurate model plus two summary
# charts (quality metrics and efficiency), saved as 'detailed_analysis.png'.
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Get the best accuracy model results
best_model_name = analysis_results['best_models']['accuracy']
print(f"Creating detailed analysis for best model: {best_model_name}")

# Start from "no predictions" so every branch below leaves both names defined.
predictions = None
targets = None

# Load the best model and get predictions
if best_model_name != 'AdaBoost':
    # For CNN models, we need to re-evaluate to get predictions
    print(f"Loading {best_model_name} for confusion matrix generation...")

    # Map model names to their classes
    model_mapping = {
        'Vanilla CNN': VanillaCNN,
        'ResNet50': ResNet50Face,
        'Attention CNN': AttentionCNN
    }

    if best_model_name in model_mapping:
        # Load the saved model
        model_class = model_mapping[best_model_name]
        model = model_class(num_classes)
        model_slug = best_model_name.lower().replace(" ", "_")

        # The results listing names checkpoints '<slug>_best_model.pth'; the
        # previously used '<slug>_model.pth' is kept as a fallback.
        # NOTE(review): confirm the file name train_model actually writes.
        candidate_paths = [
            os.path.join(RESULTS_DIR, f'{model_slug}_best_model.pth'),
            os.path.join(RESULTS_DIR, f'{model_slug}_model.pth'),
        ]
        model_path = next((path for path in candidate_paths if os.path.exists(path)), None)

        if model_path is not None:
            # map_location='cpu' lets a checkpoint saved on GPU load on a CPU-only runtime.
            model.load_state_dict(torch.load(model_path, map_location='cpu'))
            eval_results = evaluate_model(model, test_loader, best_model_name)
            predictions = eval_results['predictions']
            targets = eval_results['targets']
        else:
            print("Model file not found. The confusion matrix panel will be left empty.")
    else:
        print(f"No model class registered for {best_model_name}. The confusion matrix panel will be left empty.")
else:
    # For AdaBoost, we already have predictions
    predictions = ada_predictions
    targets = ada_targets

# Create visualizations
plt.figure(figsize=(15, 6))

# 1. Confusion Matrix (limited to the first 10 classes for visibility)
plt.subplot(1, 3, 1)
num_classes_to_show = min(10, num_classes)
cm = None
if predictions is not None and targets is not None:
    # Keep only samples whose true label is one of the first N classes
    mask = np.array(targets) < num_classes_to_show
    filtered_predictions = np.array(predictions)[mask]
    filtered_targets = np.array(targets)[mask]

    if len(filtered_predictions) > 0:
        cm = confusion_matrix(filtered_targets, filtered_predictions, labels=list(range(num_classes_to_show)))

if cm is not None:
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True)
    plt.title(f'Confusion Matrix - {best_model_name}\n(First {num_classes_to_show} classes)')
else:
    # Never plot made-up numbers: this figure is meant for the research paper,
    # so show an explicit placeholder when no real predictions exist.
    plt.text(0.5, 0.5, 'Confusion matrix not available', ha='center', va='center',
             transform=plt.gca().transAxes, fontsize=12)
    plt.title(f'Confusion Matrix - {best_model_name} (not available)')

plt.xlabel('Predicted Class')
plt.ylabel('True Class')

# 2. Model Performance Comparison
plt.subplot(1, 3, 2)
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
models_for_comparison = list(all_results.keys())

# Grouped bars: one group per metric, one bar per model.  The bar width adapts
# to the number of models (0.2 for the usual four).
x = np.arange(len(metrics))
width = 0.8 / max(1, len(models_for_comparison))

for i, model in enumerate(models_for_comparison):
    values = [
        all_results[model]['test_accuracy'],
        all_results[model]['test_precision'],
        all_results[model]['test_recall'],
        all_results[model]['test_f1']
    ]
    plt.bar(x + i*width, values, width, label=model, alpha=0.8)

plt.xlabel('Metrics')
plt.ylabel('Score (%)')
plt.title('Model Performance Metrics Comparison')
# Centre each tick under its group regardless of how many models are plotted
plt.xticks(x + width * (len(models_for_comparison) - 1) / 2, metrics)
plt.legend(loc='lower right')
plt.grid(True, alpha=0.3)

# 3. Efficiency Analysis
plt.subplot(1, 3, 3)
efficiency_metrics = ['Inference Time (s)', 'Model Size (MB)', 'Training Time (min)']

# Raw values (not normalised); the log axis below copes with the different magnitudes
norm_data = []
for model in models_for_comparison:
    norm_data.append([
        all_results[model]['inference_time'],
        all_results[model]['model_size_mb'],
        all_results[model]['training_time'] / 60
    ])

# Create grouped bar chart: one group per model, one bar per efficiency metric
x = np.arange(len(models_for_comparison))
width = 0.25

# Log scale for better visualization of different magnitudes
plt.yscale('log')

for i, metric in enumerate(efficiency_metrics):
    values = [norm_data[j][i] for j in range(len(models_for_comparison))]
    plt.bar(x + i*width, values, width, label=metric, alpha=0.8)

plt.xlabel('Models')
plt.ylabel('Value (log scale)')
plt.title('Model Efficiency Comparison')
plt.xticks(x + width, models_for_comparison, rotation=45)
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(os.path.join(RESULTS_DIR, 'detailed_analysis.png'), dpi=300, bbox_inches='tight')
plt.show()

print("[COMPLETED] Detailed analysis completed!")
print(f"[SAVED] Analysis saved to: {os.path.join(RESULTS_DIR, 'detailed_analysis.png')}")
