# Face detection and recognition training pipeline

The following example illustrates how to fine-tune an InceptionResnetV1 model on your own dataset. It mostly follows standard PyTorch training patterns.

In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

2025-07-03 23:04:44.294294: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-03 23:04:44.295414: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-03 23:04:44.317076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### Define run parameters

The dataset should follow the VGGFace2/ImageNet-style directory layout (one sub-directory per class). Set `data_dir` to the location of the dataset you wish to fine-tune on.

In [2]:
# Root folder of the raw images used for fine-tuning
data_dir = 'Dataset/raw'

# Mini-batch size and number of fine-tuning epochs
batch_size, epochs = 32, 8

# DataLoader worker processes: none on Windows ('nt'), 8 everywhere else
workers = 8 if os.name != 'nt' else 0

In [3]:
# Use the first CUDA GPU when one is available; otherwise run on the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cuda:0


In [4]:
# Face detector used to crop faces out of the raw images before training.
# NOTE(review): parameter meanings are defined by facenet-pytorch's MTCNN;
# the values below are kept exactly as configured originally.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

In [5]:
# Load the raw images (resized to 512x512) and re-purpose each sample's label
# slot to hold the path the cropped face should be written to, mirroring the
# raw layout under `<data_dir>_cropped`.
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))
dataset.samples = [
    # count=1: only rewrite the leading root directory. Without it, a class or
    # file name that happens to contain `data_dir` would be rewritten as well
    # and the crop would be saved to the wrong location.
    (p, p.replace(data_dir, data_dir + '_cropped', 1))
        for p, _ in dataset.samples
]

# Batches are lists of PIL images plus the matching list of save paths
loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

# Run detection batch by batch; MTCNN writes each crop to its `save_path`
for i, (x, y) in enumerate(loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(loader)), end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

Batch 1 of 1

In [6]:
# Classifier to fine-tune: starts from the 'vggface2' pretrained weights, with
# one output class per class folder found in the dataset.
resnet = InceptionResnetV1(
    pretrained='vggface2',
    classify=True,
    num_classes=len(dataset.class_to_idx),
)
resnet = resnet.to(device)

In [7]:
# Optimizer over all model parameters, with a multi-step LR schedule
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

# Convert each cropped image to a float32 tensor and apply the fixed
# standardization imported from facenet_pytorch
trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# 80/20 train/validation split. A seeded, local generator keeps the split
# identical across kernel restarts so runs are comparable; the previous
# unseeded global shuffle produced a different split every time.
split_seed = 42
split_rng = np.random.default_rng(split_seed)
img_inds = split_rng.permutation(len(dataset))
n_train = int(0.8 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Both loaders read the same dataset; the samplers restrict each one to its
# own subset of indices
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

In [8]:
# Classification objective used for fine-tuning
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics passed to training.pass_epoch, keyed by display name
metrics = dict(fps=training.BatchTimer(), acc=training.accuracy)

In [9]:
# TensorBoard writer; `iteration` and `interval` are extra attributes set here
# (presumably read by training.pass_epoch for logging cadence — confirm).
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

# try/finally guarantees the writer is closed even if training is interrupted
# or raises part-way through.
try:
    # Baseline validation pass before any fine-tuning
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # No gradients are needed for evaluation; skipping autograd saves memory
    with torch.no_grad():
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are only supplied here
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass after each epoch
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    writer.close()

# Save the trained model after training completes
model_save_path = 'facenet_vantoan_vanhau.pth'
torch.save(resnet.state_dict(), model_save_path)
print(f'\nModel saved to: {model_save_path}')

# Save class names for inference (one per line, in class-index order)
class_names_save_path = 'class_names.txt'
with open(class_names_save_path, 'w') as f:
    for class_name in dataset.classes:
        f.write(f"{class_name}\n")
print(f'Class names saved to: {class_names_save_path}')
print(f'Classes: {dataset.classes}')



Initial
----------
Valid |     1/1    | loss:    1.1322 | fps:   14.0406 | acc:    0.0000   

Epoch 1/8
----------
Valid |     1/1    | loss:    1.1322 | fps:   14.0406 | acc:    0.0000   

Epoch 1/8
----------
Train |     1/1    | loss:    0.7137 | fps:   36.6697 | acc:    0.4000   
Train |     1/1    | loss:    0.7137 | fps:   36.6697 | acc:    0.4000   
Valid |     1/1    | loss:    0.1622 | fps:   21.7795 | acc:    1.0000   

Epoch 2/8
----------
Valid |     1/1    | loss:    0.1622 | fps:   21.7795 | acc:    1.0000   

Epoch 2/8
----------
Train |     1/1    | loss:    0.0763 | fps:   68.4061 | acc:    1.0000   
Train |     1/1    | loss:    0.0763 | fps:   68.4061 | acc:    1.0000   
Valid |     1/1    | loss:    0.2446 | fps:   22.7397 | acc:    0.7500   

Epoch 3/8
----------
Valid |     1/1    | loss:    0.2446 | fps:   22.7397 | acc:    0.7500   

Epoch 3/8
----------
Train |     1/1    | loss:    0.0090 | fps:   64.6008 | acc:    1.0000   
Train |     1/1    | loss:    0.0

# Inference and Testing
Test the trained model on sample images.

In [10]:
from PIL import Image
import matplotlib.pyplot as plt

# Load trained model for inference
def load_trained_model(model_path, num_classes, device):
    """Rebuild the classifier and load fine-tuned weights from disk.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        num_classes: Number of output classes the saved model was trained with.
        device: Device the model is moved to and the weights are mapped onto.

    Returns:
        The ``InceptionResnetV1`` classifier in eval mode with the saved
        weights loaded.
    """
    # pretrained=None: the saved state dict replaces every weight, so loading
    # (and possibly downloading) the VGGFace2 checkpoint first is wasted work.
    model = InceptionResnetV1(
        classify=True,
        pretrained=None,
        num_classes=num_classes
    ).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model

# Initialize MTCNN for inference
mtcnn_inference = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

# Load class names with error handling
class_names_path = 'class_names.txt'
model_path = 'facenet_vantoan_vanhau.pth'

try:
    # Try to load class names from file
    with open(class_names_path, 'r') as f:
        # Ignore blank lines (e.g. a trailing newline added by an editor):
        # each one would otherwise count as an extra, empty class name and
        # make the class count disagree with the saved model.
        class_names = [line.strip() for line in f if line.strip()]
    print(f"Loaded class names from file: {class_names}")
except FileNotFoundError:
    # Fallback: use current dataset classes
    try:
        # Copy so later edits to class_names cannot alter the dataset's list
        class_names = list(dataset.classes)
        print(f"Using current dataset classes: {class_names}")
    except NameError:
        print("Error: No dataset or class names file found. Please run training first.")
        class_names = []

# Load trained model with error handling
if class_names and os.path.exists(model_path):
    try:
        model_inference = load_trained_model(model_path, len(class_names), device)
        print("Model loaded successfully for inference.")
    except Exception as e:
        # Deliberately broad: report any load failure and leave the notebook
        # in a state where the later cells can detect it.
        print(f"Error loading model: {e}")
        model_inference = None
else:
    if not class_names:
        print("Cannot load model: No class names available.")
    else:
        print(f"Cannot load model: Model file not found at {model_path}")
        print("Please run the training cells first to create the model.")
    model_inference = None

Loaded class names from file: ['vanhau', 'vantoan']
Model loaded successfully for inference.
Model loaded successfully for inference.


In [11]:
def predict_image(image_path, model, mtcnn, class_names, device):
    """Classify the face found in a single image file.

    Args:
        image_path: Path of the image to open.
        model: Classifier called on the cropped face batch.
        mtcnn: Face detector called on the RGB image; returns the cropped
            face, or None when no face is found.
        class_names: Sequence mapping a predicted class index to its name.
        device: Device the cropped face is moved to before inference.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the predicted class
        name, ``"No face detected"`` when the detector returns None, or an
        ``"Error: ..."`` message when any exception occurs; in the latter two
        cases ``confidence`` is 0.0.
    """
    try:
        face = mtcnn(Image.open(image_path).convert('RGB'))
        if face is None:
            return "No face detected", 0.0

        # Add the batch dimension the model is called with
        batch = face.unsqueeze(0).to(device)

        with torch.no_grad():
            probs = model(batch).softmax(dim=1)
            top_prob, top_idx = probs.max(dim=1)

        return class_names[top_idx.item()], top_prob.item()

    except Exception as err:
        # Failures are reported through the label rather than raised
        return f"Error: {str(err)}", 0.0

def test_sample_images(samples_per_class=3):
    """Run the loaded model on a few images from each class folder.

    Images are read from ``data_dir`` (the raw folder the training data was
    built from), so this is a sanity check of the pipeline rather than an
    evaluation on held-out data.

    Relies on the notebook globals ``model_inference``, ``mtcnn_inference``,
    ``class_names``, ``data_dir`` and ``device``.

    Args:
        samples_per_class: Maximum number of images tested per class
            (default 3, the previously hard-coded value).

    Returns:
        A list with one dict per tested image, holding ``true_class``,
        ``predicted_class``, ``confidence``, ``correct`` and ``image_path``.
        Empty when the model or class names are unavailable. Images for which
        ``predict_image`` reports "No face detected" or an error are counted
        as incorrect.
    """
    if model_inference is None:
        print("Error: Model not loaded. Cannot run inference.")
        return []

    if not class_names:
        print("Error: No class names available.")
        return []

    test_results = []

    for class_name in class_names:
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.exists(class_dir):
            print(f"Directory not found: {class_dir}")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # selected sample identical from run to run.
        images = sorted(
            f for f in os.listdir(class_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )

        if not images:
            print(f"No images found in {class_dir}")
            continue

        # Test first few images from each class
        for img_file in images[:samples_per_class]:
            img_path = os.path.join(class_dir, img_file)
            predicted_class, confidence = predict_image(
                img_path, model_inference, mtcnn_inference, class_names, device
            )

            result = {
                'true_class': class_name,
                'predicted_class': predicted_class,
                'confidence': confidence,
                'correct': predicted_class == class_name,
                'image_path': img_path
            }
            test_results.append(result)

            status = "✅" if result['correct'] else "❌"
            print(f"{status} {class_name} -> {predicted_class} (conf: {confidence:.3f})")

    # Summary
    if test_results:
        correct_predictions = sum(1 for r in test_results if r['correct'])
        accuracy = correct_predictions / len(test_results)
        print(f"\nTest Results: {correct_predictions}/{len(test_results)} correct ({accuracy*100:.1f}%)")
    else:
        print("No test results available.")

    return test_results

# Run tests with error handling
print("Testing model on sample images...")
if model_inference is not None and class_names:
    test_results = test_sample_images()
else:
    # Keep `test_results` defined on the failure path so later cells that
    # inspect it do not hit a NameError.
    test_results = []
    print("Cannot run tests: Model or class names not available.")
    print("Please ensure training has completed successfully.")

Testing model on sample images...
✅ vanhau -> vanhau (conf: 0.946)
✅ vanhau -> vanhau (conf: 0.926)
✅ vanhau -> vanhau (conf: 1.000)
✅ vantoan -> vantoan (conf: 0.998)
✅ vantoan -> vantoan (conf: 1.000)
✅ vantoan -> vantoan (conf: 1.000)

Test Results: 6/6 correct (100.0%)
✅ vantoan -> vantoan (conf: 0.998)
✅ vantoan -> vantoan (conf: 1.000)
✅ vantoan -> vantoan (conf: 1.000)

Test Results: 6/6 correct (100.0%)
