# FaceNet Embeddings with MTCNN

State-of-the-art face recognition using the FaceNet architecture for embeddings and MTCNN for face detection.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mtcnn import MTCNN
import cv2

# Confirm the imports above ran and spell out the MTCNN acronym.
print(
    'MTCNN imported successfully',
    'MTCNN: Multi-task Cascaded Convolutional Networks',
    sep='\n',
)

## MTCNN Face Detector

MTCNN simultaneously detects:
- **Faces** (bounding boxes)
- **Facial landmarks** (eyes, nose, mouth corners)
- **Confidence scores**

In [None]:
# Build the face detector using the library's default constructor arguments.
detector = MTCNN()
print('✓ MTCNN detector initialized')

# Display-only summary of the detector; none of these strings are passed
# to the MTCNN instance.
specs = {
    'Detector': 'Multi-task CNN',
    'Outputs': 'Boxes + Landmarks + Confidence',
    'Landmarks': '5 points (eyes, nose, mouth)',
    'Accuracy': '99%+ detection rate',
    'Use case': 'Face alignment for recognition',
}

# Header first, then one indented "name: description" line per entry.
spec_lines = [f'  {field}: {detail}' for field, detail in specs.items()]
print('\nMTCNN Specifications:', *spec_lines, sep='\n')

## Demonstrate MTCNN Detection

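Draw a simulated detection result (a hard-coded bounding box, five landmarks, and a confidence score) on a blank image to show what MTCNN returns; the detector itself is not run in this cell: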

In [None]:
# Blank white canvas: 300 rows x 400 columns, 3 channels.
demo_img = np.full((300, 400, 3), 255, dtype=np.uint8)

# Hard-coded stand-in for a single detection; no detector is invoked here.
detection = {
    'box': [50, 50, 200, 200],
    'confidence': 0.9987,
    'keypoints': {
        'left_eye': (100, 100),
        'right_eye': (200, 100),
        'nose': (150, 150),
        'mouth_left': (120, 200),
        'mouth_right': (180, 200),
    },
}

# The box is unpacked as top-left corner plus width and height.
# Colours are given in BGR order because the image is converted with
# COLOR_BGR2RGB before display: (0, 255, 0) is green, (255, 0, 0) is blue.
x, y, w, h = detection['box']
top_left = (x, y)
bottom_right = (x + w, y + h)
cv2.rectangle(demo_img, top_left, bottom_right, (0, 255, 0), 2)

# One filled dot (thickness -1) of radius 5 per keypoint.
for point in detection['keypoints'].values():
    cv2.circle(demo_img, point, 5, (255, 0, 0), -1)

# Confidence caption, placed 10 pixels above the box's top-left corner.
label = 'Confidence: {:.2%}'.format(detection['confidence'])
cv2.putText(
    demo_img,
    label,
    (x, y - 10),
    cv2.FONT_HERSHEY_SIMPLEX,
    0.5,
    (0, 255, 0),
    2,
)

# Show the annotated canvas without axes.
rgb_img = cv2.cvtColor(demo_img, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(8, 6))
plt.imshow(rgb_img)
plt.title('MTCNN Detection Demo')
plt.axis('off')
plt.show()

print(
    '\n✓ MTCNN detects face + 5 landmarks',
    '✓ Landmarks used for face alignment',
    sep='\n',
)

## FaceNet Architecture

FaceNet uses Inception ResNet v1:
- **Input**: 160×160 RGB image
- **Output**: 512-dimensional embedding
- **Training**: Triplet loss
- **Accuracy**: 99.65% on LFW

In [None]:
# Headline facts about the FaceNet model, stored as display strings.
architecture = {
    'Model': 'Inception ResNet v1',
    'Parameters': '23.6 million',
    'Input size': '160×160×3',
    'Embedding size': '512 dimensions',
    'Training dataset': 'VGGFace2 (3.31M images)',
    'Loss function': 'Triplet loss',
    'Accuracy (LFW)': '99.65%',
}

# Render a banner-framed table; field names are left-aligned and padded
# to 20 characters so the colons line up.
rule = '=' * 60
table_rows = [f'{field:20} : {detail}' for field, detail in architecture.items()]
print(rule, 'FACENET ARCHITECTURE', rule, *table_rows, rule, sep='\n')

## Triplet Loss Explained

FaceNet is trained using triplet loss:
- **Anchor**: Reference face
- **Positive**: Same person (different photo)
- **Negative**: Different person

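**Goal**: Make ||anchor - positive|| small and ||anchor - negative|| large. Formally, the loss for one triplet is $L = \max\left(\|f(a) - f(p)\|_2^2 - \|f(a) - f(n)\|_2^2 + \alpha,\ 0\right)$, where $f$ is the embedding network and $\alpha$ is a fixed margin that the negative must exceed the positive by.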

In [None]:
# Three stand-in 512-d embeddings drawn from the global NumPy RNG.
# The draw order (anchor, positive noise, negative) is kept fixed so a
# seeded run produces the same vectors.
anchor = np.random.randn(512)
positive = anchor + 0.1 * np.random.randn(512)  # anchor plus small noise
negative = np.random.randn(512)  # independent draw

# Euclidean distance from the anchor to each of the other two vectors.
offset_to_positive = anchor - positive
offset_to_negative = anchor - negative
dist_positive = np.linalg.norm(offset_to_positive)
dist_negative = np.linalg.norm(offset_to_negative)

# NOTE: the value labelled "Margin" below is the gap between the two
# distances computed above, not a preset hyperparameter.
report = [
    'Triplet Loss Example:',
    f'  Distance (anchor ↔ positive): {dist_positive:.4f}',
    f'  Distance (anchor ↔ negative): {dist_negative:.4f}',
    '',
    f'  Margin: {dist_negative - dist_positive:.4f}',
    '  Goal: Maximize this margin!',
    '',
    '✓ Triplet loss ensures same-person faces cluster together',
    "✓ Different people's faces stay far apart",
]
print('\n'.join(report))

## Embedding Comparison

In [None]:
# Simulate FaceNet embeddings and compare them with two metrics.
#
# The random vectors are L2-normalised before comparison. Without that
# step a 512-d standard-normal vector has a norm of roughly sqrt(512) ≈ 22.6,
# so even the "same person" pair lands at a Euclidean distance of about 3.4,
# far above the "<0.7" threshold printed next to it. On the unit sphere the
# distances fall in [0, 2] and the printed thresholds become meaningful.


def _l2_normalize(vec: np.ndarray) -> np.ndarray:
    """Return ``vec`` rescaled to unit Euclidean length."""
    return vec / np.linalg.norm(vec)


def _cosine_similarity(u: np.ndarray, v: np.ndarray) -> float:
    """Return the cosine of the angle between ``u`` and ``v``."""
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))


# Draw order is unchanged from the un-normalised version (person X, noise
# for X's second photo, person Y) so a seeded global RNG yields the same
# underlying samples.
_raw_x = np.random.randn(512)
person_x_emb1 = _l2_normalize(_raw_x)
person_x_emb2 = _l2_normalize(_raw_x + np.random.randn(512) * 0.15)
person_y_emb = _l2_normalize(np.random.randn(512))

# Euclidean distance
dist_same = np.linalg.norm(person_x_emb1 - person_x_emb2)
dist_diff = np.linalg.norm(person_x_emb1 - person_y_emb)

# Cosine similarity (unaffected by the normalisation above)
cos_same = _cosine_similarity(person_x_emb1, person_x_emb2)
cos_diff = _cosine_similarity(person_x_emb1, person_y_emb)

print('\n' + '='*60)
print('EMBEDDING COMPARISON')
print('='*60)
print('Same Person (X vs X):')
print(f'  Euclidean distance: {dist_same:.4f} (threshold: <0.7)')
print(f'  Cosine similarity:  {cos_same:.4f} (threshold: >0.6)')
print()
print('Different People (X vs Y):')
print(f'  Euclidean distance: {dist_diff:.4f}')
print(f'  Cosine similarity:  {cos_diff:.4f}')
print('='*60)
print('\n✓ Clear separation between same/different people')
print('✓ FaceNet achieves 99.65% accuracy on LFW')