## DATASET LINK : https://www.kaggle.com/datasets/hearfool/vggface2

In [None]:
# Mount Google Drive (Colab only) so the dataset and checkpoint paths under
# /content/drive/MyDrive used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# !unzip "/content/drive/MyDrive/face_data.zip" -d"/content/drive/MyDrive/FaceNet_Data"

In [None]:
!pip install facenet_pytorch

In [None]:
!pip install torch torchvision torchaudio
!pip install mtcnn pillow numpy


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from PIL import Image
from facenet_pytorch import MTCNN
import numpy as np

In [None]:
# Paths
base_path = "/content/drive/MyDrive/FaceNet_Data/face_data/train"  #  dataset path
# output_path = "/content/drive/MyDrive/Face_Dataset/Output_path"  # output path
# os.listdir() returns entries in arbitrary order, so sort before slicing to
# make "the first 100 identities" the same set on every run.
identities = sorted(os.listdir(base_path))[:100]  # First 100 identities
print(identities)

In [None]:

from torch.utils.data import Dataset

class VGGFace2Subset(Dataset):
    """Dataset over two parallel sequences: image file paths and their labels.

    Args:
        data: Sequence of image file paths; ``data[i]`` is opened on access.
        labels: Sequence of labels; ``labels[i]`` belongs to ``data[i]``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.

    Raises:
        ValueError: If ``data`` and ``labels`` have different lengths.
    """

    def __init__(self, data, labels, transform=None):
        # Fail early instead of raising IndexError (or silently ignoring
        # surplus labels) in the middle of an epoch.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from ``self.data[idx]``, converted to RGB and
        passed through ``self.transform`` when one was given.
        """
        img_path = self.data[idx]
        label = self.labels[idx]
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted, rather than waiting for garbage
        # collection. `Image` is the module-level PIL import.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label


In [None]:
# First 50 identities
base_path = "/content/drive/MyDrive/FaceNet_Data/face_data/train"    # dataset path
# os.listdir() returns entries in arbitrary order. The position of each folder
# in this list is used as its class index, so sort first to keep the
# index -> identity mapping identical across sessions.
# NOTE(review): a checkpoint trained with a different (unsorted) ordering will
# not match this mapping and needs its classifier head retrained.
all_identities = sorted(os.listdir(base_path))[:50]  #  first 50 identities

In [None]:
# Display the selected identity folder names; each one's position in the list
# is used as its class label when the dataset is built.
all_identities

In [None]:
#  data and labels lists
# Each identity's position in all_identities becomes its integer class label.
max_images_per_identity = 110
data = []
labels = []
for label, identity in enumerate(all_identities):
    identity_path = os.path.join(base_path, identity)
    # os.listdir() order is arbitrary; sort so the same images are selected
    # for every identity on every run.
    images = sorted(os.listdir(identity_path))[:max_images_per_identity]
    data.extend(os.path.join(identity_path, img_name) for img_name in images)
    labels.extend([label] * len(images))

#  torchvision transforms
from torchvision import transforms
# Deterministic preprocessing: fixed 224x224 resize, then conversion to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Create dataset
full_dataset = VGGFace2Subset(data, labels, transform=transform)
print(full_dataset)

<__main__.VGGFace2Subset object at 0x7a48e1fcd990>


In [None]:
# Sanity check: report how many samples the dataset exposes.
print("Number of samples in dataset:", len(full_dataset))


Number of samples in dataset: 5390


In [None]:
# Transformations for data augmentation and normalization
# NOTE(review): rebinding `transform` here does not change `full_dataset`,
# which was constructed earlier with the previous pipeline object.
geometric_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),  # Data augmentation
    transforms.RandomRotation(10),
]
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(geometric_steps + tensor_steps)

In [None]:

# Train (80%), validation (10%), and test (10%)
train_size = int(0.8 * len(full_dataset))
val_size = int(0.1 * len(full_dataset))
# The test split takes the remainder so the three sizes always sum to the dataset length.
test_size = len(full_dataset) - train_size - val_size

# Seed the split. Without a fixed generator the partition changes every
# session, so a model resumed from a saved checkpoint could be validated and
# tested on samples it was trained on in an earlier session.
split_seed = 42
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

In [None]:
# Data loaders
batch_size = 32
# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)

In [None]:
# #  Pre-trained ResNet-50
# model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)  # Updated to use 'weights' instead of 'pretrained'
# # model =torch.load('/content/drive/MyDrive/face_recognition_model.pth')
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, len(all_identities))  # Output layer for number of identities


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 104MB/s]


In [None]:
import torch
import torchvision.models as models
import torch.nn as nn

# Build a ResNet-50 whose final layer has one output per identity, resuming
# from the saved checkpoint when it exists.
checkpoint_path = '/content/drive/MyDrive/face_recognition_model.pth'

if os.path.exists(checkpoint_path):
    # Recreate model architecture
    model = models.resnet50(weights=None)  # No pretrained weights
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, len(all_identities))  # Match output layer size

    # Load weights. map_location='cpu' lets a checkpoint saved from a CUDA
    # model load on a CPU-only runtime; the model is moved to `device` later.
    state_dict = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state_dict)
    print(f"Resumed from checkpoint '{checkpoint_path}'")
else:
    # The checkpoint is only written by the save cell further down, so on a
    # first run start from ImageNet-pretrained weights instead of crashing.
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, len(all_identities))  # Output layer for number of identities
    print(f"No checkpoint at '{checkpoint_path}'; starting from ImageNet weights")

model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Freeze every parameter whose name does not contain 'fc', so that initially
# only the final layer is fine-tuned.
backbone_params = [param for name, param in model.named_parameters() if 'fc' not in name]
for param in backbone_params:
    param.requires_grad = False

# Loss function, and an optimizer that only receives the final layer's parameters
criterion = nn.CrossEntropyLoss()
head_params = model.fc.parameters()
optimizer = optim.Adam(head_params, lr=0.001)

In [None]:
# Training setup
num_epochs = 5
# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = model.to(device)


In [None]:
# Display which device was selected.
device

device(type='cuda')

In [None]:
# Training loop
# Each epoch: one full pass over train_loader, accumulating the sample-weighted
# loss and the number of correct predictions, then printing the epoch averages.
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # The batch variables are deliberately not called `labels`, so the loop
    # does not overwrite the module-level `labels` list that the dataset was
    # built from.
    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()

        batch_count = batch_targets.size(0)
        # `loss` is weighted by the batch size so that dividing by `total`
        # below gives a per-sample average even when the last batch is smaller.
        running_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        total += batch_count
        correct += (predicted == batch_targets).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

Epoch 1/5, Loss: 0.0740, Accuracy: 0.9919
Epoch 2/5, Loss: 0.0555, Accuracy: 0.9947
Epoch 3/5, Loss: 0.0483, Accuracy: 0.9968
Epoch 4/5, Loss: 0.0455, Accuracy: 0.9970
Epoch 5/5, Loss: 0.0404, Accuracy: 0.9986


In [None]:
# Validation step
# One pass over val_loader in eval mode with gradients disabled, reporting the
# per-sample average loss and the accuracy.
model.eval()
val_loss = 0.0
val_correct = 0
val_total = 0
with torch.no_grad():
    # The batch variables are deliberately not called `labels`, so the loop
    # does not overwrite the module-level `labels` list that the dataset was
    # built from.
    for batch_images, batch_targets in val_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)
        outputs = model(batch_images)
        loss = criterion(outputs, batch_targets)

        batch_count = batch_targets.size(0)
        # Weight by batch size so the final division yields a per-sample average.
        val_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        val_total += batch_count
        val_correct += (predicted == batch_targets).sum().item()

val_loss /= val_total
val_acc = val_correct / val_total
print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')

Validation Loss: 0.0686, Validation Accuracy: 0.9926


In [None]:
#  Persist the model's state dict (parameters and buffers) to Google Drive
save_path = '/content/drive/MyDrive/face_recognition_model.pth'
trained_weights = model.state_dict()
torch.save(trained_weights, save_path)

print(f"Model saved to Google Drive as '{save_path}'")

Model saved to Google Drive as '/content/drive/MyDrive/face_recognition_model.pth'


In [None]:
import torch
# Report whether CUDA is usable and, if so, the name of device 0.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)
# get_device_name(0) raises when no CUDA device is present, so only query it
# when one exists.
print(torch.cuda.get_device_name(0) if cuda_ready else "No CUDA device available")


True
Tesla T4


In [None]:
# Evaluation on test set
# One pass over test_loader in eval mode with gradients disabled, reporting the
# per-sample average loss and the accuracy.
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0
with torch.no_grad():
    # The batch variables are deliberately not called `labels`, so the loop
    # does not overwrite the module-level `labels` list that the dataset was
    # built from.
    for batch_images, batch_targets in test_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)
        outputs = model(batch_images)
        loss = criterion(outputs, batch_targets)

        batch_count = batch_targets.size(0)
        # Weight by batch size so the final division yields a per-sample average.
        test_loss += loss.item() * batch_count
        predicted = outputs.argmax(dim=1)
        test_total += batch_count
        test_correct += (predicted == batch_targets).sum().item()

test_loss /= test_total
test_acc = test_correct / test_total
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}')

Test Loss: 0.0832, Test Accuracy: 0.9926


In [None]:
# Function for inference on new images
def predict_identity(image_path, model, device, transform, identities, detector=None):
    """Detect the first face in an image and classify it with ``model``.

    Args:
        image_path: Path of the image file to open.
        model: Classifier; it is put in eval mode and called on the
            transformed face crop with a leading batch dimension added.
        device: Device the transformed crop is moved to before the forward pass.
        transform: Callable turning the cropped PIL face into a tensor. It
            should be deterministic (no random augmentation) for inference.
        identities: Sequence mapping a predicted class index to an identity name.
        detector: Optional face detector exposing ``detect(img)``. When
            omitted a new ``MTCNN()`` is created; pass one in to avoid
            rebuilding the detector on every call.

    Returns:
        ``(identity, confidence)`` where ``confidence`` is the softmax
        probability of the predicted class, or ``(None, None)`` when no usable
        face is found or any error occurs (the error is printed).
    """
    model.eval()
    if detector is None:
        detector = MTCNN()
    try:
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(image_path) as raw:
            img = raw.convert('RGB')

        boxes, _ = detector.detect(img)
        if boxes is None or len(boxes) == 0:
            return None, None

        # facenet_pytorch's MTCNN.detect() reports each box as corner
        # coordinates (x1, y1, x2, y2). Treating the last two values as
        # width/height, as the previous code did, crops the wrong region.
        x1, y1, x2, y2 = (int(v) for v in boxes[0][:4])  # Use first detected face

        # Detected boxes can extend past the image border; clamp them so the
        # crop contains only real pixels.
        width, height = img.size
        left, top = max(x1, 0), max(y1, 0)
        right, bottom = min(x2, width), min(y2, height)
        if right <= left or bottom <= top:
            return None, None

        face = img.crop((left, top, right, bottom))
        batch = transform(face).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(batch)
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            # The arg-max of the softmax is the arg-max of the logits, so this
            # yields the predicted class and its probability in one step.
            confidence, predicted = torch.max(probabilities, 1)
        return identities[predicted.item()], confidence.item()
    except Exception as e:
        # Best-effort helper: report the problem and signal failure to the caller.
        print(f"Error during inference: {e}")
        return None, None

In [None]:
# Example
image_path = '/content/Screenshot 2025-06-26 125626.png'
# Inference needs deterministic preprocessing. The module-level `transform`
# at this point contains RandomHorizontalFlip / RandomRotation and Normalize,
# while `full_dataset` was built with a plain Resize + ToTensor pipeline, so
# an explicit pipeline matching the dataset is used here instead.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
identity, confidence = predict_identity(image_path, model, device, inference_transform, all_identities)
if identity is not None:
    print(f'Predicted Identity: {identity}, Confidence: {confidence:.2f}')
else:
    print("No face detected or error occurred.")

Predicted Identity: n000070, Confidence: 1.00
