In [1]:
!mkdir faces_positive

In [2]:
import kagglehub
import os
import random
import time
import torch
import torch.nn as nn
import torch.optim as optim

from google.colab import files
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, RandomCrop, Normalize, Resize, Compose
from tqdm import trange

# --- Setup ---
torch.manual_seed(15)
random.seed(15)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

path = kagglehub.dataset_download("ashwingupta3012/human-faces")
print("Path to dataset files:", path)

Using device: cuda
Using Colab cache for faster access to the 'human-faces' dataset.
Path to dataset files: /kaggle/input/human-faces


In [3]:
# Find mean and stddev of dataset
NEGATIVES = os.path.join(path, 'Humans')
POSITIVES = 'faces_positive'

pixel_r = pixel_r2 = pixel_g = pixel_g2 = pixel_b = pixel_b2 = N = 0
tt_obj = ToTensor()

NEG_IMAGES = [os.path.join(NEGATIVES, img) for img in os.listdir(NEGATIVES)]
remove_idxs = []
for i in trange(len(NEG_IMAGES)):
  img = NEG_IMAGES[i]

  try:
    image = Image.open(img).convert('RGB')
  except:
    remove_idxs.append(i)
    continue

  image_tensor = tt_obj(image)
  if image_tensor.shape[0] != 3:
    remove_idxs.append(i)
    continue

  N += image_tensor.shape[1] * image_tensor.shape[2]
  pixel_r += torch.sum(image_tensor[0])
  pixel_r2 += torch.sum(image_tensor[0] ** 2)
  pixel_g += torch.sum(image_tensor[1])
  pixel_g2 += torch.sum(image_tensor[1] ** 2)
  pixel_b += torch.sum(image_tensor[2])
  pixel_b2 += torch.sum(image_tensor[2] ** 2)

POS_IMAGES = [os.path.join(POSITIVES, img) for img in os.listdir(POSITIVES)]
for i in trange(len(POS_IMAGES)):
  img = POS_IMAGES[i]
  image = Image.open(img).convert('RGB')
  image_tensor = tt_obj(image)

  N += image_tensor.shape[1] * image_tensor.shape[2]
  pixel_r += torch.sum(image_tensor[0])
  pixel_r2 += torch.sum(image_tensor[0] ** 2)
  pixel_g += torch.sum(image_tensor[1])
  pixel_g2 += torch.sum(image_tensor[1] ** 2)
  pixel_b += torch.sum(image_tensor[2])
  pixel_b2 += torch.sum(image_tensor[2] ** 2)

mean = (float(pixel_r / N), float(pixel_g / N), float(pixel_b / N))
stddev = (float(pixel_r2 / N - mean[0] ** 2) ** 0.5, float(pixel_g2 / N - mean[1] ** 2) ** 0.5, float(pixel_b2 / N - mean[2] ** 2) ** 0.5)
print(f'Mean: {mean}\nStd. Dev: {stddev}')

100%|██████████| 7219/7219 [04:26<00:00, 27.08it/s]
100%|██████████| 435/435 [00:01<00:00, 330.86it/s]

Mean: (0.5227308869361877, 0.46569907665252686, 0.4313724637031555)
Std. Dev: (0.3161585273687047, 0.3008185772213572, 0.2996168262495128)





In [4]:
# Clean up negatives dataset
print(f'Removing {len(remove_idxs)} images from the negatives dataset (incompatible number of channels)')
remove_idxs.reverse()
for idx in remove_idxs:
    NEG_IMAGES.pop(idx)

Removing 0 images from the negatives dataset (incompatible number of channels)


In [5]:
# Data transforms
IMAGE_SIZE = 224
TRAIN_TEST_SPLIT = 0.8

transform_train = Compose([
    Resize((IMAGE_SIZE, IMAGE_SIZE)),
    RandomCrop(IMAGE_SIZE, padding=4),
    ToTensor(),
    Normalize(mean, stddev),
])

transform_test = Compose([
    Resize((IMAGE_SIZE, IMAGE_SIZE)),
    ToTensor(),
    Normalize(mean, stddev),
])

class FaceDataset(Dataset):
    def __init__(self, positives, negatives, transform):
        self.images = [(p, 1) for p in positives] + [(n, 0) for n in negatives]
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        path, label = self.images[idx]
        image = Image.open(path).convert('RGB')

        return self.transform(image), label


random.shuffle(NEG_IMAGES)
random.shuffle(POS_IMAGES)
train_neg, test_neg = NEG_IMAGES[:int(len(NEG_IMAGES)*TRAIN_TEST_SPLIT)], NEG_IMAGES[int(len(NEG_IMAGES)*TRAIN_TEST_SPLIT):]
train_pos, test_pos = POS_IMAGES[:int(len(POS_IMAGES)*TRAIN_TEST_SPLIT)], POS_IMAGES[int(len(POS_IMAGES)*TRAIN_TEST_SPLIT):]
train_dataset = FaceDataset(train_pos, train_neg, transform_train)
test_dataset = FaceDataset(test_pos, test_neg, transform_test)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [24]:
# Helpers
def train_model(model, train_loader, epochs=10, learning_rate=1e-3):
    print(f"--- Starting Training for {model.__class__.__name__} ---")

    model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in trange(epochs):
        start_time = time.time()
        running_loss = 0.0
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.float().view(-1, 1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        epoch_time = time.time() - start_time
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {epoch_loss:.4f}, Time: {epoch_time:.2f}s")

    print("--- Finished Training ---\n")
    return model

def test_model(model, test_loader):
    print(f"--- Starting Testing for {model.__class__.__name__} ---")

    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            print(images.shape)
            outputs = model(images)
            predicted = (outputs.data > 0).int()
            print(predicted[0], labels.view(-1, 1)[0])
            total += labels.size(0)
            correct += (predicted == labels.view(-1, 1)).sum().item()

    accuracy = 100 * correct / total
    print(f"Test accuracy of model: {accuracy:.1f}%")
    return accuracy

In [7]:
# Model Definition
class FaceRecognition(nn.Module):
    def __init__(self):
        super().__init__()

        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 224->112
            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 112->56
            # Block 3
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 56->28
            # Block 4
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # 28->14
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 1)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
fr_model = FaceRecognition()
trained_cnn = train_model(fr_model, train_loader, epochs=5) # Could use just 3-4 epochs...

--- Starting Training for FaceRecognition ---


 20%|██        | 1/5 [02:35<10:23, 155.99s/it]

Epoch 1/5, Training Loss: 0.4400, Time: 155.99s


 40%|████      | 2/5 [05:12<07:48, 156.16s/it]

Epoch 2/5, Training Loss: 0.0014, Time: 156.28s


 60%|██████    | 3/5 [07:48<05:11, 155.97s/it]

Epoch 3/5, Training Loss: 0.0000, Time: 155.76s


 80%|████████  | 4/5 [10:23<02:35, 155.89s/it]

Epoch 4/5, Training Loss: 0.0000, Time: 155.77s


100%|██████████| 5/5 [12:59<00:00, 155.86s/it]

Epoch 5/5, Training Loss: 0.0000, Time: 155.50s
--- Finished Training ---


In [25]:
cnn_accuracy = test_model(trained_cnn, test_loader)

--- Starting Testing for FaceRecognition ---
torch.Size([64, 3, 224, 224])
tensor([1], device='cuda:0', dtype=torch.int32) tensor([1], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([1], device='cuda:0', dtype=torch.int32) tensor([1], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')




torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor([0], device='cuda:0', dtype=torch.int32) tensor([0], device='cuda:0')
torch.Size([64, 3, 224, 224])
tensor(

In [26]:
MODEL_PATH = 'face_recognition_model.pth'
torch.save(trained_cnn.state_dict(), MODEL_PATH)
files.download(MODEL_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>