# Installation

In [14]:
# One-time environment setup, kept commented out so that "Run All" does not reinstall
# packages on every execution. Uncomment to install; inside Jupyter, `%pip` is the form
# that targets the running kernel's environment.
#!pip install opencv-python numpy pandas facenet-pytorch torch torchvision onnxruntime

# Neural Net

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and a per-channel PReLU.

    Args:
        in_channels: Number of channels in the incoming feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Forwarded unchanged to ``nn.Conv2d``.
        stride: Forwarded unchanged to ``nn.Conv2d``.
        padding: Forwarded unchanged to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # The convolution is created without a bias term; it is immediately
        # followed by a BatchNorm2d layer.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        # One learnable PReLU slope per output channel.
        self.prelu = nn.PReLU(out_channels)

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> PReLU and return the result."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.prelu(normalised)


In [3]:
class DepthWise(nn.Module):
    """Depthwise 3x3 convolution followed by a pointwise 1x1 projection.

    The depthwise stage (``groups=in_channels``) is followed by batch norm and
    PReLU; the pointwise stage is followed by batch norm only, with no
    activation after it.

    Args:
        in_channels: Channels of the input (and of the depthwise stage).
        out_channels: Channels produced by the pointwise convolution.
        stride: Stride of the depthwise convolution.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        # Depthwise stage: one 3x3 filter per input channel.
        self.dw = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.prelu = nn.PReLU(in_channels)
        # Pointwise stage: 1x1 convolution that changes the channel count.
        self.pw = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply the depthwise stage, then the pointwise stage, to ``x``."""
        depthwise_out = self.bn1(self.dw(x))
        activated = self.prelu(depthwise_out)
        projected = self.pw(activated)
        return self.bn2(projected)


In [4]:
class Bottleneck(nn.Module):
    """A 1x1 ``ConvBlock`` followed by a ``DepthWise`` block, with an optional residual add.

    The input is added back onto the output only when ``stride == 1`` and
    ``in_channels == out_channels``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride handed to the ``DepthWise`` stage.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        self.conv1 = ConvBlock(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.dw = DepthWise(out_channels, out_channels, stride)
        # Empty container; forward() does not call it and adds the input directly.
        self.shortcut = nn.Sequential()
        keeps_resolution = stride == 1
        keeps_width = in_channels == out_channels
        self.use_shortcut = keeps_resolution and keeps_width

    def forward(self, x):
        """Return the block output, plus ``x`` itself when the residual path is enabled."""
        branch = self.conv1(x)
        branch = self.dw(branch)
        if not self.use_shortcut:
            return branch
        return branch + x


In [10]:
class MobileFaceNet(nn.Module):
    """Convolutional face-embedding network producing 128-d, L2-normalised vectors.

    Four stride-2 stages (``layer1``, ``layer3``, ``layer4``, ``layer5``) shrink
    the spatial size by 16x, and ``dw7`` is an unpadded 7x7 depthwise
    convolution. A 112x112 input therefore reaches ``dw7`` as 7x7 and leaves it
    as 1x1, which is what the ``Linear(512, 128)`` after the flatten requires.
    """

    def __init__(self):
        super().__init__()
        # Stem.
        self.layer1 = ConvBlock(3, 64, kernel_size=3, stride=2, padding=1)
        self.layer2 = DepthWise(64, 64, stride=1)
        # Bottleneck stages; only the first block of each stage is strided.
        self.layer3 = self._make_layer(64, 64, stride=2, num_blocks=4)
        self.layer4 = self._make_layer(64, 128, stride=2, num_blocks=6)
        self.layer5 = self._make_layer(128, 128, stride=2, num_blocks=2)
        # Head: widen to 512 channels, collapse the spatial grid, project to 128-d.
        self.conv6 = ConvBlock(128, 512, kernel_size=1, stride=1, padding=0)
        self.dw7 = nn.Conv2d(512, 512, kernel_size=7, groups=512, bias=False)  # global depthwise
        self.bn7 = nn.BatchNorm2d(512)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(512, 128)
        self.bn8 = nn.BatchNorm1d(128)

    def _make_layer(self, in_channels, out_channels, stride, num_blocks):
        """Build a stage: one block with ``stride``, then stride-1 blocks up to ``num_blocks`` total."""
        head = [Bottleneck(in_channels, out_channels, stride)]
        tail = [
            Bottleneck(out_channels, out_channels, stride=1)
            for _ in range(1, num_blocks)
        ]
        return nn.Sequential(*head, *tail)

    def forward(self, x):
        """Map a batch of images to unit-length embeddings of shape ``(batch, 128)``."""
        pipeline = (
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5,
            self.conv6,
            self.dw7,
            self.bn7,
            self.flatten,
            self.linear,
            self.bn8,
        )
        for stage in pipeline:
            x = stage(x)
        # Scale each embedding row to unit L2 norm.
        return F.normalize(x, p=2, dim=1)


In [11]:
# NOTE(review): no trained weights are loaded anywhere in the visible notebook, so the
# network runs with its random initialisation. The embeddings it produces are appended to
# a CSV that persists across kernel sessions, and rows are only comparable if every session
# uses the same weights. Seeding makes the initialisation repeatable on a given PyTorch
# build; loading a trained checkpoint (model.load_state_dict(...)) is the real fix.
torch.manual_seed(0)
model = MobileFaceNet()

# Inference mode: BatchNorm layers use their running statistics instead of batch statistics.
model.eval()

# Smoke test: one random 3x112x112 "image" must come out as a (1, 128) embedding.
dummy_input = torch.randn(1, 3, 112, 112)  # 1 RGB face image
with torch.no_grad():  # no gradients are needed for a shape check
    embedding = model(dummy_input)
print("Output embedding shape:", embedding.shape)


Output embedding shape: torch.Size([1, 128])


# Data Collection

In [12]:
import cv2
import numpy as np
import pandas as pd
from facenet_pytorch import MTCNN
import os
import torch
from datetime import datetime
import torchvision.transforms as transforms


# Face detector used by the capture cell; constructed with keep_all=False.
mtcnn = MTCNN(keep_all=False)  # Only 1 face at a time

# Destination file for the collected embeddings.
CSV_FILE = 'face_dataset1.csv'

# On first use, write a header-only CSV: a "name" column followed by emb_0 .. emb_127.
# An existing file is left untouched so previously collected rows are kept.
if not os.path.exists(CSV_FILE):
    header = ["name"]
    header.extend(f"emb_{i}" for i in range(128))
    df = pd.DataFrame(columns=header)
    df.to_csv(CSV_FILE, index=False)


In [13]:
# Interactive capture: show the webcam feed, detect a face in every frame, and on each
# press of 'c' append that face's embedding to CSV_FILE under the entered name.
# Press 'q' to stop early; the loop also ends once MAX_SAMPLES rows have been written.

# Strip surrounding whitespace so "ag" and "ag " do not end up as two different labels.
name = input("Enter the person's name: ").strip()
cap = cv2.VideoCapture(0)

count = 0
MAX_SAMPLES = 10
to_tensor = transforms.ToTensor()  # built once instead of once per captured sample

try:
    while count < MAX_SAMPLES:
        ret, frame = cap.read()
        if not ret:
            # Covers both "camera could not be opened" and "stream ended".
            print("Could not read a frame from the webcam; stopping.")
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(img)

        # `face` is reset every frame so a stale crop can never be saved.
        face = None
        if boxes is not None:
            frame_h, frame_w = img.shape[:2]
            x1, y1, x2, y2 = [int(v) for v in boxes[0]]  # Only first face
            # Clamp to the frame: a negative coordinate would otherwise wrap around
            # in the NumPy slice below and crop the wrong region.
            x1, x2 = max(0, x1), min(frame_w, x2)
            y1, y2 = max(0, y1), min(frame_h, y2)

            if x2 > x1 and y2 > y1:
                face = cv2.resize(img[y1:y2, x1:x2], (112, 112))
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.imshow("Face", cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

        # Always refresh the preview and poll the keyboard, even when the crop was
        # unusable, so the window stays responsive and 'q' is never missed.
        cv2.imshow("Webcam", frame)
        key = cv2.waitKey(1) & 0xFF

        if key == ord('c') and face is not None:
            # ToTensor turns the uint8 HxWx3 crop into a float 3xHxW tensor in [0, 1];
            # unsqueeze adds the batch dimension the model expects.
            face_tensor = to_tensor(face).unsqueeze(0)
            with torch.no_grad():
                embedding = model(face_tensor).squeeze(0).numpy()

            row = [name] + embedding.tolist()
            pd.DataFrame([row]).to_csv(CSV_FILE, mode='a', index=False, header=False)
            count += 1
            print(f"[{count}] Sample captured for {name}")

        elif key == ord('q'):
            print("Exit requested.")
            break
finally:
    # Release the camera and close the windows even if the cell is interrupted or an
    # exception is raised; otherwise the device stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()


Enter the person's name:  ag


[1] Sample captured for ag
[2] Sample captured for ag
[3] Sample captured for ag
[4] Sample captured for ag
[5] Sample captured for ag
[6] Sample captured for ag
[7] Sample captured for ag
[8] Sample captured for ag
[9] Sample captured for ag
[10] Sample captured for ag
