# Installation

In [None]:
#!pip install opencv-python numpy pandas facenet-pytorch torch torchvision onnxruntime scikit-learn pillow

# Neural Net

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F


In [9]:
class ConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> per-channel PReLU.

    Args:
        in_channels: Number of channels in the incoming feature map.
        out_channels: Number of channels produced by the convolution; also
            sizes the batch-norm and PReLU parameters.
        kernel_size: Forwarded to ``nn.Conv2d``.
        stride: Forwarded to ``nn.Conv2d``.
        padding: Forwarded to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # The convolution is created without a bias term; it is immediately
        # followed by a BatchNorm2d layer.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        # One learnable PReLU slope per output channel.
        self.prelu = nn.PReLU(out_channels)

    def forward(self, x):
        """Apply convolution, batch norm and PReLU to ``x`` in that order."""
        features = self.conv(x)
        features = self.bn(features)
        return self.prelu(features)

In [10]:
class DepthWise(nn.Module):
    """3x3 depthwise convolution followed by a 1x1 pointwise convolution.

    The depthwise stage is followed by batch norm and PReLU; the pointwise
    stage is followed by batch norm only (no activation).

    Args:
        in_channels: Channels of the input; the depthwise conv keeps this count.
        out_channels: Channels produced by the pointwise conv.
        stride: Stride of the depthwise conv (the pointwise conv uses stride 1).
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        # groups == in_channels: every input channel gets its own 3x3 filter.
        self.dw = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.prelu = nn.PReLU(in_channels)
        # 1x1 conv mixes channels and changes the channel count.
        self.pw = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Return ``bn2(pw(prelu(bn1(dw(x)))))``."""
        depthwise_out = self.prelu(self.bn1(self.dw(x)))
        return self.bn2(self.pw(depthwise_out))

In [11]:
class Bottleneck(nn.Module):
    """1x1 ``ConvBlock`` followed by a ``DepthWise`` block, with an optional residual add.

    The input is added back onto the output only when ``stride == 1`` and
    ``in_channels == out_channels``, i.e. when input and output have the same
    shape.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
        stride: Stride handed to the depthwise convolution inside ``DepthWise``.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        self.conv1 = ConvBlock(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.dw = DepthWise(out_channels, out_channels, stride)
        # The previous empty ``self.shortcut = nn.Sequential()`` was never used in
        # ``forward`` and held no parameters, so dropping it leaves the
        # state_dict unchanged.
        self.use_shortcut = (stride == 1 and in_channels == out_channels)

    def forward(self, x):
        """Return the block output, plus ``x`` when the residual path is enabled."""
        out = self.dw(self.conv1(x))
        if self.use_shortcut:
            out = out + x
        return out

In [12]:
class MobileFaceNet(nn.Module):
    """Compact CNN that maps a 3-channel face crop to a 128-d unit-length embedding.

    Four stages use stride 2 (``layer1``, ``layer3``, ``layer4``, ``layer5``), so
    a 112x112 input arrives at ``dw7`` as a 7x7 map and the unpadded 7x7
    depthwise convolution reduces it to 1x1. ``linear`` expects exactly 512
    flattened features, so the network as written is tied to that input size.
    """

    def __init__(self):
        super().__init__()
        # Stem
        self.layer1 = ConvBlock(3, 64, kernel_size=3, stride=2, padding=1)
        self.layer2 = DepthWise(64, 64, stride=1)
        # Bottleneck stages; the first block of each stage downsamples.
        self.layer3 = self._make_layer(64, 64, stride=2, num_blocks=4)
        self.layer4 = self._make_layer(64, 128, stride=2, num_blocks=6)
        self.layer5 = self._make_layer(128, 128, stride=2, num_blocks=2)
        # Head: widen to 512 channels, collapse the spatial grid, project to 128-d.
        self.conv6 = ConvBlock(128, 512, kernel_size=1, stride=1, padding=0)
        self.dw7 = nn.Conv2d(512, 512, kernel_size=7, groups=512, bias=False)  # global depthwise
        self.bn7 = nn.BatchNorm2d(512)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(512, 128)
        self.bn8 = nn.BatchNorm1d(128)

    def _make_layer(self, in_channels, out_channels, stride, num_blocks):
        """Build a stage of ``num_blocks`` bottlenecks.

        Only the first block receives ``stride`` and the channel change; the
        remaining ``num_blocks - 1`` blocks keep ``out_channels`` at stride 1.
        """
        blocks = [Bottleneck(in_channels, out_channels, stride)]
        blocks.extend(
            Bottleneck(out_channels, out_channels, stride=1)
            for _ in range(num_blocks - 1)
        )
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return L2-normalised embeddings of shape ``(batch, 128)``."""
        pipeline = (
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5,
            self.conv6,
            self.dw7,
            self.bn7,
            self.flatten,
            self.linear,
            self.bn8,
        )
        for stage in pipeline:
            x = stage(x)
        # Scale every row to unit Euclidean length.
        return F.normalize(x, p=2, dim=1)


In [13]:
# Smoke test: push one random 112x112 RGB tensor through a freshly built network
# and confirm the embedding has the expected shape.
model = MobileFaceNet()

# Switch the batch-norm layers to inference mode before running a single sample.
model.eval()
dummy_input = torch.randn(1, 3, 112, 112)  # 1 RGB face image
# Inference only: no autograd graph is needed, matching the later cells.
with torch.no_grad():
    embedding = model(dummy_input)
print("Output embedding shape:", embedding.shape)


Output embedding shape: torch.Size([1, 128])


# Data Collection

### mode 1: data collection

In [14]:
import cv2
import torch
import numpy as np
import pandas as pd
import os
import threading
from facenet_pytorch import MTCNN
from torchvision import transforms
from tkinter import Tk, filedialog
from PIL import Image

# Setup
mtcnn = MTCNN(keep_all=True)
model = MobileFaceNet()

CSV_FILE = "face_dataset.csv"
WEIGHTS_FILE = "mobilefacenet.pth"
to_tensor = transforms.ToTensor()

# Rows are appended to CSV_FILE across sessions. A freshly constructed network
# has different random weights every time, which would make rows from different
# sessions incomparable, so reuse the checkpoint written by the "Save model"
# cell whenever one is already on disk.
if os.path.exists(WEIGHTS_FILE):
    model.load_state_dict(torch.load(WEIGHTS_FILE, map_location="cpu"))
model.eval()


def _crop_face(image_rgb, box, size=112):
    """Return the ``size`` x ``size`` RGB crop for ``box``, or ``None`` if it is empty.

    Args:
        image_rgb: H x W x 3 image array the box refers to.
        box: Four numbers ``(x1, y1, x2, y2)`` in pixel coordinates.
        size: Side length of the square output crop.
    """
    height, width = image_rgb.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in box]
    # Detector boxes may reach outside the frame. A negative start index would
    # wrap around in NumPy slicing, so clamp to the image bounds first.
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, width), min(y2, height)
    if x2 <= x1 or y2 <= y1:
        return None
    return cv2.resize(image_rgb[y1:y2, x1:x2], (size, size))


def _append_embedding(face_rgb, person_name):
    """Embed one face crop with ``model`` and append ``[name, *embedding]`` to ``CSV_FILE``."""
    face_tensor = to_tensor(face_rgb).unsqueeze(0).float()
    with torch.no_grad():
        embedding = model(face_tensor).squeeze().numpy()
    row = [person_name] + embedding.tolist()
    pd.DataFrame([row]).to_csv(CSV_FILE, mode='a', index=False, header=False)


# Input: name
name = input("Enter the person's name: ")

# Input: mode
mode = input("Choose input method:\n1 - Use webcam\n2 - Upload image\nYour choice: ")

# ==== MODE 1: WEBCAM ====
if mode.strip() == "1":
    cap = cv2.VideoCapture(0)
    print("Press 'c' to capture, 'q' to quit.")
    count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(img)

            # Reset on every frame so a capture can never reuse a stale, empty or
            # undefined crop from an earlier iteration.
            face = None
            if boxes is not None and len(boxes) > 0:
                face = _crop_face(img, boxes[0])  # only first face
                if face is not None:
                    cv2.imshow("Face", cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

            cv2.imshow("Webcam", frame)
            key = cv2.waitKey(1) & 0xFF

            if key == ord('c') and face is not None:
                _append_embedding(face, name)
                print(f"[{count + 1}] Saved embedding for {name}")
                count += 1

            elif key == ord('q'):
                break
    finally:
        # Always free the camera and close the preview windows, even on error.
        cap.release()
        cv2.destroyAllWindows()

# ==== MODE 2: UPLOAD ====
elif mode.strip() == "2":
    file_path = input("Paste the full path of the image file (jpg/png): ").strip()

    if file_path and os.path.exists(file_path):
        try:
            # The context manager closes the underlying file handle after decoding.
            with Image.open(file_path) as pil_img:
                img_np = np.array(pil_img.convert("RGB"))

            boxes, _ = mtcnn.detect(img_np)

            if boxes is not None and len(boxes) > 0:
                face = _crop_face(img_np, boxes[0])
                if face is not None:
                    _append_embedding(face, name)
                    print(f"✅ Saved embedding for {name} from uploaded image.")
                else:
                    print("❌ Face crop is empty.")
            else:
                print("❌ No face detected in image.")
        except Exception as e:
            print("❌ Error processing image:", e)
    else:
        print("❌ File not found. Please check the path.")

else:
    print("❌ Unknown choice. Please enter 1 or 2.")


Enter the person's name:  ankit
Choose input method:
1 - Use webcam
2 - Upload image
Your choice:  1


Press 'c' to capture, 'q' to quit.
[1] Saved embedding for ankit
[2] Saved embedding for ankit
[3] Saved embedding for ankit
[4] Saved embedding for ankit
[5] Saved embedding for ankit
[6] Saved embedding for ankit
[7] Saved embedding for ankit
[8] Saved embedding for ankit
[9] Saved embedding for ankit
[10] Saved embedding for ankit
[11] Saved embedding for ankit
[12] Saved embedding for ankit
[13] Saved embedding for ankit
[14] Saved embedding for ankit
[15] Saved embedding for ankit
[16] Saved embedding for ankit
[17] Saved embedding for ankit
[18] Saved embedding for ankit


# Save model

In [15]:
import os

# Print every entry of the current working directory.
print(os.listdir("."))


['.git', 'dataCollection.ipynb', 'mobileFaceNet.ipynb', '.ipynb_checkpoints', 'concatCsv.ipynb', 'mobilefacenet.pth', 'face_dataset.csv']


In [16]:
# Write the model's state_dict (parameters and buffers) to "mobilefacenet.pth".
torch.save(obj=model.state_dict(), f="mobilefacenet.pth")


In [17]:
import os

# Show every *.pth file in the working directory to confirm the checkpoint exists.
print("Saved models:", list(filter(lambda entry: entry.endswith('.pth'), os.listdir())))


Saved models: ['mobilefacenet.pth']


In [27]:
import cv2
import numpy as np
import pandas as pd
import torch
from facenet_pytorch import MTCNN
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F

# ====== MobileFaceNet Model (Inline) ======
class ConvBlock(nn.Module):
    """Bias-free Conv2d, then BatchNorm2d, then channel-wise PReLU.

    Repeats the definition from the "Neural Net" section so that this cell is
    self-contained.

    Args:
        in_channels: Channel count of the input.
        out_channels: Channel count of the output.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution zero-padding.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.prelu = nn.PReLU(out_channels)

    def forward(self, x):
        """Return ``prelu(bn(conv(x)))``."""
        y = self.conv(x)
        y = self.bn(y)
        y = self.prelu(y)
        return y

class DepthWise(nn.Module):
    """Depthwise 3x3 conv + BN + PReLU, then pointwise 1x1 conv + BN.

    Repeats the definition from the "Neural Net" section so that this cell is
    self-contained.

    Args:
        in_channels: Channels entering (and leaving) the depthwise stage.
        out_channels: Channels leaving the pointwise stage.
        stride: Stride of the depthwise convolution.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        # Depthwise stage: one 3x3 filter per input channel.
        self.dw = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=3, stride=stride, padding=1,
            groups=in_channels, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.prelu = nn.PReLU(in_channels)
        # Pointwise stage: 1x1 conv that sets the output channel count.
        self.pw = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=1, stride=1, padding=0,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Run the depthwise stage (with activation) and the pointwise stage (without)."""
        hidden = self.dw(x)
        hidden = self.bn1(hidden)
        hidden = self.prelu(hidden)
        return self.bn2(self.pw(hidden))

class Bottleneck(nn.Module):
    """``ConvBlock`` (1x1) followed by ``DepthWise``, with a residual add when shapes match.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
        stride: Stride passed to the ``DepthWise`` block.
    """

    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        self.conv1 = ConvBlock(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.dw = DepthWise(out_channels, out_channels, stride)
        # The skip connection is only enabled when neither the resolution nor the
        # channel count changes.
        self.use_shortcut = stride == 1 and in_channels == out_channels

    def forward(self, x):
        """Return the transformed features, adding ``x`` back when the shortcut is enabled."""
        out = self.dw(self.conv1(x))
        return out + x if self.use_shortcut else out

class MobileFaceNet(nn.Module):
    """Face-embedding network producing 128-d, L2-normalised vectors.

    Repeats the definition from the "Neural Net" section so that this cell is
    self-contained. The unpadded 7x7 depthwise convolution and the
    ``Linear(512, 128)`` layer mean the head expects a 7x7 feature map, which is
    what a 112x112 input yields after the four stride-2 stages.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = ConvBlock(3, 64, kernel_size=3, stride=2, padding=1)
        self.layer2 = DepthWise(64, 64, stride=1)
        self.layer3 = self._make_layer(64, 64, stride=2, num_blocks=4)
        self.layer4 = self._make_layer(64, 128, stride=2, num_blocks=6)
        self.layer5 = self._make_layer(128, 128, stride=2, num_blocks=2)
        self.conv6 = ConvBlock(128, 512, kernel_size=1, stride=1, padding=0)
        # Depthwise conv whose 7x7 kernel spans the whole expected feature map.
        self.dw7 = nn.Conv2d(512, 512, kernel_size=7, groups=512, bias=False)
        self.bn7 = nn.BatchNorm2d(512)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(512, 128)
        self.bn8 = nn.BatchNorm1d(128)

    def _make_layer(self, in_channels, out_channels, stride, num_blocks):
        """Stack ``num_blocks`` bottlenecks; only the first one uses ``stride`` and ``in_channels``."""
        head = Bottleneck(in_channels, out_channels, stride)
        tail = [
            Bottleneck(out_channels, out_channels, stride=1)
            for _ in range(num_blocks - 1)
        ]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Return unit-norm embeddings of shape ``(batch, 128)``."""
        # Backbone
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        # Head
        x = self.bn7(self.dw7(self.conv6(x)))
        x = self.bn8(self.linear(self.flatten(x)))
        return F.normalize(x, p=2, dim=1)

# ====== Load the combined embedding dataset ======
# NOTE(review): the data-collection cell appends rows with header=False. If the
# combined file also has no header row, read_csv will consume the first sample as
# column names. Confirm against the notebook that builds this file and pass
# header=None if needed.
df = pd.read_csv("vertically_combined_face_dataset.csv")
names = df.iloc[:, 0].values
embeddings = df.iloc[:, 1:].values.astype('float32')

# ====== Encode labels ======
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(names)

# ====== Train SVM classifier ======
clf = SVC(kernel='linear', probability=True)
clf.fit(embeddings, labels)

# ====== Setup MTCNN & MobileFaceNet ======
import os  # local import: this cell's import list does not include os

WEIGHTS_FILE = "mobilefacenet.pth"
model = MobileFaceNet()
# A freshly constructed network has random weights, so its embeddings would not
# match the stored embeddings the SVM was fitted on. Load the checkpoint written
# by the "Save model" cell so both come from the same network.
if os.path.exists(WEIGHTS_FILE):
    model.load_state_dict(torch.load(WEIGHTS_FILE, map_location="cpu"))
else:
    print(f"⚠️ {WEIGHTS_FILE} not found - using random weights; predictions will not be meaningful.")
model.eval()
to_tensor = transforms.ToTensor()
mtcnn = MTCNN(keep_all=False)

# ====== Start Webcam for Recognition ======
cap = cv2.VideoCapture(0)
print("Press 'q' to quit")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, _ = mtcnn.detect(img)

        if boxes is not None and len(boxes) > 0:
            frame_h, frame_w = img.shape[:2]
            x1, y1, x2, y2 = [int(v) for v in boxes[0]]
            # Clamp to the frame: a negative start index would wrap around in
            # NumPy slicing and produce a wrong or empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)

            if x2 > x1 and y2 > y1:
                face = cv2.resize(img[y1:y2, x1:x2], (112, 112))
                face_tensor = to_tensor(face).unsqueeze(0).float()

                with torch.no_grad():
                    emb = model(face_tensor).numpy()

                # Predict using SVM
                pred = clf.predict(emb)[0]
                class_probs = clf.predict_proba(emb)[0]
                # Report the probability of the predicted class itself. For SVC the
                # arg-max of predict_proba can differ from predict, so taking
                # .max() could label one class with another class's score.
                proba = class_probs[list(clf.classes_).index(pred)]
                name = label_encoder.inverse_transform([pred])[0]

                # Draw bounding box and label (keep the text inside the frame).
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label_y = max(y1 - 10, 20)
                cv2.putText(frame, f"{name} ({proba:.2f})", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 0), 2)

        cv2.imshow("Face Recognition (SVM)", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close windows even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Press 'q' to quit
