**Date: 20-10-2025**

In [29]:
import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

## Dataset Class

In [65]:
class FacialLandmarkDataset(Dataset):
    """Grayscale face images paired with landmarks read from ``.pts`` files.

    Every image file in ``image_dir`` is matched to ``<basename>.pts`` in
    ``annotation_dir``. Images are converted to grayscale, resized to
    ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and scaled to [0, 1]. Landmarks are
    rescaled by the same factors so that they stay aligned with the resized
    image (the inference code maps predictions back from this 224-pixel
    coordinate space).

    Args:
        image_dir: Directory containing the images.
        annotation_dir: Directory containing the matching ``.pts`` files.
        transform: Optional callable applied to the image tensor only. It
            must not move pixels (crop/flip/rotate), because the landmarks
            are not transformed with it.
    """

    IMAGE_SIZE = 224
    # Only these entries of image_dir are treated as images; anything else
    # (annotations, checkpoints, hidden files) would make cv2.imread fail.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.pgm', '.ppm', '.webp')

    def __init__(self, image_dir, annotation_dir, transform=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.image_files = sorted(
            name for name in os.listdir(image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_files)

    def read_pts(self, pts_path):
        """Parse a ``.pts`` file into a flat array ``[x0, y0, x1, y1, ...]``.

        Header lines (``version``, ``n_points``), brace lines and any line
        that is not exactly two floats are skipped.
        """
        points = []
        with open(pts_path, 'r') as f:
            for line in f:
                line = line.strip()
                # Skip metadata or braces
                if line.startswith(("version", "n_points", "{", "}")):
                    continue
                try:
                    x, y = map(float, line.split())
                except ValueError:
                    continue  # Skip lines that can't be parsed
                points.append([x, y])
        return np.array(points).flatten()

    def __getitem__(self, idx):
        """Return ``(image, landmarks)`` for sample ``idx``.

        Returns:
            image: float32 tensor of shape (1, IMAGE_SIZE, IMAGE_SIZE) with
                values in [0, 1] (before the optional ``transform``).
            landmarks: flat float32 tensor ``[x0, y0, x1, y1, ...]`` expressed
                in pixels of the resized image.

        Raises:
            FileNotFoundError: If the image cannot be read by OpenCV.
            ValueError: If the annotation file contains no landmarks.
        """
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        base_name = os.path.splitext(self.image_files[idx])[0]
        pts_path = os.path.join(self.annotation_dir, base_name + '.pts')

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        orig_h, orig_w = image.shape[:2]

        size = self.IMAGE_SIZE
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (size, size))
        image = image / 255.0
        image = torch.tensor(image, dtype=torch.float32).unsqueeze(0)

        landmarks = self.read_pts(pts_path).reshape(-1, 2)
        if landmarks.size == 0:
            raise ValueError(f"No landmarks found in {pts_path}")
        # The annotations are in original-image pixels; rescale them so they
        # refer to the same resized image the network sees.
        landmarks = landmarks * [size / orig_w, size / orig_h]
        landmarks = torch.tensor(landmarks.flatten(), dtype=torch.float32)

        if self.transform is not None:
            image = self.transform(image)

        return image, landmarks

## Model Architecture

In [66]:
class LandmarkModel(nn.Module):
    """CNN that regresses flat (x, y) landmark coordinates from a 1x224x224 image.

    The feature extractor is three ``Conv2d(3x3) -> ReLU -> MaxPool2d(2)``
    stages with 32, 64 and 128 output channels. Its flattened output feeds a
    512-unit hidden layer followed by a linear head with ``num_landmarks * 2``
    outputs.

    Args:
        num_landmarks: Number of (x, y) points to predict.
    """

    def __init__(self, num_landmarks):
        super().__init__()

        # Build the three conv stages in order so layer indices stay 0..8.
        stages = []
        in_channels = 1
        for out_channels in (32, 64, 128):
            stages.extend([nn.Conv2d(in_channels, out_channels, 3), nn.ReLU(), nn.MaxPool2d(2)])
            in_channels = out_channels
        self.cnn = nn.Sequential(*stages)

        # Push a blank 224x224 image through the conv stack to discover how
        # many features the first linear layer has to accept.
        with torch.no_grad():
            probe = self.cnn(torch.zeros(1, 1, 224, 224))
        self.flattened_size = probe.view(1, -1).size(1)

        hidden = nn.Linear(self.flattened_size, 512)
        head = nn.Linear(512, num_landmarks * 2)
        self.fc = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x):
        """Return a ``(batch, num_landmarks * 2)`` tensor of predictions."""
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

## Training Loop

In [49]:
def train_model(model, dataloader, epochs=10):
    """Train ``model`` with MSE loss and Adam (lr=0.001).

    Args:
        model: ``nn.Module`` whose output is compared to the landmark tensor.
        dataloader: Iterable yielding ``(images, landmarks)`` batches.
        epochs: Number of passes over ``dataloader``.

    Returns:
        List with the mean batch loss of each epoch.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Make sure layers such as dropout/batch-norm are in training mode even
    # if the model was previously switched to eval().
    model.train()

    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, landmarks in dataloader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, landmarks)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Avoids an opaque ZeroDivisionError when the dataset is empty.
            raise ValueError("dataloader yielded no batches; nothing to train on")

        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    return epoch_losses

## Testing with WebCam

In [68]:
def test_webcam(model):
    """Run live landmark detection on webcam device 0 until 'q' is pressed.

    Faces are found with OpenCV's frontal-face Haar cascade. Each face crop
    is converted to the same input the dataset produces (grayscale, 224x224,
    divided by 255) and passed to ``model``. The output is read as (x, y)
    pairs in the 224x224 crop and mapped back onto the full frame.

    The model is switched to eval mode. The camera and the display window are
    released even if an error or a kernel interrupt occurs.

    Args:
        model: Trained landmark model.
    """
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    cap = cv2.VideoCapture(0)

    model.eval()
    # Feed the crop on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame (camera missing/unplugged): stop instead of
                # crashing in cvtColor on a None frame.
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

            for (x, y, w, h) in faces:
                roi = gray[y:y+h, x:x+w]
                roi = cv2.resize(roi, (224, 224))
                # Must match the dataset preprocessing, which only divides by
                # 255; any extra normalisation here shifts the input
                # distribution away from what the model was trained on.
                roi = roi.astype(np.float32) / 255.0
                roi = torch.tensor(roi).unsqueeze(0).unsqueeze(0).to(device)

                with torch.no_grad():
                    output = model(roi).detach().cpu().numpy().reshape(-1, 2)
                output *= [w / 224, h / 224]  # scale to face size
                output += [x, y]              # shift to original frame

                for (lx, ly) in output:
                    cv2.circle(frame, (int(lx), int(ly)), 2, (0, 255, 0), -1)

            cv2.imshow('Facial Landmark Detection', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

## Evaluation

In [69]:
def evaluate_model(model, dataloader):
    """Compute and print the mean per-batch MSE of ``model`` over ``dataloader``.

    The model is evaluated in eval mode with gradients disabled. Its previous
    train/eval mode is restored before returning.

    Args:
        model: ``nn.Module`` to evaluate.
        dataloader: Iterable yielding ``(images, landmarks)`` batches.

    Returns:
        The average of the per-batch MSE losses as a float.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    criterion = nn.MSELoss()
    total_loss = 0.0
    num_batches = 0

    was_training = model.training
    model.eval()  # disable dropout / use running batch-norm statistics
    try:
        with torch.no_grad():
            for images, landmarks in dataloader:
                outputs = model(images)
                loss = criterion(outputs, landmarks)
                total_loss += loss.item()
                num_batches += 1
    finally:
        model.train(was_training)

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    avg_loss = total_loss / num_batches
    print(f"Evaluation Loss (MSE): {avg_loss:.4f}")
    return avg_loss

## Run

In [70]:
# Fix the seed so the split and the weight initialisation are reproducible.
SEED = 42
torch.manual_seed(SEED)

# Load dataset
dataset = FacialLandmarkDataset('dataset/images/', 'dataset/annotations')
if len(dataset) < 2:
    raise ValueError("Need at least 2 samples to build a train/validation split")

image, landmarks = dataset[0]
num_landmarks = landmarks.shape[0] // 2
print(f"No of landmarks: {num_landmarks}")

# Hold out 20% of the samples (at least one) so the evaluation below measures
# generalisation instead of re-scoring the data the model was trained on.
val_size = max(1, len(dataset) // 5)
train_size = len(dataset) - val_size
train_set, val_set = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)
dataloader = DataLoader(train_set, batch_size=8, shuffle=True)
val_loader = DataLoader(val_set, batch_size=8, shuffle=False)

# Initialize and train
model = LandmarkModel(num_landmarks)
train_model(model, dataloader, epochs=20)

# Evaluate on the held-out split
val_loss = evaluate_model(model, val_loader)

# Test on webcam
#test_webcam(model)

No of landmarks: 68


KeyboardInterrupt: 

## Save

In [54]:
# Persist only the learned parameters (the state dict), not the module object,
# so the checkpoint can be loaded into a freshly constructed LandmarkModel.
trained_weights = model.state_dict()
torch.save(trained_weights, 'landmark_model.pth')
print("Model saved to landmark_model.pth")

Model saved to landmark_model.pth


## Load and Test

In [62]:
import torch
import cv2
import numpy as np

# Recreate the model class
class LandmarkModel(nn.Module):
    """CNN that regresses flat (x, y) landmark coordinates from a 1x224x224 image.

    This uses the same layer layout as the training-time definition (three
    ``Conv2d(3x3) -> ReLU -> MaxPool2d(2)`` stages, then ``Linear -> ReLU ->
    Linear``), so the ``state_dict`` keys match a saved checkpoint.

    Args:
        num_landmarks: Number of (x, y) points to predict. Defaults to 68,
            which yields the 136 outputs this cell previously hard-coded, so
            both ``LandmarkModel()`` and ``LandmarkModel(num_landmarks)`` work.
    """

    def __init__(self, num_landmarks=68):
        super(LandmarkModel, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, 3), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3), nn.ReLU(), nn.MaxPool2d(2)
        )
        # Run a blank 224x224 image through the conv stack to learn the size
        # of the flattened feature vector the first linear layer receives.
        with torch.no_grad():
            dummy_input = torch.zeros(1, 1, 224, 224)
            dummy_output = self.cnn(dummy_input)
            self.flattened_size = dummy_output.view(1, -1).size(1)

        self.fc = nn.Sequential(
            nn.Linear(self.flattened_size, 512),
            nn.ReLU(),
            nn.Linear(512, num_landmarks * 2)
        )

    def forward(self, x):
        """Return a ``(batch, num_landmarks * 2)`` tensor of predictions."""
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

# Load model
model = LandmarkModel()
# The webcam loop below feeds CPU tensors and calls .numpy() directly on the
# output, so force the weights onto the CPU regardless of where they were saved.
model.load_state_dict(torch.load('landmark_model.pth', map_location='cpu'))
model.eval()
print("Model loaded and ready for webcam testing")

# Start webcam detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and window are released even when the
# loop is stopped by an exception or a kernel interrupt; otherwise the device
# stays locked until the kernel is restarted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            # Same preprocessing as the dataset: grayscale, 224x224, /255.
            roi = gray[y:y+h, x:x+w]
            roi = cv2.resize(roi, (224, 224))
            roi = roi / 255.0
            roi = torch.tensor(roi, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

            with torch.no_grad():
                output = model(roi).numpy().reshape(-1, 2)
            output *= [w / 224, h / 224]  # 224-pixel crop -> face size
            output += [x, y]              # face box -> full-frame coordinates

            for (lx, ly) in output:
                cv2.circle(frame, (int(lx), int(ly)), 2, (0, 255, 0), -1)

        cv2.imshow('Facial Landmark Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

Model loaded and ready for webcam testing


In [71]:
# Load OpenCV DNN face detector.
# Both files are downloaded from the OpenCV GitHub repository.
prototxt_path = 'deploy.prototxt'
caffemodel_path = 'res10_300x300_ssd_iter_140000.caffemodel'
face_net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

In [72]:
model = LandmarkModel(num_landmarks)  # Use correct number from earlier
# Inference below builds its input tensors on the CPU, so load the weights
# onto the CPU regardless of the device they were saved from.
model.load_state_dict(torch.load('landmark_model.pth', map_location='cpu'))
model.eval()

LandmarkModel(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=86528, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=136, bias=True)
  )
)

In [None]:
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and window are released even when the
# loop is stopped by an exception or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        h, w = frame.shape[:2]

        # Step 1: Detect face using OpenCV DNN
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
        face_net.setInput(blob)
        detections = face_net.forward()

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence <= 0.6:
                continue

            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            x1, y1, x2, y2 = box.astype("int")

            # The detector can return boxes that extend past the frame.
            # Negative indices would wrap around when slicing and an empty
            # crop makes cvtColor/resize raise, so clamp and skip bad boxes.
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)
            if x2 <= x1 or y2 <= y1:
                continue

            # Step 2: Crop and preprocess face (grayscale, 224x224, /255)
            face = frame[y1:y2, x1:x2]
            gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            resized = cv2.resize(gray, (224, 224))
            normalized = resized.astype(np.float32) / 255.0
            tensor = torch.tensor(normalized).unsqueeze(0).unsqueeze(0)

            # Step 3: Predict landmarks and map them from the 224-pixel crop
            # back to full-frame coordinates
            with torch.no_grad():
                preds = model(tensor).cpu().numpy().reshape(-1, 2)
            preds *= [(x2 - x1) / 224, (y2 - y1) / 224]
            preds += [x1, y1]

            # Step 4: Draw landmarks
            for (lx, ly) in preds:
                cv2.circle(frame, (int(lx), int(ly)), 2, (0, 255, 0), -1)

        cv2.imshow("Facial Landmark Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()