In [1]:
import dlib
import cv2
import numpy as np
import pandas as pd
import os
from pathlib import Path
from math import radians, degrees

In [2]:
# dlib's frontal face detector; preprocess_face() calls it on each RGB frame to get face boxes.
face_detector = dlib.get_frontal_face_detector()

In [3]:
# Landmark predictor loaded from a model file given as a relative path, so the .dat file
# must be present in the kernel's current working directory.
shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [11]:
# Input and output locations. Both may be overridden through environment variables so the
# notebook can run on machines that do not share the original directory layout; when the
# variables are unset the original hard-coded values are used.
VIDEO_DIR = os.environ.get("CV_VIDEO_DIR", "C:\\cv_project\\data\\Videos")
OUTPUT_DIR = os.environ.get("CV_OUTPUT_DIR", "output_csv")

# Column names of the per-frame feature vector, in the order in which
# extract_features_from_frame() emits the values.
FEATURE_COLS = [
    "Pitch", "Yaw", "Roll", "inBrL", "otBrL", "inBrR", "otBrR",
    "EyeOL", "EyeOR", "oLipH", "iLipH", "LipCDt"
]

In [5]:
def shape_to_np(shape, dtype = 'double'):
    """Copy the first 68 parts of a landmark shape into a NumPy array.

    Args:
        shape: Object exposing ``part(i)``; each returned part must have ``x``
            and ``y`` attributes.
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape (68, 2) whose row ``i`` is ``(x, y)`` of ``shape.part(i)``.
    """
    points = [shape.part(idx) for idx in range(68)]
    coords = np.empty((68, 2), dtype=dtype)
    coords[:, 0] = [pt.x for pt in points]
    coords[:, 1] = [pt.y for pt in points]
    return coords

In [None]:
def preprocess_face(frame):
    """Detect a face in ``frame`` and return its landmark coordinates.

    The face detector runs on a ``COLOR_BGR2RGB`` conversion of the frame and the
    landmark predictor on a ``COLOR_BGR2GRAY`` conversion. Only the first
    detection returned by the detector is used.

    Args:
        frame: Image array accepted by ``cv2.cvtColor`` as a BGR image.

    Returns:
        (68, 2) array of landmark (x, y) coordinates, or an all-zero (68, 2)
        array when the detector finds no face.
    """
    detections = face_detector(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), 1)

    if not len(detections):
        # No face: callers receive an all-zero placeholder (not None).
        return np.zeros((68, 2))

    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return shape_to_np(shape_predictor(grayscale, detections[0]))



In [None]:

# 3D reference points of a generic face model. The rows are in the same order as the 2D
# points produced by get_2d_points_from_landmarks(); calculate_pose() hands both arrays
# to cv2.solvePnP as corresponding point sets.
model_points = np.array(
    [
        [0.0, 0.0, 0.0],        # Nose tip
        [0.0, -70.0, -20.0],    # Chin
        [-35.0, 50.0, -20.0],   # Left eye
        [35.0, 50.0, -20.0],    # Right eye
        [-35.0, -50.0, -20.0],  # Left mouth corner
        [35.0, -50.0, -20.0],   # Right mouth corner
    ],
    dtype=np.float64,
)

def get_2d_points_from_landmarks(landmarks):
    """
    Select the six landmarks used for head-pose estimation.

    ``landmarks`` must be indexable as ``landmarks[i][0]`` / ``landmarks[i][1]``
    for the indices below. Returns a (6, 2) float array of (x, y) rows.
    """
    # Nose tip, chin, left eye, right eye, left mouth corner, right mouth corner
    pose_indices = (30, 8, 36, 45, 48, 54)
    return np.array(
        [(landmarks[idx][0], landmarks[idx][1]) for idx in pose_indices],
        dtype="double",
    )

def get_pose_angles(rvec, tvec):
    """
    Turn a rotation vector into (pitch, yaw, roll) angles in degrees.

    ``rvec`` is expanded to a rotation matrix with ``cv2.Rodrigues`` and the
    angles are read from its entries. ``tvec`` is accepted but not used.
    """
    rotation, _ = cv2.Rodrigues(rvec)

    r00, r10 = rotation[0, 0], rotation[1, 0]
    r20, r21, r22 = rotation[2, 0], rotation[2, 1], rotation[2, 2]

    pitch_rad = np.arctan2(r21, r22)                          # rotation around X-axis
    yaw_rad = np.arctan2(-r20, np.sqrt(r21**2 + r22**2))      # rotation around Y-axis
    roll_rad = np.arctan2(r10, r00)                           # rotation around Z-axis

    return degrees(pitch_rad), degrees(yaw_rad), degrees(roll_rad)

def calculate_pose(landmarks, frame):
    """
    Estimate head pitch, yaw and roll (degrees) from facial landmarks.

    The six 2D points picked by ``get_2d_points_from_landmarks`` are matched
    against ``model_points`` with ``cv2.solvePnP``. Camera intrinsics are
    approximated from the frame size and lens distortion is taken to be zero.
    The status flag returned by ``solvePnP`` is ignored.
    """
    height, width = frame.shape[0], frame.shape[1]

    # Approximate intrinsics: focal length = image width, principal point = image centre.
    camera_matrix = np.array(
        [
            [width, 0, width / 2],
            [0, width, height / 2],
            [0, 0, 1],
        ],
        dtype="double",
    )
    no_distortion = np.zeros((4, 1))

    _, rvec, tvec = cv2.solvePnP(
        model_points,
        get_2d_points_from_landmarks(landmarks),
        camera_matrix,
        no_distortion,
    )

    return get_pose_angles(rvec, tvec)

In [8]:
def extract_features_from_frame(frame):
    """Compute the per-frame feature vector described by ``FEATURE_COLS``.

    Args:
        frame: Image passed on to ``preprocess_face`` and ``calculate_pose``.

    Returns:
        1-D array of length ``len(FEATURE_COLS)``, ordered like ``FEATURE_COLS``:
        head pose (pitch, yaw, roll in degrees) followed by landmark distances
        in pixels. All zeros when no face landmarks are available.
    """
    # Extract landmarks for the given frame
    landmarks = preprocess_face(frame)

    # preprocess_face signals "no face" with an all-zero array rather than None.
    # Accept both so the pose solver is never run on placeholder landmarks.
    if landmarks is None or not np.any(landmarks):
        return np.zeros(len(FEATURE_COLS))

    # -> Head movement features
    pitch, yaw, roll = calculate_pose(landmarks, frame)

    # Landmark indices follow the 68-point layout (L/R = the subject's left/right):
    #   17-21 right brow (17 outer end, 21 inner end), 22-26 left brow (22 inner, 26 outer)
    #   36-41 right eye (36 outer corner, 39 inner), 42-47 left eye (42 inner, 45 outer)

    # -> Eyebrow features: inner brow end to inner eye corner, outer end to outer corner
    inBrL = np.linalg.norm(landmarks[22] - landmarks[42])
    otBrL = np.linalg.norm(landmarks[26] - landmarks[45])
    # The right side mirrors the left; pairing 17 with 39 and 21 with 36 would measure
    # the brow diagonally.
    inBrR = np.linalg.norm(landmarks[21] - landmarks[39])
    otBrR = np.linalg.norm(landmarks[17] - landmarks[36])

    # -> Eye features, using the same L/R convention as the brows (left eye = 42-47).
    # NOTE(review): this is the corner-to-corner width of each eye. If "EyeO" is meant
    # to be eyelid opening, it should use upper/lower lid points instead - confirm.
    EyeOL = np.linalg.norm(landmarks[42] - landmarks[45])
    EyeOR = np.linalg.norm(landmarks[36] - landmarks[39])

    # -> Lip features: vertical outer/inner lip height and mouth-corner distance
    oLipH = np.abs(landmarks[51][1] - landmarks[57][1])
    iLipH = np.abs(landmarks[62][1] - landmarks[66][1])
    LipCDt = np.linalg.norm(landmarks[48] - landmarks[54])

    return np.array([
        pitch, yaw, roll, inBrL, otBrL, inBrR, otBrR, EyeOL, EyeOR, oLipH, iLipH, LipCDt
    ])

In [9]:
def process_video(video_path):
    """Extract per-frame features from one video and save them as a CSV file.

    Every frame read from ``video_path`` is passed to
    ``extract_features_from_frame``. The rows are written to
    ``OUTPUT_DIR/<video stem>_raw.csv`` with ``FEATURE_COLS`` as the header.
    ``OUTPUT_DIR`` is created if it does not exist yet.

    Args:
        video_path: ``pathlib.Path`` of the video file (``.name`` and ``.stem``
            are used).

    Returns:
        None. If no frame can be read, a warning is printed and nothing is written.
    """
    print(f"Processing {video_path.name}...")
    cap = cv2.VideoCapture(str(video_path))
    features = []

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            features.append(extract_features_from_frame(frame))
    finally:
        # Release the capture even if feature extraction raises part-way through.
        cap.release()

    if len(features) == 0:
        print(f"Warning: No frames extracted for {video_path.name}")
        return

    df = pd.DataFrame(np.array(features), columns=FEATURE_COLS)

    # to_csv does not create missing directories, so make sure the target exists.
    output_dir = Path(OUTPUT_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)

    output_path = output_dir / f"{video_path.stem}_raw.csv"
    df.to_csv(output_path, index=False)
    print(f"Saved CSV: {output_path.name}")

def process_all_videos():
    """Run ``process_video`` on every ``*.avi`` file directly inside ``VIDEO_DIR``.

    Files are processed in sorted path order so repeated runs behave the same.
    A warning is printed when no matching file is found (for example when
    ``VIDEO_DIR`` does not exist), instead of finishing silently.
    """
    video_files = sorted(Path(VIDEO_DIR).glob("*.avi"))
    if not video_files:
        print(f"Warning: no .avi files found in {VIDEO_DIR}")
        return

    for video_path in video_files:
        process_video(video_path)

In [None]:
# Run the full extraction: one CSV per .avi file found in VIDEO_DIR.
process_all_videos()