In [1]:
DATASET_PATH = "data/lipread_mp4/"
BASE_PATH = "./"

In [2]:
import os

In [3]:
LANDMARKS_PATH = "landmarks/shape_predictor_68_face_landmarks_GTX.dat"

In [4]:
# Import Dependencies / Libraries
import os
import glob
import numpy as np
import matplotlib.pyplot as plt

# Video and Image Processing
import cv2

# Face Detection and Landmark Generation
import dlib

from imutils import face_utils
import imutils

In [5]:
def load_video(filename: str, path: str = DATASET_PATH):
  """Loads a video from given path with filename

  Args:
    filename (str): name of the video file
    path (str): location of the video

  Returns:
    A generator object of the frames of the video.
  """
  cap = cv2.VideoCapture(os.path.join(filename))

  while(cap.isOpened()):
    ret, frame = cap.read()
    if not ret:
        break
    yield frame
  cap.release()

# def load_video(filename: str):
#     """Loads a video from the given path with the filename.

#     Args:
#         filename (str): Name of the video file.

#     Returns:
#         A list of the frames of the video.
#     """
#     cap = cv2.VideoCapture(os.path.join(filename))  # Use the full file path

#     frames = []
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break
#         frames.append(frame)
#     cap.release()

#     return frames

In [6]:
def get_mp4(dir=DATASET_PATH):
    if os.path.exists(dir) and os.path.isdir(dir):
        subdirectories = [os.path.join(dir, d) for d in os.listdir(dir)[:20]]
        mp4_files = []
        for subdirectory in subdirectories:
            mp4_files.extend(glob.glob(os.path.join(subdirectory, "train/*.mp4"), recursive=True))
        return mp4_files

In [7]:
len(get_mp4())

19646

In [8]:
# Path of landmarks model
LANDMARKS_PATH = os.path.join(BASE_PATH + "/landmarks/shape_predictor_68_face_landmarks_GTX.dat")

# Create a face detector
face_detector = dlib.get_frontal_face_detector()

# Create a landmark detector
landmark_detector = dlib.shape_predictor(LANDMARKS_PATH)

def get_landmarks(frame) -> np.ndarray:
    """Takes a frame and generates landmarks for the first face
    
    Args:
    frame: video frame or image required to generate landmarks
    
    Returns:
    A numpy array containing the co-ordinates of the landmarks of the first face in the given frame
    """
    
    faces = face_detector(frame)
    if faces:
      landmarks = landmark_detector(frame, faces[0])
      landmarks = face_utils.shape_to_np(landmarks)
      return landmarks

In [9]:
def generate_video_landmarks(frames: list) -> np.ndarray:
  """Generate landmarks the given video

  Args:
    filename (str): filename specifying the video

  Returns:
    A numpy.ndarray containing all the landmarks for the faces in the video"""
  landmarks = []

  for frame in frames:
    landmarks.append(get_landmarks(frame))
  return landmarks

In [10]:
def linear_interpolation(start_idx: int, end_idx: int, landmarks) -> list:
    """Defines a linear interpolation function to interpolate missing landmarks between indices

    Args:
        start_idx (int): An integer defining the starting index
        end_idx (int): An integer defining the stopping index
        landmarks: An array of size 68 containing the (x,y) values of the facial landmarks

    Returns:
        landmarks array after the missing points have been interpolated.
    """
    start_landmarks = landmarks[start_idx]
    end_landmarks = landmarks[end_idx]
    delta_idx = end_idx - start_idx
    delta_landmarks = end_landmarks - start_landmarks
    
    # Apply linear interpolation formula
    for idx in range(1, delta_idx):
        landmarks[idx + start_idx] = start_landmarks + delta_landmarks/delta_idx * idx
    return landmarks

In [11]:
def landmarks_interpolation(landmarks) -> np.ndarray:
    """Adds the missing landmarks to the landmarks array

    Args:
        landmarks: An array containing all the detected landmarks

    Returns:
        landmarks array filled in with missing landmarks
    """
    # Obtain indices of all the valid landmarks (i.e landmarks not None)
    valid_landmarks_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None]

    # For middle parts of the landmarks array
    for idx in range(1, len(valid_landmarks_idx)):
        # If the valid landmarks indices are adjacent then skip to next iteration
        if valid_landmarks_idx[idx]-valid_landmarks_idx[idx-1] == 1:
            continue
        landmarks = linear_interpolation(start_idx=valid_landmarks_idx[idx-1],
                                        end_idx=valid_landmarks_idx[idx],
                                        landmarks=landmarks)

    # For beginning and ending parts of the landmarks array
    valid_landmarks_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None]
    if valid_landmarks_idx:
        landmarks[:valid_landmarks_idx[0]] = [landmarks[valid_landmarks_idx[0]]] * valid_landmarks_idx[0]
        landmarks[valid_landmarks_idx[-1]:] = [landmarks[valid_landmarks_idx[-1]]] * (len(landmarks) - valid_landmarks_idx[-1])

    return np.array(landmarks)

In [12]:
# from tqdm import tqdm

# landmarks_array = []
# mp4_files = get_mp4()

# for mp4 in tqdm(mp4_files):
#     landmarks = generate_video_landmarks(mp4)
#     landmarks_array.append(landmarks)

In [13]:
import multiprocessing
import threading
import concurrent.futures
from tqdm import tqdm

# Define the number of processes to use for parallelism.
num_processes = multiprocessing.cpu_count()

def process_mp4(mp4_file):
    frames = load_video(mp4_file)
    landmarks = generate_video_landmarks(frames)
    return landmarks_interpolation(landmarks)

landmarks_array = []
mp4_files = get_mp4()

with multiprocessing.Pool(num_processes) as pool:
    for landmarks in tqdm(pool.imap_unordered(process_mp4, mp4_files), total=len(mp4_files)):
        landmarks_array.append(landmarks)

 56%|█████████████████████████████████████████████████████████████████████████████████████████████                                                                        | 11088/19646 [14:11<10:57, 13.02it/s]Process ForkPoolWorker-5:
Process ForkPoolWorker-2:
Process ForkPoolWorker-6:



KeyboardInterrupt: 

In [14]:
import gc
import multiprocessing
import concurrent.futures
from tqdm import tqdm
import cv2

# Define the number of processes to use for parallelism.
num_processes = multiprocessing.cpu_count() - 1

def video_processing(frames):
    landmarks = generate_video_landmarks(frames)
    return landmarks_interpolation(landmarks)

def process_mp4_batch(mp4_files):
    landmarks_array = []

    with concurrent.futures.ThreadPoolExecutor() as thread_executor:
        frames = list(thread_executor.map(load_video, mp4_files))

    batch_size = 10  # Adjust this value based on your memory constraints

    for i in range(0, len(frames), batch_size):
        batch_frames = frames[i:i + batch_size]
        with multiprocessing.Pool(num_processes) as pool:
            batch_landmarks = list(tqdm(pool.imap_unordered(video_processing, batch_frames), total=len(batch_frames)))

        landmarks_array.extend(batch_landmarks)

        # Explicitly trigger garbage collection after processing each batch
        gc.collect()

    return landmarks_array

def main():
    batch_size = 300
    mp4_files = get_mp4()
    all_landmarks = []

    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = []
        for i in range(0, len(mp4_files), batch_size):
            batch = mp4_files[i:i + batch_size]
            future = executor.submit(process_mp4_batch, batch)
            futures.append(future)

        # Collect results
        for future in concurrent.futures.as_completed(futures):
            all_landmarks.extend(future.result())

    return all_landmarks

all_landmarks = main()


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:15<00:00, 19.90it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:16<00:00, 18.36it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:17<00:00, 17.59it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:16<00:00, 17.94it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:17<00:00, 17.27it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:17<00:00, 17.34it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:18<00:00, 16.53it/s]
100%|█████████████████████████████████████████████████████████

KeyboardInterrupt: 

In [24]:
landmarks_array = np.array(landmarks_array)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (19646, 29) + inhomogeneous part.

In [23]:
landmarks_array.shape

AttributeError: 'list' object has no attribute 'shape'

In [23]:
mean_array = np.mean(landmarks_array, axis=0)

In [24]:
mean_array = np.mean(mean_array, axis=0)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
plt.scatter(mean_array[:,0], mean_array[:,1])

In [None]:
np.max(mean_array, axis=0)

In [25]:
video = load_video(mp4_files[1])
video

[]

In [16]:
mp4_files = get_mp4()