In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from typing import List
from matplotlib import pyplot as plt
#import imageio
import dlib
import cv2
import os

2024-03-07 16:57:32.703596: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
!wget   http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 # DOWNLOAD LINK
!bunzip2 shape_predictor_68_face_landmarks.dat.bz2
datFile =  "shape_predictor_68_face_landmarks.dat"

--2024-03-07 16:57:37--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2.1’


2024-03-07 16:58:33 (1.08 MB/s) - ‘shape_predictor_68_face_landmarks.dat.bz2.1’ saved [64040097/64040097]

bunzip2: Output file shape_predictor_68_face_landmarks.dat already exists.


In [5]:
LIP_MARGIN = 0.4                # Marginal rate for lip-only image.
RESIZE = (70,30)                # Final image size
for i in range(33):
    if i != 18:
        VIDEO_PATH=f'/Users/alessiastroni/code/girishgautam/lip_translate/raw_data/s{i+2}/'


In [6]:
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

In [7]:
video_list = os.listdir(VIDEO_PATH)

In [8]:
def shape_to_list(shape):
    coords = []
    for i in range(0, 68):
        coords.append((shape.part(i).x, shape.part(i).y))
    return coords

In [9]:
cropped_img_list=[]

for vid_name in video_list[:50]: # Iterate on video files

    print(f"Processing video: {vid_name}")

    if vid_name.endswith('.mpg'):
        vid_path = VIDEO_PATH + vid_name
        vid = cv2.VideoCapture(vid_path)

        frames = []               # A list to hold frame images
        frames_colour = []         # A list to hold original frame images
        while(True):
            success, frame = vid.read()
                # Read frame
            if not success:
                break                           # Break if no frame to read left
            gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)   # Convert image into grayscale
            frames.append(gray)                  # Add image to the frame buffer
            frames_colour.append(frame)

        vid.release()

        landmarks = []
        for (i, image) in enumerate(frames):          #iterate on frame lis
            face_rects = detector(image,1)             #detects face
            if len(face_rects) < 1:                 #no faces
                print(f"No face detected: {vid_path}")
                continue
            if len(face_rects) > 1:                  #too many faces
                print(f"Too many faces: {vid_path}")
                continue
            rect = face_rects[0]                    #proper number of faces
            landmark = predictor(image, rect)   #detect face landmarks
            landmark = shape_to_list(landmark)
            landmarks.append(landmark)

        cropped_img = []
        for (i,landmark) in enumerate(landmarks):
            lip_landmark = landmark[48:68]                                          # Landmark corresponding to lip
            lip_x = sorted(lip_landmark,key = lambda pointx: pointx[0])             # Lip landmark sorted for determining lip region
            lip_y = sorted(lip_landmark, key = lambda pointy: pointy[1])
            x_add = int((-lip_x[0][0]+lip_x[-1][0])*LIP_MARGIN*1)                     # Determine Margins for lip-only image
            y_add = int((-lip_y[0][1]+lip_y[-1][1])*LIP_MARGIN*2)
            crop_pos = (lip_x[0][0]-x_add, lip_x[-1][0]+x_add, lip_y[0][1]-y_add, lip_y[-1][1]+y_add)
            cropped = frames_colour[i][crop_pos[2]:crop_pos[3],crop_pos[0]:crop_pos[1]]        # Crop image
            cropped = cv2.resize(cropped,(RESIZE[0],RESIZE[1]),interpolation=cv2.INTER_CUBIC)       # Resize
            cropped_img.append(cropped)

        print('------------ images cropped ----------')

        cropped_img_list.append(cropped_img)

        print(f"VIDEO COMPLETED: {vid_name}")


Processing video: pgag9n.mpg
------------ images cropped ----------
VIDEO COMPLETED: pgag9n.mpg
Processing video: brwj7n.mpg
------------ images cropped ----------
VIDEO COMPLETED: brwj7n.mpg
Processing video: bbwdza.mpg
------------ images cropped ----------
VIDEO COMPLETED: bbwdza.mpg
Processing video: lgbc7p.mpg
------------ images cropped ----------
VIDEO COMPLETED: lgbc7p.mpg
Processing video: lbbu1p.mpg
------------ images cropped ----------
VIDEO COMPLETED: lbbu1p.mpg
Processing video: bgiq1n.mpg
------------ images cropped ----------
VIDEO COMPLETED: bgiq1n.mpg
Processing video: lbim9p.mpg
------------ images cropped ----------
VIDEO COMPLETED: lbim9p.mpg
Processing video: pbaf2s.mpg
------------ images cropped ----------
VIDEO COMPLETED: pbaf2s.mpg
Processing video: srieza.mpg
------------ images cropped ----------
VIDEO COMPLETED: srieza.mpg
Processing video: bgix7p.mpg
------------ images cropped ----------
VIDEO COMPLETED: bgix7p.mpg
Processing video: lrbiza.mpg
-----------

In [10]:
def makeitgray(image_pls, i, j):
    return np.dot(image_pls[i][j][...,:3], [0.2989, 0.5780, 0.1440])

In [11]:
gray_image_list = []
for i, image in enumerate(cropped_img_list):
    gray_frame_list = []
    for j, frame in enumerate(image):
        gray_image = makeitgray(cropped_img_list,i,j)
        # comment this out if you don't need the shape (X, X, 1)
        gray_image = np.expand_dims(gray_image, axis=2)
        gray_frame_list.append(gray_image)
    gray_image_list.append(gray_frame_list)

In [12]:
def standardize(vid):
    mean_vid = vid.mean(axis=0)
    std_vid = vid.std(axis=0)
    return (vid-mean_vid)/std_vid

In [13]:
standardized_list=[]
for vid in gray_image_list:
    #print(type(vid))
    vid= np.array(vid)
    standard_vid = standardize(vid)
    standardized_list.append(standard_vid)

In [14]:
dict_keys=[]
for vid_name in video_list:
    dict_keys.append(vid_name.replace('.mpg', ''))

In [21]:
final_vids= dict(zip(dict_keys, standardized_list))


In [16]:
np.savez('zipped_vids.npz', **final_vids)

In [20]:
# loaded_data = np.load('zipped_vids.npz', allow_pickle=True).items()
# loaded_dict = {key: val for key, val in loaded_data}

# print(loaded_dict)

In [19]:
# import json

# for key, value in final_vids.items():
#     if isinstance(value, np.ndarray):
#         final_vids[key] = value.tolist()

# with open ('preprocessed_data.json', 'w') as f:
#     json.dump(final_vids, f)