## All data to csv file

In [212]:
#  https://github.com/natanielruiz/deep-head-pose.git
## data download link  👇
# http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/Database/AFLW2000-3D.zip 

In [213]:
# Mount Google Drive into the Colab runtime; later cells read the dataset
# archive and the demo video from paths below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [214]:
!unzip /content/drive/MyDrive/AFLW2000-3D.zip

Archive:  /content/drive/MyDrive/AFLW2000-3D.zip
replace AFLW2000/Code/DrawSolidHead.m? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: AFLW2000/Code/DrawSolidHead.m  
  inflating: AFLW2000/Code/DrawTextureHead.m  
  inflating: AFLW2000/Code/main_show_without_BFM.m  
  inflating: AFLW2000/Code/main_show_with_BFM.m  
  inflating: AFLW2000/Code/Mex/compile.m  
  inflating: AFLW2000/Code/Mex/Tnorm_Vnorm.h  
  inflating: AFLW2000/Code/Mex/Tnorm_VnormC.cpp  
  inflating: AFLW2000/Code/Mex/Tnorm_VnormC.mexw64  
  inflating: AFLW2000/Code/ModelGeneration/ModelGenerate.m  
  inflating: AFLW2000/Code/ModelGeneration/model_info.mat  
  inflating: AFLW2000/Code/Model_Exp.mat  
  inflating: AFLW2000/Code/Model_Shape_Sim.mat  
  inflating: AFLW2000/Code/NormDirection.m  
  inflating: AFLW2000/Code/readme.txt  
  inflating: AFLW2000/Code/RotationMatrix.m  
  inflating: AFLW2000/Code/Tnorm_VnormC.mexw64  
  inflating: AFLW2000/image00002.jpg  
  inflating: AFLW2000/image00002.mat  
  inflating: 

In [215]:
%%capture
!pip install mediapipe

In [216]:
import numpy as np
import os,cv2,math,glob,random
import scipy.io as sio
from math import cos, sin
from pathlib import Path
import pandas as pd
import mediapipe
from PIL import Image, ImageFilter
import warnings
warnings.filterwarnings('ignore')
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt 
import plotly.express as px
import seaborn as sns
import plotly.graph_objects as go
import math
from plotly.subplots import make_subplots
from numpy import linalg as LA
import plotly.express as px



In [217]:
def multiple_histogram(data):
    """Build a figure with one histogram per column of ``data``.

    The histograms are laid out on a grid that is three subplots wide and
    filled row by row, in column order; each subplot is titled with its
    column name.

    Args:
        data: table-like object exposing ``.columns`` and column access via
            ``data[name]`` (a pandas DataFrame in this notebook).

    Returns:
        The value returned by ``update_layout`` on the subplot figure
        (fixed 1100x1600 layout with a global title).
    """
    grid_width = 3
    column_names = data.columns
    figure = make_subplots(
        rows=math.ceil(len(column_names) / grid_width),
        cols=grid_width,
        subplot_titles=(column_names),
    )

    for position, name in enumerate(column_names):
        # divmod gives the zero-based grid cell; plotly rows/cols are one-based.
        row_offset, col_offset = divmod(position, grid_width)
        figure.add_trace(
            go.Histogram(x=data[name]),
            row=row_offset + 1,
            col=col_offset + 1,
        )

    return figure.update_layout(
        height=1600,
        width=1100,
        title_text="Multiple Histogram for all featrues",
    )

In [218]:
def draw_axis(img, pitch,yaw,roll, tdx=None, tdy=None, size = 100):
    """Draw the three head-pose axes on ``img`` (in place) and return it.

    Args:
        img: image array drawn on with ``cv2.line``; modified in place.
        pitch, yaw, roll: rotation angles in radians (they are fed directly
            to ``math.cos``/``math.sin``). Each may be a plain number or a
            one-element array such as a single model prediction.
        tdx, tdy: pixel coordinates of the axes' origin. If either one is
            ``None`` the origin falls back to the image centre.
        size: length of each drawn axis in pixels before projection.

    Returns:
        The same ``img`` object with the three lines drawn on it.

    Raises:
        ValueError: if an angle is an array with more than one element.
    """
    # .item() accepts scalars and size-1 arrays alike and yields a plain
    # Python number, so math.cos/sin never receive an array.
    pitch, yaw, roll = (np.asarray(angle).item() for angle in (pitch, yaw, roll))

    yaw = -yaw
    # Identity check instead of `!= None`: safe for array-like coordinates.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis | drawn in green
    #        v
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    origin = (int(tdx), int(tdy))
    cv2.line(img, origin, (int(x1),int(y1)),(0,0,255),3)
    cv2.line(img, origin, (int(x2),int(y2)),(0,255,0),3)
    cv2.line(img, origin, (int(x3),int(y3)),(255,0,0),2)

    return img

In [219]:
def get_list_from_filenames(file_path):
    """Return the base names (without extension) of all paths matching a glob.

    Args:
        file_path: glob pattern handed to ``glob.glob``,
            e.g. ``"/some/dir/*.mat"``.

    Returns:
        A list with one ``Path(...).stem`` per match, in the order
        ``glob.glob`` produced them (callers sort if they need an order).
    """
    stems = []
    for match in glob.glob(file_path):
        stems.append(Path(match).stem)
    return stems

def get_ypr_from_mat(mat_path):
    """Read the three pose angles from a .mat annotation file.

    The first row of the file's ``Pose_Para`` entry is laid out as
    [pitch, yaw, roll, tdx, tdy, tdz, scale_factor]; the angles are
    in radians.

    Note that, despite "ypr" in the function name, the values come back in
    their stored order: pitch, yaw, roll.

    Args:
        mat_path: path of the .mat file to load with ``scipy.io.loadmat``.

    Returns:
        The first three entries of ``Pose_Para[0]``: [pitch, yaw, roll].
    """
    annotation = sio.loadmat(mat_path)
    first_row = annotation['Pose_Para'][0]
    return first_row[:3]

class AFLW2000():
    """Index over AFLW2000 samples that yields their pose labels.

    Args:
        data_dir: glob pattern selecting the samples (it is passed to
            ``get_list_from_filenames``), e.g. ``"/data/AFLW2000/*.mat"``.
        file_path: directory that sample names are joined onto when the
            annotation file of a sample is opened.
        img_ext: extension of the image files (stored, not used for loading).
        annot_ext: extension of the annotation files.
        image_mode: colour mode kept for reference (stored only).

    Attributes:
        X_train / y_train: the same sorted list of sample base names.
        length: number of samples found.
    """

    def __init__(self, data_dir,file_path, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
        self.data_dir = data_dir
        self.file_path = file_path
        self.img_ext = img_ext
        self.annot_ext = annot_ext

        # Sorted so that sample order is deterministic across runs.
        filename_list = get_list_from_filenames(data_dir)
        filename_list.sort()
        self.X_train = filename_list
        self.y_train = filename_list
        self.image_mode = image_mode
        self.length = len(filename_list)

    def __len__(self):
        """Return the number of samples, so ``len(dataset)`` works."""
        return self.length

    def __getitem__(self, index):
        """Return ``[yaw, pitch, roll]`` (radians, unconverted) for a sample.

        Only the annotation file is read. The image is deliberately not
        opened here: its pixels are not part of the returned value, and
        opening/decoding it per lookup left an unclosed file handle behind.
        """
        mat_path = os.path.join(self.file_path,self.y_train[index] + self.annot_ext)

        # The annotation stores the angles as pitch, yaw, roll; they are
        # re-ordered to yaw, pitch, roll for the caller.
        pitch, yaw, roll = get_ypr_from_mat(mat_path)
        cont_labels = [yaw, pitch, roll]

        return cont_labels

In [220]:
demo=AFLW2000("/content/AFLW2000/*.mat",'/content/AFLW2000/')

features=[]   # one flat [x0, y0, x1, y1, ...] pixel-coordinate vector per image with a face
images=[]     # every sample name that was processed
index=[]      # sample names for which no face was detected (dropped from the labels later)

faceModule = mediapipe.solutions.face_mesh
# A single FaceMesh is built once and reused for all images; in static image
# mode every call is treated as an independent picture, so there is no reason
# to rebuild the (expensive) model per file.
with faceModule.FaceMesh(static_image_mode=True) as faces:
    for random_file in demo.X_train:
        images.append(random_file)

        # loading the image
        image_path = '/content/AFLW2000/'+random_file+'.jpg'
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise FileNotFoundError("Could not read image: " + image_path)

        # processing the face to extract the landmark points
        results = faces.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            index.append(random_file)
            continue

        # Exactly one feature row per image keeps `features` aligned with the
        # label table; only the first detected face is used.
        face = results.multi_face_landmarks[0]

        # note: landmark x and y are scaled by the image width and height, so
        # multiply back to get their actual pixel position in the image
        img_height, img_width = image.shape[:2]
        coords = []
        for landmark in face.landmark:
            coords.append(int(landmark.x * img_width))
            coords.append(int(landmark.y * img_height))
        features.append(np.array(coords))


In [221]:
# Collect the [yaw, pitch, roll] label vector of every sample in the dataset.
# The loop is bounded by demo.length rather than a hard-coded 2000 so it
# stays correct if the number of annotation files on disk differs.
labels = [np.array(demo[i], dtype=np.float32) for i in range(demo.length)]

In [222]:
labels = np.array(labels, dtype=np.float32)

df = pd.DataFrame(labels, columns=['yaw', 'pitch', 'roll']) #convert to a dataframe
df['img']=demo.X_train
# Remove the samples without a detected face (collected in `index`) and
# renumber the rows 0..n-1 so they line up positionally with `features`.
df = df.set_index("img").drop(labels=index, axis=0).reset_index()

features= np.array(features, dtype=np.float32)
# concat(axis=1) aligns on the row index and would silently pad with NaN if
# the two tables had different lengths, so fail loudly instead.
assert len(df) == len(features), (
    "label rows (%d) and landmark rows (%d) are out of sync" % (len(df), len(features))
)
df_new = pd.concat([df, pd.DataFrame(features)], axis=1)
df_new.head()

Unnamed: 0,img,yaw,pitch,roll,0,1,2,3,4,5,...,926,927,928,929,930,931,932,933,934,935
0,image00002,0.018227,-0.399231,0.085676,218.0,309.0,220.0,287.0,220.0,291.0,...,251.0,218.0,246.0,221.0,243.0,225.0,299.0,212.0,304.0,208.0
1,image00004,1.189533,0.470065,0.300959,198.0,288.0,187.0,266.0,201.0,274.0,...,235.0,228.0,230.0,230.0,225.0,231.0,263.0,225.0,267.0,222.0
2,image00006,0.881137,-0.18465,-0.236852,143.0,359.0,144.0,349.0,143.0,351.0,...,150.0,323.0,148.0,324.0,147.0,325.0,168.0,320.0,169.0,319.0
3,image00008,0.299208,-0.175379,-0.373374,226.0,312.0,211.0,294.0,220.0,296.0,...,228.0,225.0,223.0,229.0,220.0,233.0,268.0,205.0,271.0,200.0
4,image00013,0.011965,-0.026812,-0.220662,229.0,301.0,221.0,272.0,224.0,281.0,...,234.0,216.0,229.0,219.0,227.0,221.0,271.0,203.0,275.0,201.0


In [223]:
# Split the table into the landmark coordinate columns (X) and the three
# pose-angle targets (Y); the image name is an identifier, not a feature.
target_columns = ['yaw', 'pitch', 'roll']
X = df_new.drop(columns=['img'] + target_columns)
Y = df_new[target_columns]


In [224]:
# Feature columns alternate x, y per landmark: column 2k is the x value and
# column 2k+1 the y value of landmark k.
column_ids = range(X.shape[1])
xPoints = [c for c in column_ids if c % 2 == 0]
yPoints = [c for c in column_ids if c % 2 != 0]

# Translate every face so that the landmark stored in columns 4/5 becomes the
# origin (row-wise subtraction of that landmark's x from all x columns and of
# its y from all y columns).
anchor_x = pd.Series(list(X[4]), index=X.index)
anchor_y = pd.Series(list(X[5]), index=X.index)
X[xPoints] = X[xPoints].sub(anchor_x, axis='index')
X[yPoints] = X[yPoints].sub(anchor_y, axis='index')

# Per-row scale: Euclidean distance between the landmark in columns 342/343
# and the landmark in columns 10/11; dividing by it removes the face size.
far_point = np.array((X.iloc[:,342],X.iloc[:,343]))
near_point = np.array((X.iloc[:,10],X.iloc[:,11]))
d = np.linalg.norm(far_point - near_point,axis=0)
X = X.div(d,axis='index')

In [225]:
from sklearn.decomposition import PCA

# A full PCA is fitted only to inspect how much variance each additional
# component captures; the projected data is not needed here, so fit() is
# used rather than computing and discarding fit_transform()'s result.
pca = PCA()
pca.fit(X)

# The axis is sized from the fitted components themselves, so it stays
# consistent even when there are fewer components than columns.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

# Dedicated names are used so the dataset frame `df` and the scale vector `d`
# from earlier cells are not overwritten by plotting data.
variance_curve = pd.DataFrame(data={
    'Number of EigenVector': np.arange(1, len(cumulative_variance) + 1),
    'Varaiability Captured': cumulative_variance,
})
px.line(variance_curve, x='Number of EigenVector', y='Varaiability Captured',markers=True,
                       title='EigenVectors VS Varaiability Captured')

In [226]:
# Project the normalized landmarks onto 6 principal components.
# With few components on a wide matrix scikit-learn's automatic solver choice
# may pick the randomized SVD, whose result depends on the random state;
# fixing it makes the projection (and every model fitted on it) reproducible.
pca = PCA(n_components=6, random_state=32)

Xred = pca.fit_transform(X)

In [227]:
# yaw

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Gradient-boosted regressor for the yaw angle, fitted on the PCA-reduced
# landmark features.
yaw_target = Y['yaw']
model_GradBoos_yaw = GradientBoostingRegressor(
    learning_rate=0.2,
    max_depth=3,
    random_state=32,
)
model_GradBoos_yaw.fit(Xred, yaw_target)

# NOTE: this R^2 is computed on the very rows the model was fitted on, so it
# measures fit quality on the training data, not generalization.
y_yaw_pred = model_GradBoos_yaw.predict(Xred)
yaw_train_r2 = r2_score(yaw_target, y_yaw_pred)

print("r2_score train:      " ,yaw_train_r2)


r2_score train:       0.9735831096730471


In [228]:
# pitch

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Gradient-boosted regressor for the pitch angle, fitted on the PCA-reduced
# landmark features.
pitch_target = Y['pitch']
model_GradBoos_pitch = GradientBoostingRegressor(
    learning_rate=0.2,
    max_depth=3,
    random_state=32,
)
model_GradBoos_pitch.fit(Xred, pitch_target)

# NOTE: this R^2 is computed on the very rows the model was fitted on, so it
# measures fit quality on the training data, not generalization.
y_pitch_pred = model_GradBoos_pitch.predict(Xred)
pitch_train_r2 = r2_score(pitch_target, y_pitch_pred)

print("r2_score train:      " ,pitch_train_r2)


r2_score train:       0.9683329637224696


In [229]:
# roll

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Gradient-boosted regressor for the roll angle, fitted on the PCA-reduced
# landmark features.
roll_target = Y['roll']
model_GradBoos_roll = GradientBoostingRegressor(
    learning_rate=0.2,
    max_depth=3,
    random_state=32,
)
model_GradBoos_roll.fit(Xred, roll_target)

# NOTE: this R^2 is computed on the very rows the model was fitted on, so it
# measures fit quality on the training data, not generalization.
y_roll_pred = model_GradBoos_roll.predict(Xred)
roll_train_r2 = r2_score(roll_target, y_roll_pred)

print("r2_score train:      " ,roll_train_r2)


r2_score train:       0.9880298561538494


In [230]:
# Run the trained pose models on every frame of a video and draw the
# predicted head axes on the frames in which a face is found.
#
# The per-frame features must be built exactly like the training matrix:
#   * landmark k lives in training columns 2k (x) and 2k+1 (y), i.e. the
#     feature vector is interleaved [x0, y0, x1, y1, ...];
#   * training centres on columns 4/5            -> landmark 2;
#   * training scales by the distance between columns 342/343 and 10/11
#                                                -> landmarks 171 and 5.
CENTER_LANDMARK = 2
SCALE_LANDMARKS = (171, 5)

# Create a VideoCapture object and read from input file
cap = cv2.VideoCapture('/content/drive/MyDrive/ALI.mp4')
width= int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height= int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Check if camera opened successfully
if not cap.isOpened():
    print("Error opening video file")

# initializing a list to store the annotated frames
img_array = []

# One FaceMesh is created for the whole video instead of one per frame.
with faceModule.FaceMesh(static_image_mode=True) as face_mesh:
    # Read until video is completed
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break

        # detect the face and generate its landmarks
        results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            continue

        frame_height, frame_width = frame.shape[:2]
        axes_drawn = False
        for face_landmarks in results.multi_face_landmarks:
            # (n_landmarks, 2) array of pixel coordinates: column 0 = x, column 1 = y.
            # Local names are used so the training data X / Y is not overwritten.
            pixels = np.array([
                (int(lm.x * frame_width), int(lm.y * frame_height))
                for lm in face_landmarks.landmark
            ])

            centre = pixels[CENTER_LANDMARK]
            scale = np.linalg.norm(pixels[SCALE_LANDMARKS[0]] - pixels[SCALE_LANDMARKS[1]])
            if scale == 0:
                # Degenerate detection: cannot be normalized, skip this face.
                continue

            # Row-major flattening of (x, y) pairs yields the interleaved
            # [x0, y0, x1, y1, ...] layout the PCA was fitted on.
            normalized = (pixels - centre) / scale
            points = pca.transform(normalized.reshape(1, -1))

            # predicting the 3 angles to draw the axis on the image
            pitch_pred = float(model_GradBoos_pitch.predict(points)[0])
            yaw_pred = float(model_GradBoos_yaw.predict(points)[0])
            roll_pred = float(model_GradBoos_roll.predict(points)[0])
            draw_axis(frame, pitch_pred, yaw_pred, roll_pred, centre[0], centre[1])
            axes_drawn = True

        # Each annotated frame is stored once, however many faces it contains.
        if axes_drawn:
            img_array.append(frame)

cap.release()
# Closes all the frames
cv2.destroyAllWindows()
print("Number of Detected Frames = ",len(img_array))

Number of Detected Frames =  414


In [231]:


# converting the frames to video
out = cv2.VideoWriter('Aliout.mp4',cv2.VideoWriter_fourcc(*'DIVX'), 20, (width,height))
# A VideoWriter that failed to open ignores every write, which would leave an
# empty/missing file without any error; surface that case explicitly.
if not out.isOpened():
    raise RuntimeError("Could not open 'Aliout.mp4' for writing")
try:
    for annotated_frame in img_array:
        out.write(annotated_frame)
finally:
    # Always finalize the file, even if writing a frame fails.
    out.release()