In [1]:
!pip install opencv-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!pip install mediapipe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.8.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.8 MB)
[K     |████████████████████████████████| 32.8 MB 1.1 MB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.10


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
#Path of AFLW2000 dataset on drive
OutPATH="/content/drive/MyDrive/Machine Learning 1 project/AFLW2000"

## Essential Imports

In [5]:
import cv2
import mediapipe as mp
from google.colab.patches import cv2_imshow
import scipy.io as sio
from pathlib import Path
import glob
import pandas as pd
import numpy as np

## Function to get Landmarks

In [133]:
def GetLandmarksOfImage(image):

    xlist, ylist, first_point, second_point = [], [], [], []
      
    faceModule = mp.solutions.face_mesh

    # facemesh--> To get landmarks of each face
    with faceModule.FaceMesh(static_image_mode=True) as facemesh:
        
        # processing the image to extract the landmark points (468 point) for each x,y,z
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # result --> resulting face mesh 
        result = facemesh.process(rgb_image)

        #check if the landmarks extracted are None
        if result.multi_face_landmarks != None: 
          #Extract landmarks from resulting face mesh 
          for face_landmarks in result.multi_face_landmarks:
              for landmark in face_landmarks.landmark:
                  # x and y here are scaled with width and height of the image
                  x = landmark.x
                  y = landmark.y
                  shape = image.shape 
            
                  #  we will get back to x and y actual values in the image
                  relative_x = int(x * shape[1])
                  relative_y = int(y * shape[0])

                  #These xlist and ylist are supposed to save each of x and y points of all landmarks of certain face in separate lists 
                  #xinndexes= [0, 1, ............... 468]  
                  #yindexes = [0, 1, ............... 468]
                  xlist.append(relative_x)
                  ylist.append(relative_y)

                  # Point location: near nose 
                  if landmark == face_landmarks.landmark[100]:
                    first_point.append(relative_x)
                    first_point.append(relative_y)

                  # Point location: at the top right of the face
                  if landmark == face_landmarks.landmark[105]:
                    second_point.append(relative_x)
                    second_point.append(relative_y)

    return xlist, ylist, first_point, second_point

## The function below is to center and normalize data points

In [134]:
def Normalize_Landmarks(Landmarks):
  #Get x,y of first point
  x_fp, y_fp = Landmarks[2][0], Landmarks[2][1]

  #Get x,y of second point
  x_sp, y_sp = Landmarks[3][0], Landmarks[3][1]

  # Get distance between first point and second point to normalize landmarks
  a = np.array([x_fp, y_fp])
  b = np.array([x_sp, y_sp])

  distance = np.linalg.norm(a-b)

  centered_normalized_landmarks_x = (np.array(Landmarks[0]) - x_fp) / distance 
  centered_normalized_landmarks_y = (np.array(Landmarks[1]) - y_fp) / distance

  return centered_normalized_landmarks_x, centered_normalized_landmarks_y

## 'Getpose' function to get yaw, pitch, roll of each image from its mat file

In [135]:
def Getpose(mat_file):
  # extracting the labels 3 angels
  pose_para = mat_file["Pose_Para"][0][:3]
  pitch = pose_para[0]
  yaw   = pose_para[1]
  roll  = pose_para[2]
  return [yaw, pitch, roll]

## Create Data Frame

In [136]:
xlistnames=[]
ylistnames=[]
for i in range(468):
  xlistnames.append('x_'+str(i))
  ylistnames.append('y_'+str(i))
yprlist=['yaw', 'pitch', 'roll']

In [137]:
df = pd.DataFrame(columns = xlistnames + ylistnames + yprlist)

## Iterate over all images and build the dataframe 

In [138]:
#Extract name of each image in AFLW2000 dataset
Images_Names = [Path(f).stem for f in glob.glob("/content/drive/MyDrive/Machine Learning 1 project/AFLW2000/*.mat")]
for imagename in Images_Names:
  img = cv2.imread(OutPATH + '/' + imagename + '.jpg')

  x = GetLandmarksOfImage(img)

  if x[0]==[] or x[1]==[] or x[2]==[] or x[3]==[]:
    continue

  x_landmarks, y_landmarks = Normalize_Landmarks(x)

  mat_file = sio.loadmat(OutPATH + '/' + imagename + '.mat')
  
  #Append data of each sample (image) to the row of dataframe
  df.loc[len(df)] = list(x_landmarks) + list(y_landmarks) + Getpose(mat_file)

In [139]:
df

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,...,y_461,y_462,y_463,y_464,y_465,y_466,y_467,yaw,pitch,roll
0,0.594459,0.610525,0.594459,0.465927,0.594459,0.594459,0.546260,-0.433794,0.514127,0.514127,...,0.546260,0.562326,-0.465927,-0.433794,-0.401661,-0.578393,-0.610525,-0.070645,-0.137405,-0.053091
1,0.715860,0.914710,0.715860,0.695975,0.934595,0.894825,0.695975,-0.497125,0.656205,0.656205,...,0.397700,0.437470,-0.357930,-0.338045,-0.318160,-0.377815,-0.437470,-0.569188,0.181756,-0.070024
2,0.533144,0.546472,0.533144,0.426515,0.546472,0.533144,0.519815,-0.413186,0.506487,0.506487,...,0.533144,0.546472,-0.413186,-0.373201,-0.333215,-0.519815,-0.559801,0.039903,-0.192543,-0.027199
3,0.638821,0.557269,0.584453,0.380574,0.530085,0.502901,0.448534,-0.489309,0.394166,0.366982,...,0.326206,0.353390,-0.530085,-0.475717,-0.448534,-0.693188,-0.761148,0.000491,0.041106,-0.162906
4,0.638227,0.554250,0.587841,0.386296,0.537455,0.520659,0.453477,-0.453477,0.403091,0.369500,...,0.520659,0.537455,-0.571046,-0.487068,-0.436682,-0.755796,-0.806182,-0.610480,-0.254934,0.343419
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1848,0.390149,0.296513,0.390149,0.234089,0.280907,0.296513,0.374543,-0.343331,0.374543,0.358937,...,0.327725,0.374543,-0.468179,-0.421361,-0.405755,-0.577421,-0.639844,0.352952,0.033367,-0.062870
1849,-0.200715,0.120429,0.000000,0.334525,0.214096,0.334525,0.615526,-0.013381,0.856384,0.990194,...,0.896527,0.883146,0.254239,0.254239,0.281001,0.535240,0.535240,0.074078,-0.304740,0.735418
1850,0.352400,0.137044,0.313244,0.097889,0.097889,0.137044,0.254511,-0.274089,0.254511,0.215355,...,0.430711,0.450288,-0.509022,-0.450288,-0.430711,-0.646066,-0.743955,0.493541,-0.218161,-0.201171
1851,0.426746,0.311853,0.410333,0.229786,0.295439,0.311853,0.344679,-0.361093,0.344679,0.328266,...,0.361093,0.393919,-0.508812,-0.459573,-0.426746,-0.590879,-0.672945,0.267560,0.041948,-0.074263


In [140]:
# from sklearn.preprocessing import StandardScaler
# import pandas as pd
# df2=df
# standard_scaler = StandardScaler()
# df2[xlistnames+ylistnames] = standard_scaler.fit_transform(df2[xlistnames+ylistnames])
# df2

## Split the data to train and test

In [141]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df[xlistnames + ylistnames], df[yprlist], test_size=0.2, random_state=42)

## Linear Regression model

In [142]:
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
multioutputregressor = MultiOutputRegressor(LinearRegression()).fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [143]:
# model evaluation for testing set
from sklearn import metrics
# mae = metrics.mean_absolute_error(y_test, predictions)
# mse = metrics.mean_squared_error(y_test, predictions)
r2 = metrics.r2_score(y_test, predictions)

print("The model performance for testing set")
print("--------------------------------------")

# print('MAE is {}'.format(mae))
# print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
R2 score is -3.0402613064521127


# ADABOOST model

In [144]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import AdaBoostRegressor

multioutputregressor = MultiOutputRegressor(AdaBoostRegressor(random_state=42, n_estimators=100)).fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [145]:
# model evaluation for testing set
from sklearn import metrics
mae = metrics.mean_absolute_error(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
r2 = metrics.r2_score(y_test, predictions)

print("The model performance for testing set")
print("--------------------------------------")

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.18822240035219018
MSE is 0.06084577890902084
R2 score is 0.5249563744869438


## XGBOOST model 

In [146]:
from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb

multioutputregressor = MultiOutputRegressor(xgb.XGBRegressor(objective='reg:squarederror')).fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [147]:
# model evaluation for testing set
from sklearn import metrics
mae = metrics.mean_absolute_error(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
r2 = metrics.r2_score(y_test, predictions)

print("The model performance for testing set")
print("--------------------------------------")

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.08636457733899378
MSE is 0.060767401921861475
R2 score is 0.3685567478933612


## SVR model "Best R2Score"

In [148]:
from sklearn.svm import SVR
multioutputregressor = MultiOutputRegressor(SVR()).fit(X_train, y_train)
predictions = multioutputregressor.predict(X_test)

In [149]:
# model evaluation for testing set
from sklearn import metrics
mae = metrics.mean_absolute_error(y_test, predictions)
mse = metrics.mean_squared_error(y_test, predictions)
r2 = metrics.r2_score(y_test, predictions)

print("The model performance for testing set")
print("--------------------------------------")

print('MAE is {}'.format(mae))
print('MSE is {}'.format(mse))
print('R2 score is {}'.format(r2))

The model performance for testing set
--------------------------------------
MAE is 0.07260246934762014
MSE is 0.014371390592831481
R2 score is 0.8650746031619588


## Draw axis with angles (yaw, pitch, roll)

In [151]:
from math import cos, sin

def draw_axis(img, pitch,yaw,roll, tdx=None, tdy=None, size = 100):

    yaw = -yaw
    if tdx != None and tdy != None:
        tdx = tdx
        tdy = tdy
    else:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis | drawn in green
    #        v
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2)

    return img

## Upload test video

In [154]:
from google.colab import files
uploaded = files.upload()

Saving WIN_20220613_23_34_59_Pro.mp4 to WIN_20220613_23_34_59_Pro.mp4


# Test model on video 

In [183]:
import cv2
cap = cv2.VideoCapture('WIN_20220613_23_34_59_Pro.mp4')
video_name = 'Re.avi'
video = cv2.VideoWriter(video_name, 0, 10, (1280,720))
df_test = pd.DataFrame(columns = xlistnames+ylistnames)

while (cap.isOpened()):
  ret, frame = cap.read()
  if ret is not True :
    break
  height, width, channels = frame.shape
  

  x=GetLandmarksOfImage(frame)

  if x[0]==[] or x[1]==[] or x[2]==[] or x[3]==[]:
    continue

  x_landmarks, y_landmarks = Normalize_Landmarks(x)

  df_test.loc[len(df_test)] = list(x_landmarks) + list(y_landmarks) 

  predictions = multioutputregressor.predict(df_test.tail(1))

  out_img = draw_axis(frame, predictions[0][1], predictions[0][0], predictions[0][2], x[2][0], x[2][1])

  video.write(out_img)

cv2.destroyAllWindows()
video.release()

                                                                                                   The end ✌