### Libraries

In [1]:
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

import warnings
# Ignore all Python warnings from this point on. This keeps cell output short,
# but it also hides deprecation warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')
from google.colab.patches import cv2_imshow

In [2]:
# Mount Google Drive into the Colab filesystem; the dataset is read from it below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


### Data Loading and Splitting

In [3]:
# Read the head-pose dataset from the mounted drive and preview the first rows.
DATASET_CSV = '/content/drive/MyDrive/head_pose_dataset.csv'
data = pd.read_csv(DATASET_CSV)
data.head()

Unnamed: 0,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,...,y465,x466,y466,x467,y467,x468,y468,pitch,yaw,roll
0,0.06703,0.328205,0.000895,0.08433,0.109037,0.177909,0.093921,-0.149767,0.005139,0.008122,...,-0.260575,0.411546,-0.252619,0.844953,-0.231329,0.897006,-0.255406,0.107997,0.837121,0.249012
1,0.009322,0.595813,0.015251,0.118809,-0.005072,0.359897,-0.124509,-0.136488,0.012932,0.002051,...,-0.169538,0.129904,-0.184685,0.684021,-0.134949,0.750501,-0.19336,0.404001,-0.131908,-0.084306
2,0.123301,0.333559,0.013915,0.078694,0.166921,0.180381,0.103639,-0.144438,0.007269,0.003677,...,-0.286893,0.417817,-0.275312,0.820107,-0.311576,0.8753,-0.340237,0.106887,0.921113,0.128691
3,-0.076576,0.345102,0.002309,0.086475,-0.073506,0.175926,-0.114812,-0.158177,0.008806,0.005363,...,-0.293584,-0.033024,-0.28644,0.294092,-0.308274,0.332659,-0.342919,-0.067951,-0.425775,0.023097
4,0.123781,0.232474,0.021969,0.060497,0.104812,0.085345,-0.013593,-0.167524,0.004959,0.003812,...,-0.411016,0.214658,-0.38245,0.632065,-0.5045,0.670066,-0.553568,-0.373085,0.293578,-0.125011


In [4]:
# Column-wise split of the frame: the leading 936 columns are the model inputs,
# every column after them is a target (taken positionally as pitch, yaw, roll).
N_FEATURE_COLUMNS = 936
features = data.iloc[:, :N_FEATURE_COLUMNS]
labels = data.iloc[:, N_FEATURE_COLUMNS:]
pitch_label, yaw_label, roll_label = (labels.iloc[:, position] for position in range(3))

In [5]:
# Split the features and all three targets in ONE call so that every target is
# partitioned with exactly the same row indices as X_train / X_test.
# (Previously three separate calls overwrote X_train / X_test each time and the
# targets only stayed aligned because the same random_state was repeated.)
(X_train, X_test,
 y_train_pitch, y_test_pitch,
 y_train_yaw, y_test_yaw,
 y_train_roll, y_test_roll) = train_test_split(
    features, pitch_label, yaw_label, roll_label,
    test_size=0.2, shuffle=True, random_state=42)

In [6]:
# Preview the first five rows of the feature matrix.
features.head(n=5)

Unnamed: 0,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5,...,x464,y464,x465,y465,x466,y466,x467,y467,x468,y468
0,0.06703,0.328205,0.000895,0.08433,0.109037,0.177909,0.093921,-0.149767,0.005139,0.008122,...,0.518581,-0.27285,0.462333,-0.260575,0.411546,-0.252619,0.844953,-0.231329,0.897006,-0.255406
1,0.009322,0.595813,0.015251,0.118809,-0.005072,0.359897,-0.124509,-0.136488,0.012932,0.002051,...,0.215131,-0.175605,0.1569,-0.169538,0.129904,-0.184685,0.684021,-0.134949,0.750501,-0.19336
2,0.123301,0.333559,0.013915,0.078694,0.166921,0.180381,0.103639,-0.144438,0.007269,0.003677,...,0.522467,-0.303719,0.470179,-0.286893,0.417817,-0.275312,0.820107,-0.311576,0.8753,-0.340237
3,-0.076576,0.345102,0.002309,0.086475,-0.073506,0.175926,-0.114812,-0.158177,0.008806,0.005363,...,-0.007835,-0.306598,-0.031811,-0.293584,-0.033024,-0.28644,0.294092,-0.308274,0.332659,-0.342919
4,0.123781,0.232474,0.021969,0.060497,0.104812,0.085345,-0.013593,-0.167524,0.004959,0.003812,...,0.291112,-0.442346,0.248447,-0.411016,0.214658,-0.38245,0.632065,-0.5045,0.670066,-0.553568


### Machine Learning Models (Creation, Training, Saving)

In [7]:
# Seed shared by every stochastic estimator so that re-running the notebook
# reproduces the same fitted models and scores.
MODEL_RANDOM_STATE = 42


def build_pipelines(random_state=MODEL_RANDOM_STATE):
    """Create one fresh, unfitted pipeline per candidate algorithm.

    Every pipeline standardises the inputs, applies PCA keeping 99% of the
    variance (n_components=0.99), and ends with a regressor.

    Parameters:
        random_state: seed passed to the random-forest, gradient-boosting and
            XGBoost regressors.

    Returns:
        dict mapping the algorithm key ('svr', 'rf', 'gb', 'XGB') to its pipeline.
    """
    def with_preprocessing(regressor):
        # Same preprocessing front-end for every candidate regressor.
        return make_pipeline(StandardScaler(), PCA(n_components=0.99), regressor)

    return {
        'svr': with_preprocessing(SVR(kernel='rbf')),
        'rf': with_preprocessing(
            RandomForestRegressor(n_estimators=150, random_state=random_state)),
        'gb': with_preprocessing(
            GradientBoostingRegressor(n_estimators=150, learning_rate=0.4,
                                      random_state=random_state)),
        'XGB': with_preprocessing(
            XGBRegressor(n_estimators=150, learning_rate=0.3,
                         random_state=random_state)),
    }


# One independent set of pipelines per target (pitch, yaw, roll): each call
# returns new estimator objects, so fitting one target never touches another.
pipelines_p = build_pipelines()
pipelines_y = build_pipelines()
pipelines_r = build_pipelines()

In [8]:
# Containers for the fitted pipelines of each target, keyed by algorithm name.
models_pitch, models_yaw, models_roll = {}, {}, {}

In [9]:
# Fit each pitch pipeline on the training split and store the result by name.
for name, candidate in pipelines_p.items():
  models_pitch[name] = candidate.fit(X_train, y_train_pitch)



In [10]:
# Fit each yaw pipeline on the training split and store the result by name.
for name, candidate in pipelines_y.items():
  models_yaw[name] = candidate.fit(X_train, y_train_yaw)



In [11]:
# Fit each roll pipeline on the training split and store the result by name.
for name, candidate in pipelines_r.items():
  models_roll[name] = candidate.fit(X_train, y_train_roll)



In [16]:
# Report train/test MSE and R2 for every fitted pitch model.
for name, fitted in models_pitch.items():
  pred_train = fitted.predict(X_train)
  pred_test = fitted.predict(X_test)
  mse_train = mean_squared_error(y_train_pitch, pred_train)
  mse_test = mean_squared_error(y_test_pitch, pred_test)
  r2_train = r2_score(y_train_pitch, pred_train)
  r2_test = r2_score(y_test_pitch, pred_test)
  print(f'model: {name}\nMSE (train):{mse_train}\nMSE (test):{mse_test}\nR2 score (train):{r2_train}\nR2 score (test):{r2_test}\n')

model: svr
MSE (train):0.17556488474012272
MSE (test):0.5120503538188519
R2 score (train):0.2781942372645362
R2 score (test):0.04067412539606485

model: rf
MSE (train):0.030675366458385714
MSE (test):1.022428551739091
R2 score (train):0.8738833433778069
R2 score (test):-0.9155189666446959

model: gb
MSE (train):0.00212038653401262
MSE (test):1.4980751096891851
R2 score (train):0.9912823841638805
R2 score (test):-1.8066423626247148

model: XGB
MSE (train):0.0040439556136593
MSE (test):3.0951554984884693
R2 score (train):0.9833739504884109
R2 score (test):-4.798771026087535



In [14]:
# Report train/test MSE and R2 for every fitted yaw model.
for name, fitted in models_yaw.items():
  pred_train = fitted.predict(X_train)
  pred_test = fitted.predict(X_test)
  mse_train = mean_squared_error(y_train_yaw, pred_train)
  mse_test = mean_squared_error(y_test_yaw, pred_test)
  r2_train = r2_score(y_train_yaw, pred_train)
  r2_test = r2_score(y_test_yaw, pred_test)
  print(f'model: {name}\nMSE (train):{mse_train}\nMSE (test):{mse_test}\nR2 score (train):{r2_train}\nR2 score (test):{r2_test}\n')

model: svr
MSE (train):0.024610995610624097
MSE (test):0.10708085682545244
R2 score (train):0.9221230732550014
R2 score (test):0.7439333842430695

model: rf
MSE (train):0.0047993145434176935
MSE (test):0.13886967184562748
R2 score (train):0.9848134600876284
R2 score (test):0.6679155550767599

model: gb
MSE (train):0.001489440558541072
MSE (test):0.14385360425620677
R2 score (train):0.9952869418570591
R2 score (test):0.6559972837500874

model: XGB
MSE (train):0.0032290397522905063
MSE (test):0.13413160155814782
R2 score (train):0.9897823031532595
R2 score (test):0.6792458867504322



In [15]:
# Report train/test MSE and R2 for every fitted roll model.
for name, fitted in models_roll.items():
  pred_train = fitted.predict(X_train)
  pred_test = fitted.predict(X_test)
  mse_train = mean_squared_error(y_train_roll, pred_train)
  mse_test = mean_squared_error(y_test_roll, pred_test)
  r2_train = r2_score(y_train_roll, pred_train)
  r2_test = r2_score(y_test_roll, pred_test)
  print(f'model: {name}\nMSE (train):{mse_train}\nMSE (test):{mse_test}\nR2 score (train):{r2_train}\nR2 score (test):{r2_test}\n')

model: svr
MSE (train):0.175186348569316
MSE (test):0.710533628675681
R2 score (train):0.4070350299012392
R2 score (test):0.22935545573777472

model: rf
MSE (train):0.02920023138820283
MSE (test):0.5382748758557305
R2 score (train):0.9011640206363929
R2 score (test):0.41618724343167324

model: gb
MSE (train):0.0007232544488112253
MSE (test):0.33679074116899615
R2 score (train):0.9975519522147956
R2 score (test):0.6347168708627212

model: XGB
MSE (train):0.0020963931400651576
MSE (test):0.8998998710796312
R2 score (train):0.9929041976971046
R2 score (test):0.023968890364310935



In [17]:
#save chosen models
# For every target, keep the candidate with the highest R2 on the held-out
# split instead of a hard-coded algorithm name. The hard-coded choice stored
# the pitch 'rf' pipeline although its recorded test R2 was negative and lower
# than the 'svr' pipeline's; selecting by score also stays correct when a
# re-run changes the ranking.
def _best_by_test_r2(models, y_test):
  """Return the (name, fitted pipeline) pair with the highest test-set R2.

  models: dict mapping algorithm name -> fitted pipeline.
  y_test: true target values aligned with X_test.
  """
  return max(models.items(),
             key=lambda item: r2_score(y_test, item[1].predict(X_test)))


final_candidates = (
    ('final_pitch_mdl.pkl', models_pitch, y_test_pitch),
    ('final_yaw_mdl.pkl', models_yaw, y_test_yaw),
    ('final_roll_mdl.pkl', models_roll, y_test_roll),
)
for filename, candidates, y_test in final_candidates:
  best_name, best_model = _best_by_test_r2(candidates, y_test)
  # Show which algorithm ended up in each file.
  print(f'{filename}: {best_name}')
  with open(filename, 'wb') as f:
    pickle.dump(best_model, f)

In [18]:
#try some models
def _default_rf_pipeline(random_state=42):
  """Build an unfitted StandardScaler -> PCA(0.99) -> RandomForestRegressor pipeline.

  The forest uses its default hyper-parameters; random_state is fixed so that
  the scores printed for these models are reproducible across runs.
  """
  return make_pipeline(StandardScaler(), PCA(n_components=0.99),
                       RandomForestRegressor(random_state=random_state))


# One independent pipeline per target.
pipeline_pitch = _default_rf_pipeline()
pipeline_yaw = _default_rf_pipeline()
pipeline_roll = _default_rf_pipeline()

In [19]:
# Fit each target's pipeline on the shared training features, in the order
# pitch, yaw, roll, keeping whatever fit() returns as the model.
model_pitch, model_yaw, model_roll = (
    pipe.fit(X_train, target)
    for pipe, target in (
        (pipeline_pitch, y_train_pitch),
        (pipeline_yaw, y_train_yaw),
        (pipeline_roll, y_train_roll),
    )
)

In [20]:
def _report_scores(title, model, y_train, y_test):
  """Print train/test MSE and R2 for one fitted model, followed by a separator.

  title: heading line printed first (e.g. 'PITCH SCORES:').
  model: fitted estimator exposing predict().
  y_train, y_test: true targets aligned with X_train and X_test.
  """
  # Predict each split once and reuse the result for both metrics.
  yhat_train = model.predict(X_train)
  yhat_test = model.predict(X_test)
  print(title)
  print(f'MSE (train):{mean_squared_error(y_train, yhat_train)}')
  # This line was previously labelled 'MSE (train)' although it is the test MSE.
  print(f'MSE (test):{mean_squared_error(y_test, yhat_test)}')
  print(f'R2 score (train):{r2_score(y_train, yhat_train)}')
  print(f'R2 score (test):{r2_score(y_test, yhat_test)}')
  print('==============================================================================')


_report_scores('PITCH SCORES:', model_pitch, y_train_pitch, y_test_pitch)
_report_scores('YAW SCORES:', model_yaw, y_train_yaw, y_test_yaw)
_report_scores('ROLL SCORES:', model_roll, y_train_roll, y_test_roll)

PITCH SCORES:
MSE (train):0.035725050414052606
MSE (train):0.918166600942344
R2 score (train):0.8531224094097843
R2 score (test):-0.720184296157609
YAW SCORES:
MSE (train):0.0050094948231943935
MSE (train):0.1406441031450716
R2 score (train):0.9841483836108219
R2 score (test):0.6636722885283565
ROLL SCORES:
MSE (train):0.031068009804560842
MSE (train):0.5381458285671858
R2 score (train):0.8948420259042028
R2 score (test):0.41632720807916523


In [21]:
# Persist the three random-forest pipelines, one pickle file per target.
rf_artifacts = (
    ('rf_pitch_mdl.pkl', model_pitch),
    ('rf_yaw_mdl.pkl', model_yaw),
    ('rf_roll_mdl.pkl', model_roll),
)
for filename, fitted in rf_artifacts:
  with open(filename, 'wb') as handle:
    pickle.dump(fitted, handle)