In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

np.random.seed(42)

In [12]:
df_6pts = pd.read_csv('../cleaned_data_6pts.csv', index_col=0)
df_6pts.head()

Unnamed: 0,x2,x3,x4,x5,x6,y2,y3,y4,y5,y6,pitch,yaw,roll
image00002,-0.117864,-0.66087,0.811568,-0.471326,0.388258,0.99303,-0.86858,-0.73413,0.247089,0.431947,-22.874239,1.044306,4.908885
image00004,0.244669,0.390942,0.818068,0.240005,0.475098,0.969607,-0.26848,-0.445271,0.578156,0.481578,26.932741,68.155235,17.24367
image00006,0.475943,0.029326,0.663393,0.186035,0.639567,0.879476,-0.389183,-0.480418,0.351012,0.307038,-10.579652,50.485409,-13.570644
image00008,0.545762,-0.534558,0.548172,-0.038727,0.562912,0.83794,-0.411924,-0.79225,0.351042,0.173244,-10.048455,17.143373,-21.39278
image00010,0.717417,0.251543,0.403701,0.469008,0.526977,0.696644,-0.364394,-0.518721,0.27928,0.265331,-50.544579,68.640549,-59.207973


In [13]:
df_6pts.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2000 entries, image00002 to image04375
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x2      2000 non-null   float64
 1   x3      2000 non-null   float64
 2   x4      2000 non-null   float64
 3   x5      2000 non-null   float64
 4   x6      2000 non-null   float64
 5   y2      2000 non-null   float64
 6   y3      2000 non-null   float64
 7   y4      2000 non-null   float64
 8   y5      2000 non-null   float64
 9   y6      2000 non-null   float64
 10  pitch   2000 non-null   float64
 11  yaw     2000 non-null   float64
 12  roll    2000 non-null   float64
dtypes: float64(13)
memory usage: 218.8+ KB


In [14]:
df_21pts = pd.read_csv('../cleaned_data_21pts.csv', index_col=0)
df_21pts.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,y13,y14,y15,y16,y17,y18,y19,pitch,yaw,roll
image00002,-0.850815,-0.525609,-0.207266,0.387123,0.729734,1.045237,-0.66087,-0.250516,0.378241,0.811568,...,0.028274,0.247089,0.516972,0.431947,0.99303,-0.59558,-0.426537,-22.874239,1.044306,4.908885
image00004,0.418979,0.261841,0.289089,0.480327,0.684867,0.992539,0.390942,0.426828,0.629349,0.818068,...,0.1591,0.578156,0.446953,0.481578,0.969607,0.522405,0.27204,26.932741,68.155235,17.24367
image00006,-0.028031,-0.083471,-0.014014,0.283632,0.487453,0.808384,0.029326,0.155509,0.435379,0.663393,...,0.052335,0.351012,0.349787,0.307038,0.879476,0.037008,-0.11427,-10.579652,50.485409,-13.570644
image00008,-0.726478,-0.638846,-0.417195,0.070929,0.318399,0.667258,-0.534558,-0.258377,0.20887,0.548172,...,-0.006753,0.351042,0.387353,0.173244,0.83794,0.132924,-0.411059,-10.048455,17.143373,-21.39278
image00010,0.252695,0.051993,0.02933,0.103541,0.186478,0.479988,0.251543,0.251446,0.335959,0.403701,...,-0.022183,0.27928,0.287105,0.265331,0.696644,-0.026336,-0.307893,-50.544579,68.640549,-59.207973


In [15]:
df_21pts.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2000 entries, image00002 to image04375
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x1      2000 non-null   float64
 1   x2      2000 non-null   float64
 2   x3      2000 non-null   float64
 3   x4      2000 non-null   float64
 4   x5      2000 non-null   float64
 5   x6      2000 non-null   float64
 6   x7      2000 non-null   float64
 7   x8      2000 non-null   float64
 8   x9      2000 non-null   float64
 9   x10     2000 non-null   float64
 10  x11     2000 non-null   float64
 11  x13     2000 non-null   float64
 12  x14     2000 non-null   float64
 13  x15     2000 non-null   float64
 14  x16     2000 non-null   float64
 15  x17     2000 non-null   float64
 16  x18     2000 non-null   float64
 17  x19     2000 non-null   float64
 18  y1      2000 non-null   float64
 19  y2      2000 non-null   float64
 20  y3      2000 non-null   float64
 21  y4      2000 non-null   flo

In [24]:
# some util functions
def split_x_y(data):
    return data[:, :-3], data[:, -3:]

def split_train_dev_x_y(df, train_size=0.9):
    data_train, data_test = train_test_split(df, train_size=train_size)
    X_train, Y_train = split_x_y(data_train.values)
    X_dev, Y_dev = split_x_y(data_test.values)
    return X_train, Y_train, X_dev, Y_dev 

def try_model(X_train, Y_train, X_dev, Y_dev, model):
    pitch_model = model().fit(X_train, Y_train[:, 0])
    yaw_model = model().fit(X_train, Y_train[:, 1])
    roll_model = model().fit(X_train, Y_train[:, 2])

    print('\nAccuracy on training set: ')
    h_pitch = pitch_model.predict(X_train)
    h_yaw = yaw_model.predict(X_train)
    h_roll = roll_model.predict(X_train)
    print('Pitch r2_score =', r2_score(Y_train[:, 0] , h_pitch))
    print('Yaw r2_score =', r2_score(Y_train[:, 1] , h_yaw))
    print('Roll r2_score =', r2_score(Y_train[:, 2] , h_roll))

    print('\nAccuracy on dev set: ')
    h_pitch = pitch_model.predict(X_dev)
    h_yaw = yaw_model.predict(X_dev)
    h_roll = roll_model.predict(X_dev)
    print('Pitch r2_score =', r2_score(Y_dev[:, 0] , h_pitch))
    print('Yaw r2_score =', r2_score(Y_dev[:, 1] , h_yaw))
    print('Roll r2_score =', r2_score(Y_dev[:, 2] , h_roll))


In [25]:
from sklearn.linear_model import ElasticNet, LinearRegression

# Try LinearRegression on 6pts df.
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_6pts)
try_model(X_train, Y_train, X_dev, Y_dev, LinearRegression)


Accuracy on training set: 
Pitch r2_score = 0.2805593225401123
Yaw r2_score = 0.9408328234773186
Roll r2_score = 0.3128567898209521

Accuracy on dev set: 
Pitch r2_score = -0.21508064518378878
Yaw r2_score = 0.5841539957848071
Roll r2_score = 0.17053814055740024


In [26]:
# Try LinearRegression on 21pts df.
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_21pts)
try_model(X_train, Y_train, X_dev, Y_dev, LinearRegression)


Accuracy on training set: 
Pitch r2_score = 0.2594035928359243
Yaw r2_score = 0.915819445076715
Roll r2_score = 0.32627454030517034

Accuracy on dev set: 
Pitch r2_score = 0.37427591994527265
Yaw r2_score = 0.969122218050318
Roll r2_score = 0.3336567648018326


In [29]:
# Try LinearRegression on 68pts df.
df_68pts = pd.read_csv('../cleaned_data_68pts.csv', index_col=0)
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_68pts)
try_model(X_train, Y_train, X_dev, Y_dev, LinearRegression)


Accuracy on training set: 
Pitch r2_score = 0.43458286154306813
Yaw r2_score = 0.9427031323983798
Roll r2_score = 0.5432405306386449

Accuracy on dev set: 
Pitch r2_score = 0.09895475221648797
Yaw r2_score = 0.963377561761136
Roll r2_score = 0.039919182011481036


In [27]:
# Try ElasticNet on 6pts df.
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_6pts)
try_model(X_train, Y_train, X_dev, Y_dev, ElasticNet)


Accuracy on training set: 
Pitch r2_score = 0.043020915691247996
Yaw r2_score = 0.5393319791203773
Roll r2_score = 0.07863342719630362

Accuracy on dev set: 
Pitch r2_score = -0.016896498076169886
Yaw r2_score = 0.37794736891205916
Roll r2_score = 0.00471031990499049


In [28]:
# Try ElasticNet on 21pts df.
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_21pts)
try_model(X_train, Y_train, X_dev, Y_dev, ElasticNet)


Accuracy on training set: 
Pitch r2_score = 0.10277603105791633
Yaw r2_score = 0.833631939687485
Roll r2_score = 0.0827422879338986

Accuracy on dev set: 
Pitch r2_score = 0.30363568266596996
Yaw r2_score = 0.8959795120496848
Roll r2_score = 0.22964544617915872


In [30]:
from sklearn.ensemble import RandomForestRegressor
X_train, Y_train, X_dev, Y_dev = split_train_dev_x_y(df_21pts)
try_model(X_train, Y_train, X_dev, Y_dev, RandomForestRegressor)


Accuracy on training set: 
Pitch r2_score = 0.8250015630499163
Yaw r2_score = 0.9842378759318143
Roll r2_score = 0.9483928563940763

Accuracy on dev set: 
Pitch r2_score = 0.5813582092494429
Yaw r2_score = 0.9964953779611181
Roll r2_score = 0.693752453833863
