In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# Location of the motion CSV analysed in this notebook. The path is relative,
# so it is resolved against the kernel's working directory.
data_path = "renderExamples/A_dorian_bpm100_01_motion.csv"

In [3]:
# Load the CSV, using its first column as the row index.
df = pd.read_csv(data_path, index_col=0)

In [4]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,ticks,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,mag_x,mag_y,mag_z,r,theta,phi
b28a8a33-e2c6-4135-b3b2-b023ce16ef90,6807,-992,-40,-124,-700,100,0,170,607,-68,1000.52,97.12,-162.12
ef00b6ef-42da-4fe2-899b-4c8d3f1cdd9a,6826,-992,-39,-126,-800,400,-200,182,613,-62,1000.73,97.23,-162.8
0674a27f-196d-4d02-ad8e-538015cf996b,6845,-992,-37,-125,-800,-100,0,177,613,-65,1000.53,97.18,-163.51
2b45e4d6-f73f-45c4-8e25-235150248841,6863,-993,-39,-125,-700,0,0,174,613,-68,1001.6,97.17,-162.67
7b64dd9b-62c9-48df-beac-2c98655ed3cc,6882,-991,-38,-126,-800,100,100,176,609,-71,999.7,97.24,-163.22


Since _phi_ is computed from _acc_y_ and _acc_z_ (as their two-argument arctangent), these two components should be expected to show the highest correlation with _phi_. Further down, we look for the highest correlations with _phi_.

<img src="https://upload.wikimedia.org/wikipedia/commons/a/a8/Arctangent2.svg" /> (Source: Wikipedia, 2021)

In [5]:
# Recompute the angle from the `acc_y` and `acc_z` columns (two-argument
# arctangent, converted from radians to degrees) and store it as a new column
# so it can be compared side by side with the recorded `phi` values.
df['arctan2_acc_y_acc_z'] = np.rad2deg(
    np.arctan2(df['acc_y'], df['acc_z'])
)

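We can now confirm the inputs of _phi_ and how they are combined: in the rows shown below, the new column `arctan2_acc_y_acc_z` (the arctangent of _acc_y_ and _acc_z_, in degrees) matches _phi_ to two decimal places.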

In [6]:
# Show the first rows again, now including the recomputed angle column.
df.head()

Unnamed: 0,ticks,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,mag_x,mag_y,mag_z,r,theta,phi,arctan2_acc_y_acc_z
b28a8a33-e2c6-4135-b3b2-b023ce16ef90,6807,-992,-40,-124,-700,100,0,170,607,-68,1000.52,97.12,-162.12,-162.121303
ef00b6ef-42da-4fe2-899b-4c8d3f1cdd9a,6826,-992,-39,-126,-800,400,-200,182,613,-62,1000.73,97.23,-162.8,-162.801459
0674a27f-196d-4d02-ad8e-538015cf996b,6845,-992,-37,-125,-800,-100,0,177,613,-65,1000.53,97.18,-163.51,-163.511246
2b45e4d6-f73f-45c4-8e25-235150248841,6863,-993,-39,-125,-700,0,0,174,613,-68,1001.6,97.17,-162.67,-162.672078
7b64dd9b-62c9-48df-beac-2c98655ed3cc,6882,-991,-38,-126,-800,100,100,176,609,-71,999.7,97.24,-163.22,-163.217356


In [7]:
# Summarise the row count, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, b28a8a33-e2c6-4135-b3b2-b023ce16ef90 to e73e664d-b943-45ec-9484-10223b1fa6c5
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ticks                36 non-null     int64  
 1   acc_x                36 non-null     int64  
 2   acc_y                36 non-null     int64  
 3   acc_z                36 non-null     int64  
 4   gyr_x                36 non-null     int64  
 5   gyr_y                36 non-null     int64  
 6   gyr_z                36 non-null     int64  
 7   mag_x                36 non-null     int64  
 8   mag_y                36 non-null     int64  
 9   mag_z                36 non-null     int64  
 10  r                    36 non-null     float64
 11  theta                36 non-null     float64
 12  phi                  36 non-null     float64
 13  arctan2_acc_y_acc_z  36 non-null     float64
dtypes: float64(4), int64(10)
mem

The following checks whether our data contains any nulls. If any null data were found, it would need to be remedied, for example by substituting a default value such as zero.

In [8]:
# Count the missing values in each column.
print(df.isnull().sum())

ticks                  0
acc_x                  0
acc_y                  0
acc_z                  0
gyr_x                  0
gyr_y                  0
gyr_z                  0
mag_x                  0
mag_y                  0
mag_z                  0
r                      0
theta                  0
phi                    0
arctan2_acc_y_acc_z    0
dtype: int64


In [9]:
# Pairwise Pearson correlation between all columns of the frame.
corr = df.corr(method='pearson')
print(corr)

                        ticks     acc_x     acc_y     acc_z     gyr_x  \
ticks                1.000000  0.173410 -0.083784  0.080326  0.066730   
acc_x                0.173410  1.000000 -0.075791  0.014983  0.675143   
acc_y               -0.083784 -0.075791  1.000000 -0.434211 -0.502327   
acc_z                0.080326  0.014983 -0.434211  1.000000  0.072942   
gyr_x                0.066730  0.675143 -0.502327  0.072942  1.000000   
gyr_y                0.066177  0.099575  0.419817  0.281097 -0.365134   
gyr_z                0.142463  0.361648  0.258142  0.334244 -0.106083   
mag_x               -0.344510 -0.104303 -0.825874  0.095637  0.385684   
mag_y               -0.141832 -0.986712  0.033893 -0.063891 -0.660370   
mag_z               -0.275551  0.173542  0.580209 -0.891188 -0.004849   
r                    0.044017 -0.208071 -0.193058  0.163937  0.041544   
theta               -0.088957 -0.012785  0.438363 -0.992975 -0.101764   
phi                  0.242572  0.399650  0.557863 -

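Get the three features that have the highest absolute correlation with _acc_x_ (the first is always _acc_x_ itself), then print the absolute correlation of each of them with _phi_.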

In [10]:
# Rank the features by absolute correlation with acc_x and keep the top three.
# The first entry is always acc_x itself, whose self-correlation is 1.
# The correlation matrix is computed once and reused for both prints.
abs_corr = df.corr().abs()
top3_acc_x = abs_corr.nlargest(3, 'acc_x')
print(top3_acc_x.index)
# Absolute correlation of those three features with phi. The column is selected
# by label rather than by position (previously index 12), so the result stays
# correct if columns are added to or reordered in the frame.
print(top3_acc_x['phi'].values)

Index(['acc_x', 'mag_y', 'gyr_x'], dtype='object')
[0.39964987 0.41491681 0.15085786]


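Get the three features that have the highest absolute correlation with _acc_y_ (the first is always _acc_y_ itself), then print the absolute correlation of each of them with _phi_.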

In [11]:
# Rank the features by absolute correlation with acc_y and keep the top three.
# The first entry is always acc_y itself, whose self-correlation is 1.
# The correlation matrix is computed once and reused for both prints.
abs_corr = df.corr().abs()
top3_acc_y = abs_corr.nlargest(3, 'acc_y')
print(top3_acc_y.index)
# Absolute correlation of those three features with phi, selected by column
# label rather than by position (previously index 12).
print(top3_acc_y['phi'].values)

Index(['acc_y', 'mag_x', 'mag_z'], dtype='object')
[0.55786273 0.67098649 0.19947962]


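Get the three features that have the highest absolute correlation with _acc_z_ (the first is always _acc_z_ itself), then print the absolute correlation of each of them with _phi_.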

In [12]:
# Rank the features by absolute correlation with acc_z and keep the top three.
# The first entry is always acc_z itself, whose self-correlation is 1.
# The correlation matrix is computed once and reused for both prints.
abs_corr = df.corr().abs()
top3_acc_z = abs_corr.nlargest(3, 'acc_z')
print(top3_acc_z.index)
# Absolute correlation of those three features with phi, selected by column
# label rather than by position (previously index 12).
print(top3_acc_z['phi'].values)

Index(['acc_z', 'theta', 'mag_z'], dtype='object')
[0.11904485 0.11426263 0.19947962]


In [13]:
# Re-display the first rows before selecting the regression inputs.
df.head()

Unnamed: 0,ticks,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,mag_x,mag_y,mag_z,r,theta,phi,arctan2_acc_y_acc_z
b28a8a33-e2c6-4135-b3b2-b023ce16ef90,6807,-992,-40,-124,-700,100,0,170,607,-68,1000.52,97.12,-162.12,-162.121303
ef00b6ef-42da-4fe2-899b-4c8d3f1cdd9a,6826,-992,-39,-126,-800,400,-200,182,613,-62,1000.73,97.23,-162.8,-162.801459
0674a27f-196d-4d02-ad8e-538015cf996b,6845,-992,-37,-125,-800,-100,0,177,613,-65,1000.53,97.18,-163.51,-163.511246
2b45e4d6-f73f-45c4-8e25-235150248841,6863,-993,-39,-125,-700,0,0,174,613,-68,1001.6,97.17,-162.67,-162.672078
7b64dd9b-62c9-48df-beac-2c98655ed3cc,6882,-991,-38,-126,-800,100,100,176,609,-71,999.7,97.24,-163.22,-163.217356


Since it is known that _phi_ is the arc-tangent of _acc_y_ and _acc_z_, we use these two components as the input features and _phi_ as the target of a linear regression.

In [14]:
# Features: the acc_z and acc_y columns, in that order.
# Selecting the columns directly (instead of rebuilding a frame from np.c_)
# keeps df's row index, so x and Y stay aligned label-for-label; the values,
# column names and column order are unchanged.
x = df[['acc_z', 'acc_y']]
# Target: the recorded phi column.
Y = df['phi']

In [15]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
# train_test_split is imported in the import cell at the top of the notebook.
x_train, x_test, Y_train, Y_test = train_test_split(
    x, Y, test_size=0.3, random_state=5
)

In [16]:
# Fit a linear regression of phi on acc_z and acc_y using the training rows.
# LinearRegression is imported in the import cell at the top of the notebook.
model = LinearRegression()
# Kept as the last expression so the cell still displays the fitted estimator.
model.fit(x_train, Y_train)

LinearRegression()

In [17]:
# Predict phi for the held-out rows.
phi_pred = model.predict(x_test)

In [18]:
# Coefficient of determination (R^2) on the held-out rows. A negative value
# means the fit predicts worse than a constant equal to the mean of Y_test.
print('R-squared: %.4f' % model.score(x_test, Y_test))

R-squared: -0.3809
