In [2]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
%load_ext version_information
%version_information numpy, pandas, matplotlib
%version_information
fig_prefix = "../figures/2018-07-22-jw-weaselball-heatmap_"
data_prefix = "../data/2018-07-22-jw-weaselball-heatmap_"


In [3]:
df = pd.read_csv('../data/collection.csv', index_col=False)
df.head()


Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Z,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Velocity_Z_Relative,Linear_Acceleration_X_Relative,Linear_Acceleration_Y_Relative,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative
0,0 0,50,0,0,0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,-1.2689,-0.260063,1.67592,0.0,0.0,0.0,42.0139,6.07755,31.8614
1,0 0,51,0,0,0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,8.15832,1.91332,4.8049,0.0,0.0,0.0,16.1584,235.351,-110.122
2,0 0,48,0,0,0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.040257,-0.569083,1.34639,0.0,0.0,0.0,15.0759,30.6618,11.0086
3,0 0,49,0,0,0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0 1000000,50,0,0,0,-0.113624,0.018111,0.041,-0.039995,-0.039995,...,-0.08577,14.2506,16.6491,-85.7701,-1.6969,-5.19367,0.206992,-1797.8,-5201.47,241.885


In [6]:
#Create goal columns (x_t+1, y_t+1, z_t+1)

df['Mount_X_Next'] = df['Mount_X']
df['Mount_Y_Next'] = df['Mount_Y']
df['Mount_Yaw_Next'] = df['Mount_Z']

df['Mount_X_Next'].shift(-1)
df['Mount_Y_Next'].shift(-1)
df['Mount_Yaw_Next'].shift(-1)

df.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Z,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative,Mount_X_Next,Mount_Y_Next,Mount_Yaw_Next
0,0 0,50,0,0,0,-0.110867,0.022535,0.01,0.0868,0.0868,...,1.67592,0.0,0.0,0.0,42.0139,6.07755,31.8614,0,0,0
1,0 0,51,0,0,0,0.08873,-0.073107,0.01,0.47689,0.47689,...,4.8049,0.0,0.0,0.0,16.1584,235.351,-110.122,0,0,0
2,0 0,48,0,0,0,-0.012243,-0.025526,0.01,1.2,1.2,...,1.34639,0.0,0.0,0.0,15.0759,30.6618,11.0086,0,0,0
3,0 0,49,0,0,0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
4,0 1000000,50,0,0,0,-0.113624,0.018111,0.041,-0.039995,-0.039995,...,-85.7701,-1.6969,-5.19367,0.206992,-1797.8,-5201.47,241.885,0,0,0


In [7]:
#Split into training and testing
df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75
df.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Z,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative,Mount_X_Next,Mount_Y_Next,Mount_Yaw_Next,is_train
0,0 0,50,0,0,0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,0.0,0.0,42.0139,6.07755,31.8614,0,0,0,False
1,0 0,51,0,0,0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,16.1584,235.351,-110.122,0,0,0,True
2,0 0,48,0,0,0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.0,0.0,15.0759,30.6618,11.0086,0,0,0,True
3,0 0,49,0,0,0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,False
4,0 1000000,50,0,0,0,-0.113624,0.018111,0.041,-0.039995,-0.039995,...,-1.6969,-5.19367,0.206992,-1797.8,-5201.47,241.885,0,0,0,True


In [8]:
#Create dataframes based on split
train, test = df[df['is_train']==True], df[df['is_train']==False]
# Show the number of observations for the test and training dataframes
print('Number of observations in the training data:', len(train))
print('Number of observations in the test data:',len(test))

('Number of observations in the training data:', 728074)
('Number of observations in the test data:', 242310)


In [9]:
#print list of dtypes of each column (sanity check)
df.dtypes

Time                                        object
ID                                           int64
Mount_X                                      int64
Mount_Y                                      int64
Mount_Z                                      int64
Pos_x                                      float64
Pos_y                                      float64
Pos_z                                      float64
Yaw                                        float64
Pitch                                      float64
Roll                                       float64
Linear_Velocity_X_World                    float64
Linear_Velocity_Y_World                    float64
Linear_Velocity_Z_World                    float64
Linear_Acceleration_X_World                float64
Linear_Acceleration_Y_World                float64
Linear_Acceleration_Z_World                float64
Rotational_Velocity_X_World                float64
Rotational_Velocity_Y_World                float64
Rotational_Velocity_Z_World    

In [10]:
# Create a list of the feature column's names
features = df.columns[1:35]

# View features
features

Index([u'ID', u'Mount_X', u'Mount_Y', u'Mount_Z', u'Pos_x', u'Pos_y', u'Pos_z',
       u'Yaw', u'Pitch', u'Roll', u'Linear_Velocity_X_World',
       u'Linear_Velocity_Y_World', u'Linear_Velocity_Z_World',
       u'Linear_Acceleration_X_World', u'Linear_Acceleration_Y_World',
       u'Linear_Acceleration_Z_World', u'Rotational_Velocity_X_World',
       u'Rotational_Velocity_Y_World', u'Rotational_Velocity_Z_World',
       u'Rotational_Acceleration_X_World', u'Rotational_Acceleration_Y_World',
       u'Rotational_Acceleration_Z_World', u'Linear_Velocity_X_Relative',
       u'Linear_Velocity_Y_Relative', u'Linear_Velocity_Z_Relative',
       u'Linear_Acceleration_X_Relative', u'Linear_Acceleration_Y_Relative',
       u'Linear_Acceleration_Z_Relative_Relative',
       u'Rotational_Velocity_X_Relative', u'Rotational_Velocity_Y_Relative',
       u'Rotational_Velocity_Z_Relative',
       u'Rotational_Acceleration_X_Relative',
       u'Rotational_Acceleration_Y_Relative',
       u'Rotational_A

In [20]:
#Train the classifier
clf = RandomForestClassifier(n_jobs=1, verbose = 1)
clf.fit(train[features], train['Mount_X_Next'])

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.6s finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=1,
            warm_start=False)

In [21]:
# Apply the Classifier we trained to the test data 
preds= clf.predict(test[features])

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


In [22]:
# View the predicted probabilities of the first 10 observations
clf.predict_proba(test[features])[0:10]

[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished


array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [23]:
# Create confusion matrix
pd.crosstab(test['Mount_X_Next'], preds, rownames=['Actual Species'], colnames=['Predicted Species'])

Predicted Species,0
Actual Species,Unnamed: 1_level_1
0,242310


In [24]:
# View a list of the features and their importance scores
list(zip(train[features], clf.feature_importances_))

[('ID', 0.0),
 ('Mount_X', 0.0),
 ('Mount_Y', 0.0),
 ('Mount_Z', 0.0),
 ('Pos_x', 0.0),
 ('Pos_y', 0.0),
 ('Pos_z', 0.0),
 ('Yaw', 0.0),
 ('Pitch', 0.0),
 ('Roll', 0.0),
 ('Linear_Velocity_X_World', 0.0),
 ('Linear_Velocity_Y_World', 0.0),
 ('Linear_Velocity_Z_World', 0.0),
 ('Linear_Acceleration_X_World', 0.0),
 ('Linear_Acceleration_Y_World', 0.0),
 ('Linear_Acceleration_Z_World', 0.0),
 ('Rotational_Velocity_X_World', 0.0),
 ('Rotational_Velocity_Y_World', 0.0),
 ('Rotational_Velocity_Z_World', 0.0),
 ('Rotational_Acceleration_X_World', 0.0),
 ('Rotational_Acceleration_Y_World', 0.0),
 ('Rotational_Acceleration_Z_World', 0.0),
 ('Linear_Velocity_X_Relative', 0.0),
 ('Linear_Velocity_Y_Relative', 0.0),
 ('Linear_Velocity_Z_Relative', 0.0),
 ('Linear_Acceleration_X_Relative', 0.0),
 ('Linear_Acceleration_Y_Relative', 0.0),
 ('Linear_Acceleration_Z_Relative_Relative', 0.0),
 ('Rotational_Velocity_X_Relative', 0.0),
 ('Rotational_Velocity_Y_Relative', 0.0),
 ('Rotational_Velocity_Z_Rela