In [25]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
%load_ext version_information
%version_information numpy, pandas, matplotlib
%version_information
fig_prefix = "../figures/2018-07-22-jw-weaselball-heatmap_"
data_prefix = "../data/2018-07-22-jw-weaselball-heatmap_"


The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information


In [26]:
df = pd.read_csv('../data/collection.csv', index_col=False)
df.head()


Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Velocity_Z_Relative,Linear_Acceleration_X_Relative,Linear_Acceleration_Y_Relative,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative
0,0 0,48,0.0,0.0,0.0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.023433,-0.578691,1.26987,0.0,0.0,0.0,14.0356,28.9883,11.533
1,0 0,49,0.0,0.0,0.0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,27.5628,-10.7006,-39.5994,0.0,0.0,0.0,-343.126,-1903.24,927.512
2,0 0,50,0.0,0.0,0.0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,-1.80407,-0.274777,1.31604,0.0,0.0,0.0,36.6215,-37.7046,41.7151
3,0 0,51,0.0,0.0,0.0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0 1000000,48,0.0,0.0,-4.04756e-21,-0.009433,-0.02605,0.041,1.1986,1.1986,...,-0.104303,-20.0471,5.02931,-104.303,-0.331358,-4.25314,-0.37465,-320.43,-4271.88,-366.004


In [27]:
#Clean x,y data
df_clean = df.copy()
#Clean up the data

#Shift X and Y over so that way it could be made easier to use. Currently the world reference is at the center of the "play area"
LENGTH_OF_BOX = 1.127 #This can be obtained from the .sdf file of the weazelball enclosure in gazebo

df_clean['Mount_X'] += LENGTH_OF_BOX / 2
df_clean['Mount_Y'] += LENGTH_OF_BOX / 2
df_clean.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Velocity_Z_Relative,Linear_Acceleration_X_Relative,Linear_Acceleration_Y_Relative,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative
0,0 0,48,0.5635,0.5635,0.0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.023433,-0.578691,1.26987,0.0,0.0,0.0,14.0356,28.9883,11.533
1,0 0,49,0.5635,0.5635,0.0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,27.5628,-10.7006,-39.5994,0.0,0.0,0.0,-343.126,-1903.24,927.512
2,0 0,50,0.5635,0.5635,0.0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,-1.80407,-0.274777,1.31604,0.0,0.0,0.0,36.6215,-37.7046,41.7151
3,0 0,51,0.5635,0.5635,0.0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0 1000000,48,0.5635,0.5635,-4.04756e-21,-0.009433,-0.02605,0.041,1.1986,1.1986,...,-0.104303,-20.0471,5.02931,-104.303,-0.331358,-4.25314,-0.37465,-320.43,-4271.88,-366.004


In [28]:
#Discretize x,y
NUMBER_OF_SQUARES = 100 #This should be a square number to create equal sized squares.
RESOLUTION_OF_S1 = 0.1 #This is used to discretize the yaw angle over 0 - 2*pi


df_discretized = df_clean.copy()
mappingBoxConstant = (NUMBER_OF_SQUARES ** (1/2.0)) / (LENGTH_OF_BOX)
for index, row in df_clean.iterrows():
    df_discretized.at[index, 'Mount_X'] = int(row['Mount_X'] * mappingBoxConstant)
    df_discretized.at[index, 'Mount_Y'] = int(row['Mount_Y'] * mappingBoxConstant)

df_discretized.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Velocity_Z_Relative,Linear_Acceleration_X_Relative,Linear_Acceleration_Y_Relative,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative
0,0 0,48,5.0,5.0,0.0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.023433,-0.578691,1.26987,0.0,0.0,0.0,14.0356,28.9883,11.533
1,0 0,49,5.0,5.0,0.0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,27.5628,-10.7006,-39.5994,0.0,0.0,0.0,-343.126,-1903.24,927.512
2,0 0,50,5.0,5.0,0.0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,-1.80407,-0.274777,1.31604,0.0,0.0,0.0,36.6215,-37.7046,41.7151
3,0 0,51,5.0,5.0,0.0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0 1000000,48,5.0,5.0,-4.04756e-21,-0.009433,-0.02605,0.041,1.1986,1.1986,...,-0.104303,-20.0471,5.02931,-104.303,-0.331358,-4.25314,-0.37465,-320.43,-4271.88,-366.004


In [29]:
#Create goal columns (x_t+1, y_t+1, z_t+1)

df_discretized['Mount_X_Next'] = df_discretized['Mount_X']
df_discretized['Mount_Y_Next'] = df_discretized['Mount_Y']
df_discretized['Mount_Yaw_Next'] = df_discretized['Mount_Yaw']

df_discretized['Mount_X_Next'].shift(-1)
df_discretized['Mount_Y_Next'].shift(-1)
df_discretized['Mount_Yaw_Next'].shift(-1)

df_discretized.drop(df_discretized.index[len(df_discretized)-1])
df_discretized.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative,Mount_X_Next,Mount_Y_Next,Mount_Yaw_Next
0,0 0,48,5.0,5.0,0.0,-0.012243,-0.025526,0.01,1.2,1.2,...,1.26987,0.0,0.0,0.0,14.0356,28.9883,11.533,5.0,5.0,0.0
1,0 0,49,5.0,5.0,0.0,0.036942,0.072364,0.01,0.5015,0.5015,...,-39.5994,0.0,0.0,0.0,-343.126,-1903.24,927.512,5.0,5.0,0.0
2,0 0,50,5.0,5.0,0.0,-0.110867,0.022535,0.01,0.0868,0.0868,...,1.31604,0.0,0.0,0.0,36.6215,-37.7046,41.7151,5.0,5.0,0.0
3,0 0,51,5.0,5.0,0.0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0
4,0 1000000,48,5.0,5.0,-4.04756e-21,-0.009433,-0.02605,0.041,1.1986,1.1986,...,-104.303,-0.331358,-4.25314,-0.37465,-320.43,-4271.88,-366.004,5.0,5.0,-4.04756e-21


In [23]:
#one-hot encode the ID

In [30]:
#Split into training and testing
df_discretized['is_train'] = np.random.uniform(0, 1, len(df)) <= .75
df_discretized.head()

Unnamed: 0,Time,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,...,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative,Mount_X_Next,Mount_Y_Next,Mount_Yaw_Next,is_train
0,0 0,48,5.0,5.0,0.0,-0.012243,-0.025526,0.01,1.2,1.2,...,0.0,0.0,0.0,14.0356,28.9883,11.533,5.0,5.0,0.0,False
1,0 0,49,5.0,5.0,0.0,0.036942,0.072364,0.01,0.5015,0.5015,...,0.0,0.0,0.0,-343.126,-1903.24,927.512,5.0,5.0,0.0,True
2,0 0,50,5.0,5.0,0.0,-0.110867,0.022535,0.01,0.0868,0.0868,...,0.0,0.0,0.0,36.6215,-37.7046,41.7151,5.0,5.0,0.0,True
3,0 0,51,5.0,5.0,0.0,0.08873,-0.073107,0.01,0.47689,0.47689,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,True
4,0 1000000,48,5.0,5.0,-4.04756e-21,-0.009433,-0.02605,0.041,1.1986,1.1986,...,-0.331358,-4.25314,-0.37465,-320.43,-4271.88,-366.004,5.0,5.0,-4.04756e-21,False


In [31]:
#Create dataframes based on split
train, test = df_discretized[df_discretized['is_train']==True], df_discretized[df_discretized['is_train']==False]
# Show the number of observations for the test and training dataframes
print('Number of observations in the training data:', len(train))
print('Number of observations in the test data:',len(test))

('Number of observations in the training data:', 602014)
('Number of observations in the test data:', 200134)


In [32]:
#print list of information of each column (sanity check)
df_discretized.describe()

Unnamed: 0,ID,Mount_X,Mount_Y,Mount_Yaw,Pos_x,Pos_y,Pos_z,Yaw,Pitch,Roll,...,Linear_Acceleration_Z_Relative_Relative,Rotational_Velocity_X_Relative,Rotational_Velocity_Y_Relative,Rotational_Velocity_Z_Relative,Rotational_Acceleration_X_Relative,Rotational_Acceleration_Y_Relative,Rotational_Acceleration_Z_Relative,Mount_X_Next,Mount_Y_Next,Mount_Yaw_Next
count,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,...,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0,802148.0
mean,49.5,4.002054,4.000199,0.823019,-0.016842,-0.073193,0.041,-0.014942,-0.014942,0.003067,...,0.061054,0.038016,-1.739983,-0.621639,-0.023886,-0.433075,-0.059419,4.002054,4.000199,0.823019
std,1.118035,0.04528,0.014122,0.121607,0.075979,0.05983,6.9e-05,0.355916,0.355916,1.814801,...,3.935653,1.476612,1.246721,1.577533,79.142703,114.08707,80.230471,0.04528,0.014122,0.121607
min,48.0,4.0,4.0,-0.011652,-0.148693,-0.176816,0.01,-1.51857,-1.51857,-3.14159,...,-390.532,-8.69311,-11.3207,-10.5481,-5308.28,-8986.25,-4673.62,4.0,4.0,-0.011652
25%,48.75,4.0,4.0,0.782846,-0.081025,-0.118614,0.041,-0.224346,-0.224346,-1.56684,...,-1.57342,-0.7864,-2.40493,-1.428573,-26.497625,-36.4396,-23.385975,4.0,4.0,0.782846
50%,49.5,4.0,4.0,0.850708,-0.032201,-0.074804,0.041,0.013392,0.013392,0.004818,...,0.012268,0.020535,-2.005415,-0.520857,-1.36099,-0.59142,-0.279429,4.0,4.0,0.850708
75%,50.25,4.0,4.0,0.916945,0.044804,-0.021552,0.041,0.213985,0.213985,1.574613,...,1.5981,0.915959,-0.88215,0.32462,24.215025,35.5315,23.3469,4.0,4.0,0.916945
max,51.0,5.0,5.0,1.0295,0.122253,0.081919,0.041426,1.49133,1.49133,3.14159,...,225.912,10.1539,10.7698,10.1,3254.11,7376.6,5321.93,5.0,5.0,1.0295


In [33]:
# Create a list of the feature column's names
features = df_discretized.columns[1:35]

# View features
features

Index([u'ID', u'Mount_X', u'Mount_Y', u'Mount_Yaw', u'Pos_x', u'Pos_y',
       u'Pos_z', u'Yaw', u'Pitch', u'Roll', u'Linear_Velocity_X_World',
       u'Linear_Velocity_Y_World', u'Linear_Velocity_Z_World',
       u'Linear_Acceleration_X_World', u'Linear_Acceleration_Y_World',
       u'Linear_Acceleration_Z_World', u'Rotational_Velocity_X_World',
       u'Rotational_Velocity_Y_World', u'Rotational_Velocity_Z_World',
       u'Rotational_Acceleration_X_World', u'Rotational_Acceleration_Y_World',
       u'Rotational_Acceleration_Z_World', u'Linear_Velocity_X_Relative',
       u'Linear_Velocity_Y_Relative', u'Linear_Velocity_Z_Relative',
       u'Linear_Acceleration_X_Relative', u'Linear_Acceleration_Y_Relative',
       u'Linear_Acceleration_Z_Relative_Relative',
       u'Rotational_Velocity_X_Relative', u'Rotational_Velocity_Y_Relative',
       u'Rotational_Velocity_Z_Relative',
       u'Rotational_Acceleration_X_Relative',
       u'Rotational_Acceleration_Y_Relative',
       u'Rotational

In [38]:
#Train the classifier
clf = RandomForestClassifier(n_jobs=-1, verbose = 1)
clf.fit(train[features], train['Mount_X_Next'])

[Parallel(n_jobs=-1)]: Done   6 out of  10 | elapsed:    2.4s remaining:    1.6s
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    3.4s finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=1,
            warm_start=False)

In [39]:
# Apply the Classifier we trained to the test data 
preds= clf.predict(test[features])

[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.0s finished


In [40]:
# View the predicted probabilities of the first 10 observations
clf.predict_proba(test[features])[0:10]

[Parallel(n_jobs=8)]: Done   6 out of  10 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done  10 out of  10 | elapsed:    0.0s finished


array([[0. , 1. ],
       [0.1, 0.9],
       [0. , 1. ],
       [0. , 1. ],
       [0. , 1. ],
       [0. , 1. ],
       [0. , 1. ],
       [0. , 1. ],
       [0.1, 0.9],
       [0. , 1. ]])

In [41]:
# Create confusion matrix
pd.crosstab(test['Mount_X_Next'], preds, rownames=['Actual Species'], colnames=['Predicted Species'])

Predicted Species,4.0,5.0
Actual Species,Unnamed: 1_level_1,Unnamed: 2_level_1
4.0,199741,0
5.0,0,393


In [42]:
# View a list of the features and their importance scores
list(zip(train[features], clf.feature_importances_))

[('ID', 0.0009665704155942106),
 ('Mount_X', 0.6275507754647671),
 ('Mount_Y', 0.02731782808666657),
 ('Mount_Yaw', 0.1103825773448234),
 ('Pos_x', 0.015647197388796892),
 ('Pos_y', 0.009749489787305677),
 ('Pos_z', 0.0015296893523177317),
 ('Yaw', 0.05484189660446007),
 ('Pitch', 0.02798901593101783),
 ('Roll', 0.014887470205248965),
 ('Linear_Velocity_X_World', 0.0051829179137507135),
 ('Linear_Velocity_Y_World', 0.013830285926463487),
 ('Linear_Velocity_Z_World', 0.00013591251083640993),
 ('Linear_Acceleration_X_World', 0.00037324934219456606),
 ('Linear_Acceleration_Y_World', 0.002216184429118267),
 ('Linear_Acceleration_Z_World', 7.173001382583407e-05),
 ('Rotational_Velocity_X_World', 0.016300857616531807),
 ('Rotational_Velocity_Y_World', 0.012517428233013637),
 ('Rotational_Velocity_Z_World', 0.011167566157900813),
 ('Rotational_Acceleration_X_World', 6.998161272509284e-07),
 ('Rotational_Acceleration_Y_World', 0.0024055146508218735),
 ('Rotational_Acceleration_Z_World', 0.0008