In [1]:
#!pip install scikit-learn

In [2]:
# Notebook to carry out predictions on the skateboard data dataset
# Iain McIntosh
# 10/23/2023

# import libraries
import pandas as pd
import numpy as np
import plotly.graph_objs as go
%matplotlib inline

from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

# setfilename
filename = 'dftracks2000.csv'

# import dataset
dftracks = pd.read_csv(filename)

In [3]:
# print the first 5 rows
dftracks.head()

# print the shape
dftracks.shape

(8000, 5)

In [4]:
# rename 0 index column to index

dftracks.rename(columns={'Unnamed: 0':'index'}, inplace=True)
print(dftracks.columns)

Index(['index', 'frame', 'point', 'X', 'Y'], dtype='object')


In [5]:
# points are 0 = head, 1 = body, 2 = left wheel, 3 = right wheel

# remove point 3 from the dataset as it is not needed left wheel will be target
dftracks = dftracks[dftracks['point'] != 3]

In [6]:
# format to match the format of the training data

# remove Unnamed: 0 column
dftracks = dftracks.drop('index', axis=1)

# move point to the leftmost column
point = dftracks['point']
dftracks.drop(labels=['point'], axis=1, inplace=True)
dftracks.insert(0, 'point', point)

# remove X for simplicity - it is probably a great feature to use but it adds complexity
dftracks = dftracks.drop('X', axis=1)

# rename Y to target
dftracks.rename(columns={'Y':'target'}, inplace=True)

In [7]:
# print the shape
dftracks.shape

(6000, 3)

In [8]:
# change name to melt
melt = dftracks

# convert all fields to float
melt = melt.astype(float)

# set the columns to specify as id_vars
id_vars = ['point', 'frame', 'target']

# sort the melt
melt = melt.sort_values(by=['frame', 'point'])
# print the first 5 rows
melt.head()

# print the data types
melt.dtypes

# print the first 5 rows
melt.head()



Unnamed: 0,point,frame,target
0,0.0,0.0,-185.910416
1,1.0,0.0,-231.862278
2,2.0,0.0,-309.274549
4,0.0,1.0,-185.368552
5,1.0,1.0,-232.258315


In [9]:
# print the shape
melt.shape

(6000, 3)

In [10]:
# split the data

# how many total frames
total_frames = melt['frame'].nunique()

# set percentage split
split_percent = 0.8

# calculate split point
split_point = int(total_frames * split_percent)

melt_train = melt [melt['frame'] < split_point].copy()
melt_valid = melt [melt['frame'] >= split_point].copy()

# print the first 5 rows
print('Head of melt_train')
melt_train.head()

Head of melt_train


Unnamed: 0,point,frame,target
0,0.0,0.0,-185.910416
1,1.0,0.0,-231.862278
2,2.0,0.0,-309.274549
4,0.0,1.0,-185.368552
5,1.0,1.0,-232.258315


In [11]:
# print shape
print('Shape of melt_train')
melt_train.shape

Shape of melt_train


(4800, 3)

In [12]:
# setup a 1-step target

shifttarget = 1

#group data by product code
melt_train['target_next_frame'] = melt_train.groupby(['point'])['target'].shift(-shifttarget)

#print the first 5 rows
print(melt_train.head())

# Validate

melt_valid["target_next_frame"] = melt_valid.groupby(['point'])['target'].shift(-shifttarget)

# dropna
melt_train = melt_train.dropna()
melt_valid = melt_valid.dropna()

# print the first 5 rows
print(melt_valid.head())

# print the shape
print(melt_valid.shape)

   point  frame      target  target_next_frame
0    0.0    0.0 -185.910416        -185.368552
1    1.0    0.0 -231.862278        -232.258315
2    2.0    0.0 -309.274549        -308.638573
4    0.0    1.0 -185.368552        -185.618992
5    1.0    1.0 -232.258315        -232.514820
      point   frame      target  target_next_frame
6400    0.0  1600.0 -213.588896        -211.304483
6401    1.0  1600.0 -242.118244        -239.088836
6402    2.0  1600.0 -305.181456        -300.843573
6404    0.0  1601.0 -211.304483        -210.238667
6405    1.0  1601.0 -239.088836        -236.079111
(1197, 4)


In [13]:
melt_train = melt_train.dropna()
print(melt_train.head())
print(melt_train.tail())

   point  frame      target  target_next_frame
0    0.0    0.0 -185.910416        -185.368552
1    1.0    0.0 -231.862278        -232.258315
2    2.0    0.0 -309.274549        -308.638573
4    0.0    1.0 -185.368552        -185.618992
5    1.0    1.0 -232.258315        -232.514820
      point   frame      target  target_next_frame
6389    1.0  1597.0 -254.154367        -249.709654
6390    2.0  1597.0 -314.927359        -311.881170
6392    0.0  1598.0 -221.666164        -217.813482
6393    1.0  1598.0 -249.709654        -246.200695
6394    2.0  1598.0 -311.881170        -308.861876


In [14]:
# add future shifted diff feature

# Train

melt_train["diff_frame"] = melt_train.groupby(['point'])['target'].diff(-shifttarget)

# dropna
melt_train = melt_train.dropna()

# print the first 5 rows
print(melt_train.head())

# print the shape
print(melt_train.shape)

# Validate

melt_valid["diff_frame"] = melt_valid.groupby(['point'])['target'].diff(-shifttarget)

# dropna
melt_valid = melt_valid.dropna()

# print the first 5 rows
print(melt_valid.head())

# print the shape
print(melt_valid.shape)

   point  frame      target  target_next_frame  diff_frame
0    0.0    0.0 -185.910416        -185.368552   -0.541863
1    1.0    0.0 -231.862278        -232.258315    0.396037
2    2.0    0.0 -309.274549        -308.638573   -0.635977
4    0.0    1.0 -185.368552        -185.618992    0.250440
5    1.0    1.0 -232.258315        -232.514820    0.256505
(4794, 5)
      point   frame      target  target_next_frame  diff_frame
6400    0.0  1600.0 -213.588896        -211.304483   -2.284412
6401    1.0  1600.0 -242.118244        -239.088836   -3.029408
6402    2.0  1600.0 -305.181456        -300.843573   -4.337883
6404    0.0  1601.0 -211.304483        -210.238667   -1.065817
6405    1.0  1601.0 -239.088836        -236.079111   -3.009725
(1194, 5)


In [15]:
# set the columns to specify as id_vars
id_vars = ['point', 'frame', 'target','target_next_frame', 'diff_frame']

In [16]:
# true / false mask for NA values in the data assume point cannot be used

def mape(y_true, y_pred):
    ape = np.abs((y_true - y_pred) / y_true)
    ape[~np.isfinite(ape)] = 1
    return np.mean(ape)

def wmape(y_true, y_pred):
    return np.sum(np.abs(y_true - y_pred)) / np.sum(np.abs(y_true))

In [17]:
y_pred = melt_train['target']
y_true = melt_train['target_next_frame']

In [18]:
mape(y_true, y_pred)

0.0069408553135016605

In [19]:
wmape(y_true, y_pred)

0.006576587535903874

In [20]:
# train the model

melt_train.head(5)

# set the features

features = ['target', 'target_next_frame', 'diff_frame']

# print the first 5 rows
melt_train.head()

Unnamed: 0,point,frame,target,target_next_frame,diff_frame
0,0.0,0.0,-185.910416,-185.368552,-0.541863
1,1.0,0.0,-231.862278,-232.258315,0.396037
2,2.0,0.0,-309.274549,-308.638573,-0.635977
4,0.0,1.0,-185.368552,-185.618992,0.25044
5,1.0,1.0,-232.258315,-232.51482,0.256505


In [21]:
# imputer

imputer = SimpleImputer()

Xtr = imputer.fit_transform(melt_train[features])
ytr = melt_train['target_next_frame']

# training the model

mdl = RandomForestRegressor(n_estimators=100, n_jobs=6, random_state=0)
mdl.fit(Xtr, ytr)

In [22]:
# evaluate the model

# print features type
print(type(features))

Xval = imputer.transform(melt_valid[features])
yval = melt_valid['target_next_frame']

p = mdl.predict(Xval)

<class 'list'>


In [23]:
# print first 5 predictions
print('First 5 predictions')
print(p[0:5])

# print first 5 actuals
print('First 5 actuals')
print(yval[0:5])


First 5 predictions
[-211.28720984 -239.11399155 -300.8415184  -210.23441834 -236.08388572]
First 5 actuals
6400   -211.304483
6401   -239.088836
6402   -300.843573
6404   -210.238667
6405   -236.079111
Name: target_next_frame, dtype: float64


In [24]:
mape(yval, p)

0.00023072130080197274

In [25]:
wmape(yval, p)

0.00016825257256132003

In [26]:
# draw a graph of frame on x and p and yval on y as a scatter plot

# create a dataframe of frame and p
df = pd.DataFrame({'frame': melt_valid['frame'], 'p': p, 'yval': yval})

# print the first 5 rows
print(df.head())

# plot frame as x and p and yval as y series

# create a trace
trace1 = go.Scatter(
    x = df['frame'],
    y = df['p'],
    mode = 'markers',
    name = 'Predictions'
)

# create a trace
trace2 = go.Scatter(
    x = df['frame'],
    y = df['yval'],
    mode = 'markers',
    name = 'Actuals'
)

# create a data list
data = [trace1, trace2]

# create a layout
layout = go.Layout(
    title = 'Predictions vs Actuals for all points 1 frame ahead',
    xaxis = dict(title = 'Frame'),
    yaxis = dict(title = 'Target')
)

# create a figure
fig = go.Figure(data=data, layout=layout)

# plot the figure
fig.show()




       frame           p        yval
6400  1600.0 -211.287210 -211.304483
6401  1600.0 -239.113992 -239.088836
6402  1600.0 -300.841518 -300.843573
6404  1601.0 -210.234418 -210.238667
6405  1601.0 -236.083886 -236.079111


In [27]:
# extend the model to predict 1 second or 24 frames into the future

frames_into_future = 100

melt_train['target_next24frames'] = melt_train.groupby("point")['target'].shift(-frames_into_future)
melt_valid['target_next24frames'] = melt_valid.groupby("point")['target'].shift(-frames_into_future)

print(melt_train[melt_train['point'] == 1].head())

# remove NA values

melt_train = melt_train.dropna()

    point  frame      target  target_next_frame  diff_frame   
1     1.0    0.0 -231.862278        -232.258315    0.396037  \
5     1.0    1.0 -232.258315        -232.514820    0.256505   
9     1.0    2.0 -232.514820        -232.880330    0.365510   
13    1.0    3.0 -232.880330        -233.168149    0.287819   
17    1.0    4.0 -233.168149        -233.672247    0.504098   

    target_next24frames  
1           -270.266676  
5           -269.234161  
9           -268.574123  
13          -265.701971  
17          -263.387718  


In [28]:
# set the features
features = ['target', 'target_next_frame', 'diff_frame', 'target_next24frames']

imputer = SimpleImputer()
Xtr = imputer.fit_transform(melt_train[features])
ytr = melt_train[['target_next_frame', 'target_next24frames']]

mdl = RandomForestRegressor(n_estimators=100, random_state=0, n_jobs=6)
mdl.fit(Xtr, ytr)

In [29]:
Xval = imputer.transform(melt_valid[features])
yval = melt_valid[['target_next_frame', 'target_next24frames']]

p = mdl.predict(Xval)

In [30]:
mape(yval, p)

0.1296189166418723

In [31]:
wmape(yval, p)

target_next_frame      0.005253
target_next24frames    0.003465
dtype: float64

In [32]:
melt_valid.tail()

Unnamed: 0,point,frame,target,target_next_frame,diff_frame,target_next24frames
7985,1.0,1996.0,-211.272225,-211.103783,-0.168443,
7986,2.0,1996.0,-281.898823,-284.530106,2.631283,
7988,0.0,1997.0,-175.300384,-176.753268,1.452885,
7989,1.0,1997.0,-211.103783,-211.909933,0.80615,
7990,2.0,1997.0,-284.530106,-287.41313,2.883024,


In [33]:
# create new dataframe from melt_valid

print(melt_valid.head())

new_examples = melt_valid

# print the first 5 rows
print(new_examples.head())

# remove NA from new_examples
new_examples = new_examples.dropna()

      point   frame      target  target_next_frame  diff_frame   
6400    0.0  1600.0 -213.588896        -211.304483   -2.284412  \
6401    1.0  1600.0 -242.118244        -239.088836   -3.029408   
6402    2.0  1600.0 -305.181456        -300.843573   -4.337883   
6404    0.0  1601.0 -211.304483        -210.238667   -1.065817   
6405    1.0  1601.0 -239.088836        -236.079111   -3.009725   

      target_next24frames  
6400          -182.165051  
6401          -219.978046  
6402          -283.322411  
6404          -185.022826  
6405          -223.785195  
      point   frame      target  target_next_frame  diff_frame   
6400    0.0  1600.0 -213.588896        -211.304483   -2.284412  \
6401    1.0  1600.0 -242.118244        -239.088836   -3.029408   
6402    2.0  1600.0 -305.181456        -300.843573   -4.337883   
6404    0.0  1601.0 -211.304483        -210.238667   -1.065817   
6405    1.0  1601.0 -239.088836        -236.079111   -3.009725   

      target_next24frames  
6400      

In [34]:
p = mdl.predict(new_examples[features])
print(p)

[[-215.04644251 -184.47936244]
 [-239.4377213  -223.38535523]
 [-300.71989403 -283.16890583]
 ...
 [-218.35643907 -175.50353508]
 [-246.06068945 -213.18334394]
 [-299.32414227 -284.82327719]]



X has feature names, but RandomForestRegressor was fitted without feature names



In [35]:
new_examples['pred_target_next_frame'] = p[:, 0]
new_examples['pred_target_next24frames'] = p[:, 1]

new_examples.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,point,frame,target,target_next_frame,diff_frame,target_next24frames,pred_target_next_frame,pred_target_next24frames
6400,0.0,1600.0,-213.588896,-211.304483,-2.284412,-182.165051,-215.046443,-184.479362
6401,1.0,1600.0,-242.118244,-239.088836,-3.029408,-219.978046,-239.437721,-223.385355
6402,2.0,1600.0,-305.181456,-300.843573,-4.337883,-283.322411,-300.719894,-283.168906
6404,0.0,1601.0,-211.304483,-210.238667,-1.065817,-185.022826,-212.957914,-186.879476
6405,1.0,1601.0,-239.088836,-236.079111,-3.009725,-223.785195,-236.154473,-225.215475


In [36]:
# plot new_examples dataframe as a scatter plot
# frame is the x axis
# on the y-axis plot_target_next24frames and pred_target_next24frame

# create a trace
trace1 = go.Scatter(
    x = new_examples['frame'],
    y = new_examples['target_next24frames'],
    mode = 'markers',
    name = 'Actuals'
)

# create a trace
trace2 = go.Scatter(
    x = new_examples['frame'],
    y = new_examples['pred_target_next24frames'],
    mode = 'markers',
    name = 'Predictions'
)

# create a data list
data = [trace1, trace2]

# create a layout
layout = go.Layout(
    title = 'Predictions vs Actuals for all points ' +  str(frames_into_future) + ' frames ahead',
    xaxis = dict(title = 'Frame'),
    yaxis = dict(title = 'Target')
)

# create a figure
fig = go.Figure(data=data, layout=layout)

# plot the figure
fig.show()