In [24]:
import tensorflow as tf
import os
import pandas as pd
import strym
from strym import strymread
import matplotlib.pyplot as plt
from pylab import rcParams
import strym.DBC_Read_Tools as dbc
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate

# Load in Data
I load in data from a .csv file with the columns `['Time', 'Bus', 'MessageID', 'Message', 'MessageLength', 'Latitude', 'Longitude']`. The columns for the training data are `['Time', 'Bus', 'MessageID', 'Message', 'MessageLength']` while the columns for the labels are `['Time', 'Latitude', 'Longitude']`.

In [25]:
data_path = '../2020-12-02-00-00-49_5FNYF6H05HB089022_CAN_Messages.csv'

In [26]:
r = strymread(csvfile=data_path)

[2021_07_09_18_10_58] (root) INFO: Vehicle model infered is honda-pilot-2017


In [27]:
debug_num_samples = 2000
r_dataframe = r.dataframe[:debug_num_samples]

features = ['Time', 'Bus', 'MessageID', 'Message', 'MessageLength']
labels = ['Time', 'Latitude', 'Longitude']
can_data_df = r_dataframe[features][:]
labels_df = r_dataframe[labels][:]

The next cell is used to convert the Messages from strings representing hexadecimal values to integers.

In [28]:
for i, message in enumerate(can_data_df['Message']):
    can_data_df['Message'][i] = int(message, 16)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
data = np.array(can_data_df)
labels = np.array(labels_df)
print(data.shape, labels.shape)

The cell below changes the values in the 'Latitude' and 'Longitude' columns from absolute latitude and longitude to the change in latitude and longitude, respectively.

In [31]:
time = labels_df['Time'][:-1]
lat = labels_df['Latitude']
long = labels_df['Longitude']
labels_df['Latitude'] = [0].append([(lat[i+1] - lat[i]) for i in range((len(lat))-1)])
labels_df['Longitude'] = [0].append([(long[i+1] - long[i]) for i in range((len(long))-1)])

This cell creates a window that slides over the data, creating subsamples that provide the model with context of previous changes in latitude and longitude.

In [32]:
sequenceLength = 5

stackedData = []
stackedLabels = []

# split can_data into subsampled sequences
for i in range(debug_num_samples-sequenceLength):
    stackedData.append(data[i:i+sequenceLength])
    stackedLabels.append(labels[i:i+sequenceLength])
    
stackedData = np.array(stackedData)
stackedLabels = np.array(stackedLabels)
print(stackedData.shape, stackedLabels.shape)

(1995, 5, 5) (1995, 5, 3)


In [33]:
stackedData = stackedData.reshape((stackedData.shape[0], stackedData.shape[1]*stackedData.shape[2]))
stackedLabels = stackedLabels.reshape((stackedLabels.shape[0], stackedLabels.shape[1]*stackedLabels.shape[2]))
print(stackedData.shape, stackedLabels.shape)

(1995, 25) (1995, 15)


# The Model

In [34]:
model = RandomForestRegressor(n_estimators=data.shape[0]//3, max_depth=50)

In [35]:
model.fit(data, labels)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=50, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=666, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

# Results

In [36]:
predictions = model.predict(data)
print(predictions)
# for i, p in enumerate(predictions[100:300]):
#     print('Sample', i+1, '=='*27)
#     print('Prediction:  ', p)
#     print('Ground truth:', labels[i])
print(labels)

[[ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]
 ...
 [ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956133e+01 -1.10824048e+02]]
[[ 1.60686725e+09  3.22956488e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956488e+01 -1.10824048e+02]
 [ 1.60686725e+09  3.22956488e+01 -1.10824048e+02]
 ...
 [ 1.60686725e+09  3.22955852e+01 -1.10824049e+02]
 [ 1.60686725e+09  3.22955852e+01 -1.10824049e+02]
 [ 1.60686725e+09  3.22955852e+01 -1.10824049e+02]]


In [37]:
model.score(data, labels)





-3.2520114464120584e-06

In [50]:
cv = cross_validate(model, data, labels)
print(cv)











{'fit_time': array([0.84782505, 0.82456708, 0.80318308, 0.79720712, 0.87325907]), 'score_time': array([0.04675102, 0.04658103, 0.04719019, 0.04668689, 0.06040502]), 'test_score': array([-8.45646096e+01, -1.62660031e+01, -6.77400807e-03, -1.82105436e+01,
       -8.41358301e+01])}




