In [91]:
import tensorflow as tf
import os
import pandas as pd
import strym
from strym import strymread
import matplotlib.pyplot as plt
from pylab import rcParams
import strym.DBC_Read_Tools as dbc
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate

# Load in Data
I load in data from a .csv file with the columns `['Time', 'Bus', 'MessageID', 'Message', 'MessageLength', 'Latitude', 'Longitude']`. The columns for the training data are `['Time', 'Bus', 'MessageID', 'Message', 'MessageLength']` while the columns for the labels are `['Time', 'Latitude', 'Longitude']`.

In [92]:
use_decoded = True
apply_window = True

In [93]:
data_path = ''
df = None
if use_decoded:
    data_path = 'outputs/'
    combined_decoded_dataframes = [pd.read_csv(data_path + '/' + file) for file in os.listdir(data_path)]
    df = pd.concat(combined_decoded_dataframes)
    df.describe
else:
    data_path = '../encoded_data.csv'
    df = strymread(csvfile=data_path).dataframe
    df.describe

In [114]:
debug_num_samples = 8000
r = df[:debug_num_samples]
test_num_samples = 2000
test_df = df[-test_num_samples:]

features = []
if use_decoded:
    features = ['Time', 'Speed', 'LatAcceleration', 'LongAcceleration', 'ZAcceleration', 'YawRate', 'SteerTorque', 'SteerRate',
                'SteerAngle', 'SteerFraction', 'FLWheelSpeed', 'FRWheelSpeed', 'RRWheelSpeed', 'RLWheelSpeed']
else:
    features = ['Time', 'Bus', 'MessageID', 'Message', 'MessageLength']
    
labels = ['Time', 'Latitude', 'Longitude']

can_data_df = r[features][:]
labels_df = r[labels][:]

test_data_df = test_df[features][:].reset_index(drop=True)
test_labels_df = test_df[labels][:].reset_index(drop=True)

print(test_data_df.shape, test_labels_df.shape)

(2000, 14) (2000, 3)


The next cell is used to convert the Messages from strings representing hexadecimal values to integers.

In [115]:
if not use_decoded:
    for i, message in enumerate(can_data_df['Message']):
        can_data_df['Message'][i] = int(message, 16)
        test_data_df['Message'][i] = int(test_data_df['Message'][i], 16)

The cell below changes the values in the 'Latitude' and 'Longitude' columns from absolute latitude and longitude to the change in latitude and longitude, respectively.

In [116]:
time = labels_df['Time'][:-1]

lat = [0]
long = [0]
test_lat = [0]
test_long = [0]

for i in range(len(labels_df['Latitude']) - 1):
    lat.append(labels_df['Latitude'][i+1] - labels_df['Latitude'][i])
    long.append(labels_df['Longitude'][i+1] - labels_df['Longitude'][i])
    
for i in range(len(test_labels_df['Latitude']) - 1):
    test_lat.append(test_labels_df['Latitude'][i+1] - test_labels_df['Latitude'][i])
    test_long.append(test_labels_df['Longitude'][i+1] - test_labels_df['Longitude'][i])


labels_df['Latitude'] = lat
labels_df['Longitude'] = long
test_labels_df['Latitude'] = test_lat
test_labels_df['Longitude'] = test_long

In [117]:
data = np.array(can_data_df)
labels = np.array(labels_df)

test_data = np.array(test_data_df)
test_labels = np.array(test_labels_df)

print(data.shape, labels.shape)
print(test_data.shape, test_labels.shape)

(8000, 14) (8000, 3)
(2000, 14) (2000, 3)


This cell creates a window that slides over the data, creating subsamples that provide the model with context of previous changes in latitude and longitude.

In [118]:
sequenceLength = 5

stackedData = []
stackedLabels = []

stackedTestData = []
stackedTestLabels = []

# split can_data into subsampled sequences
for i in range(debug_num_samples-sequenceLength):
    stackedData.append(data[i:i+sequenceLength])
    stackedLabels.append(labels[i:i+sequenceLength])
    
for i in range(test_num_samples-sequenceLength):
    stackedTestData.append(test_data[i:i+sequenceLength])
    stackedTestLabels.append(test_labels[i:i+sequenceLength])
    
stackedData = np.array(stackedData)
stackedLabels = np.array(stackedLabels)

stackedTestData = np.array(stackedTestData)
stackedTestLabels = np.array(stackedTestLabels)

print(stackedData.shape, stackedLabels.shape)
print(stackedTestData.shape, stackedTestLabels.shape)

(7995, 5, 14) (7995, 5, 3)
(1995, 5, 14) (1995, 5, 3)


In [119]:
data = stackedData.reshape((stackedData.shape[0], stackedData.shape[1]*stackedData.shape[2]))
labels = stackedLabels.reshape((stackedLabels.shape[0], stackedLabels.shape[1]*stackedLabels.shape[2]))

testData = stackedTestData.reshape((stackedTestData.shape[0], stackedTestData.shape[1]*stackedTestData.shape[2]))
testLabels = stackedTestLabels.reshape((stackedTestLabels.shape[0], stackedTestLabels.shape[1]*stackedTestLabels.shape[2]))
print(stackedData.shape, stackedLabels.shape)

(7995, 5, 14) (7995, 5, 3)


# The Model

In [120]:
model = RandomForestRegressor(n_estimators=data.shape[0]//3, max_depth=50)

In [121]:
model.fit(data, labels)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=50, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=2665, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

# Results

In [122]:
predictions = model.predict(data)
for i, p in enumerate(predictions[-10:]):
    print('Sample', i+1, '=='*27)
    print('Prediction:  ', p)
    print('Ground truth:', labels[i])


Prediction:   [ 1.61722586e+09  8.69592897e-06 -1.29285188e-05  1.61722586e+09
  8.67521260e-06 -1.29372443e-05  1.61722586e+09  8.68620851e-06
 -1.29436817e-05  1.61722586e+09  8.66512521e-06 -1.29522233e-05
  1.61722586e+09  8.66733245e-06 -1.29537807e-05]
Ground truth: [ 1.61722515e+09  0.00000000e+00  0.00000000e+00  1.61722515e+09
  3.81469727e-06  0.00000000e+00  1.61722515e+09  3.81469727e-06
  0.00000000e+00  1.61722515e+09  0.00000000e+00  0.00000000e+00
  1.61722515e+09  3.81469727e-06 -7.62939452e-06]
Prediction:   [ 1.61722586e+09  8.68541081e-06 -1.29358391e-05  1.61722586e+09
  8.67748677e-06 -1.29287413e-05  1.61722586e+09  8.67216968e-06
 -1.29547146e-05  1.61722586e+09  8.66021224e-06 -1.29319808e-05
  1.61722586e+09  8.66520780e-06 -1.29552692e-05]
Ground truth: [ 1.61722515e+09  3.81469727e-06  0.00000000e+00  1.61722515e+09
  3.81469727e-06  0.00000000e+00  1.61722515e+09  0.00000000e+00
  0.00000000e+00  1.61722515e+09  3.81469727e-06 -7.62939452e-06
  1.61722515e+

In [123]:
model.score(testData, testLabels)





-0.2130818142110059