In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from joblib import load

In [2]:
# Deserialize the object stored in the pickle file with joblib; the cells below
# use it as a pandas DataFrame.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only point this at a file from a trusted source.
log_path = 'log_level.pkl'
data = load(log_path)
# Kept as the last expression so the notebook renders the summary table.
data.describe()

Unnamed: 0,latitude,longitude,timeZoneOffset,AccelPedalAngle_A,ATShiftPosition,EngineRPM,FuelConsumed,Odo,Speed,TripCount,AccelFB,AccelLR,BrakePedalInd,speed_mps,speed_mph,fuel_consumed_gal,AT_Gear,part,tow
count,532318.0,532318.0,532318.0,532318.0,532318.0,532318.0,59678.0,532313.0,532318.0,532318.0,532318.0,532318.0,532318.0,532318.0,532318.0,59678.0,532318.0,532318.0,532318.0
mean,33.073051,-96.884432,-6.0,11.963428,38.17934,1535.750488,1.461814,2568.075195,55.54929,432.323212,0.032621,0.066787,0.260547,15.43037,34.516716,0.000386,0.019285,84.992754,0.289923
std,0.111213,0.156008,0.0,13.092259,7.079235,731.117676,1.60978,2188.76001,44.576721,340.655121,0.652874,0.584469,0.438934,12.382433,27.698681,0.000425,0.013698,74.966658,0.453727
min,32.783611,-97.370538,-6.0,0.0,0.0,481.0,0.0,832.0,0.0,185.0,-6.8,-5.2,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,33.047077,-96.920009,-6.0,0.0,40.0,838.5,0.3315,1039.0,11.184999,192.0,-0.175,-0.1,0.0,3.106947,6.950034,8.8e-05,0.00607,30.0,0.0
50%,33.081795,-96.839017,-6.0,9.5,40.0,1485.400024,0.8045,1146.0,53.56225,195.0,0.0,0.05,0.0,14.878415,33.282026,0.000213,0.018879,63.0,0.0
75%,33.153984,-96.796873,-6.0,21.0,40.0,2084.800049,2.271,4808.0,89.596664,737.0,0.275,0.233333,1.0,24.887981,55.672768,0.0006,0.034083,113.0,1.0
max,33.225486,-96.664188,-6.0,100.0,40.0,5725.0,18.7005,6594.0,152.830002,1095.0,4.7,6.4,1.0,42.452812,94.964127,0.00494,0.067532,316.0,1.0


In [3]:
# Turns off pandas' chained-assignment warning for the whole session. Left in
# place so other cells behave exactly as before; this cell no longer depends on
# it because every filtered frame below is materialised with an explicit .copy().
pd.options.mode.chained_assignment = None

# Fuel Economy related features
# Keep only the samples that carry a FuelConsumed reading.
df_p = data[data['FuelConsumed'].notna()].copy()

# Time since the previous sample of the SAME trip segment. Differencing over the
# whole frame would compare the first sample of a segment with the last sample
# of whatever segment precedes it (possibly another vehicle), and a negative gap
# would slip through the `< 2` filter below and yield a negative distance.
# The first sample of each segment gets NaT here and is dropped by the filter.
# NOTE(review): assumes rows are in chronological order within a segment -- confirm.
segment_keys = ['VIN', 'TripCount', 'part']
df_p['cantimediff'] = df_p.groupby(segment_keys)['cantimestamp'].diff()
# Vectorised replacement for the per-row apply(lambda x: x.total_seconds()).
df_p['timediff_sec'] = df_p['cantimediff'].dt.total_seconds()

# Discard samples that follow a gap of 2 s or more (NaN gaps are dropped too,
# because NaN < 2 evaluates to False).
df_p = df_p[df_p['timediff_sec'] < 2].copy()

# Distance covered during the interval: seconds * metres per second.
df_p['acc_dist'] = df_p['timediff_sec'] * df_p['speed_mps']

# Fuel per unit speed. A zero speed produces +/-inf, which is mapped to NaN so
# the aggregates skip it. The replacement is assigned back to the column:
# calling replace(..., inplace=True) on `df_p['fuel/speed']` acts on a temporary
# Series and leaves the frame untouched under pandas Copy-on-Write.
df_p['fuel/speed'] = (
    df_p['fuel_consumed_gal'] / df_p['speed_mph']
).replace([np.inf, -np.inf], np.nan)

df_p['speed/RPM'] = df_p['speed_mph'] / df_p['EngineRPM']

# Collapse the per-sample fuel-economy rows into one record per
# (VIN, TripCount, part) group.
spread_stats = ['min', 'max', 'mean', 'median', 'std']
trip_agg_spec = {
    'vehicleName': ['first'],
    'dispatchModelType': ['first'],
    'gpsDateTime': ['min', 'max'],
    'acc_dist': ['sum'],
    'fuel_consumed_gal': ['sum'],
    'speed_mph': spread_stats,
    'AccelPedalAngle_A': spread_stats,
    'EngineRPM': spread_stats,
    'BrakePedalInd': spread_stats[2:],   # mean, median, std
    'fuel/speed': spread_stats,
    'speed/RPM': spread_stats[1:],       # max, mean, median, std
}
trips = df_p.groupby(['VIN', 'TripCount', 'part']).agg(trip_agg_spec).reset_index()

# The aggregation yields two-level column labels in the order of the spec above;
# they are replaced positionally with flat names, so this list must stay in step
# with `trip_agg_spec`.
trips.columns = (
    ['vin', 'tripcount', 'part', 'model', 'dispatchModelType',
     'start_time', 'end_time', 'distance_m', 'fuel_consumed_gal']
    + [f'speed_mph_{stat}' for stat in spread_stats]
    + [f'AccelPedalA_{stat}' for stat in spread_stats]
    + [f'EngineRPM_{stat}' for stat in spread_stats]
    + [f'BrakePedalInd_{stat}' for stat in spread_stats[2:]]
    + [f'fuel/speed_{stat}' for stat in spread_stats]
    + [f'speed/RPM_{stat}' for stat in spread_stats[1:]]
)

# AccelFB related features
# Observe only in Drive mode (ATShiftPosition == 40).
in_drive = data['ATShiftPosition'] == 40
df_a = data[in_drive]

# Samples where the accelerator is pressed and the brake indicator is not 1;
# the pedal angle is strictly positive here, so the ratio below never divides by zero.
throttle_only = (df_a['AccelPedalAngle_A'] > 0) & (df_a['BrakePedalInd'] != 1)
df_a1 = df_a[throttle_only].assign(
    **{'Acceleration/pedalA': lambda frame: frame['AccelFB'] / frame['AccelPedalAngle_A']}
)

# Per-group summary of AccelFB per unit of pedal angle.
trips2 = (
    df_a1.groupby(['VIN', 'TripCount', 'part'])
    .agg({'Acceleration/pedalA': ['min', 'max', 'mean', 'median', 'std']})
    .reset_index()
)
trips2.columns = [
    'vin', 'tripcount', 'part',
    'Acceleration/pedalA_min',
    'Acceleration/pedalA_max',
    'Acceleration/pedalA_mean',
    'Acceleration/pedalA_median',
    'Acceleration/pedalA_std',
]

# Samples taken with the brake indicator set while the vehicle is still moving.
brake_applied = df_a['BrakePedalInd'] == 1
still_moving = df_a['speed_mph'] > 0
df_a2 = df_a[brake_applied & still_moving]

# Per-group summary of AccelFB over those braking samples.
brake_stats = ['min', 'max', 'mean', 'median', 'std']
trips3 = (
    df_a2.groupby(['VIN', 'TripCount', 'part'])
    .agg({'AccelFB': brake_stats})
    .reset_index()
)
trips3.columns = ['vin', 'tripcount', 'part'] + [
    f'Brake_AccelFB_{stat}' for stat in brake_stats
]

# Per-group summary of both acceleration channels over all Drive-mode samples,
# plus the group's first `tow` value.
accel_stats = ['min', 'max', 'mean', 'median', 'std']
accel_agg_spec = {
    'AccelFB': accel_stats,
    'AccelLR': accel_stats,
    'tow': ['first'],
}
trips4 = df_a.groupby(['VIN', 'TripCount', 'part']).agg(accel_agg_spec).reset_index()

# Flat names assigned positionally, in the same order as `accel_agg_spec`.
trips4.columns = (
    ['vin', 'tripcount', 'part']
    + [f'AccelFB_{stat}' for stat in accel_stats]
    + [f'AccelLR_{stat}' for stat in accel_stats]
    + ['tow']
)

# Stitch the four per-group tables together on the shared group key.
merge_keys = ['vin', 'tripcount', 'part']
tripsdata = trips.merge(trips2, on=merge_keys, how='left')
tripsdata = tripsdata.merge(trips3, on=merge_keys, how='left')
# The last join is a RIGHT join: every group present in trips4 is kept, and a
# group missing from `trips` ends up with NaN in all fuel-economy columns.
# NOTE(review): the other joins are left joins -- confirm 'right' is intended.
tripsdata = tripsdata.merge(trips4, on=merge_keys, how='right')

In [4]:
# Restrict to the two models of interest and shuffle the rows reproducibly.
# (Despite its name, `data_tundra` holds both TUNDRA and TACOMA rows.)
truck_models = ['TUNDRA', 'TACOMA']
is_truck = tripsdata['model'].isin(truck_models)
data_tundra = tripsdata[is_truck].sample(frac=1, random_state=42)

# Feature columns fed to the model; the order here fixes the column order of X.
col = [
    'model', 'dispatchModelType', 'distance_m', 'fuel_consumed_gal',
    'speed_mph_min', 'speed_mph_max', 'speed_mph_mean', 'speed_mph_median', 'speed_mph_std',
    'AccelPedalA_min', 'AccelPedalA_max', 'AccelPedalA_mean', 'AccelPedalA_median', 'AccelPedalA_std',
    'EngineRPM_min', 'EngineRPM_max', 'EngineRPM_mean', 'EngineRPM_median', 'EngineRPM_std',
    'BrakePedalInd_mean', 'BrakePedalInd_median', 'BrakePedalInd_std',
    'fuel/speed_min', 'fuel/speed_max', 'fuel/speed_mean', 'fuel/speed_median', 'fuel/speed_std',
    'speed/RPM_max', 'speed/RPM_mean', 'speed/RPM_median', 'speed/RPM_std',
    'Acceleration/pedalA_min', 'Acceleration/pedalA_max', 'Acceleration/pedalA_mean',
    'Acceleration/pedalA_median', 'Acceleration/pedalA_std',
    # Braking features are deliberately left out:
    #'Brake_AccelFB_min', 'Brake_AccelFB_max','Brake_AccelFB_mean', 'Brake_AccelFB_median', 'Brake_AccelFB_std',
    'AccelFB_min', 'AccelFB_max', 'AccelFB_mean', 'AccelFB_median', 'AccelFB_std',
    'AccelLR_min', 'AccelLR_max', 'AccelLR_mean', 'AccelLR_median', 'AccelLR_std',
]

# Drop every row with a missing feature or label, then split off the target and
# one-hot encode the remaining non-numeric columns.
complete_rows = data_tundra[col + ['tow']].dropna()
y = complete_rows['tow']
X = pd.get_dummies(complete_rows.drop(columns=['tow']))

print(f'X Size: {X.shape}')
print(f'y class ratio:\n{y.value_counts()}')

X Size: (779, 48)
y class ratio:
0    542
1    237
Name: tow, dtype: int64


In [5]:
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split, cross_validate

# 5-fold cross-validation of a default LightGBM classifier on the tow label.
scoring = ('accuracy','roc_auc','precision','recall')
lgb = LGBMClassifier(random_state=42)
score = cross_validate(lgb, X, y, scoring=scoring, cv=5)

# Average each metric over the folds before reporting.
accuracy_mean = score['test_accuracy'].mean()
auc_mean = score['test_roc_auc'].mean()
precision_mean = score['test_precision'].mean()
recall_mean = score['test_recall'].mean()

print('Model Performance:\nAccuracy: {:0.3f}\nArea under curve: {:0.3f}'.format(accuracy_mean, auc_mean))
print('Precision: {:0.3f}\nRecall: {:0.3f}'.format(precision_mean, recall_mean))

OSError: dlopen(/Users/frank.xu/.venv/lib/python3.9/site-packages/lightgbm/lib_lightgbm.so, 6): Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib
  Referenced from: /Users/frank.xu/.venv/lib/python3.9/site-packages/lightgbm/lib_lightgbm.so
  Reason: image not found

In [2]:
import lightgbm

OSError: dlopen(/Users/frank.xu/.venv/lib/python3.9/site-packages/lightgbm/lib_lightgbm.so, 6): Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib
  Referenced from: /Users/frank.xu/.venv/lib/python3.9/site-packages/lightgbm/lib_lightgbm.so
  Reason: image not found