In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
plt.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from scipy.stats import norm
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

from IPython.core.display import HTML

%matplotlib inline

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

In [2]:
# read in dataset that calculated scores on 2 hours within derate 
codes = pd.read_csv('data/holdout_codes_4hr.csv', low_memory=False)

codes.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,active_transition_count,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,target,spn_0,spn_100,spn_101,spn_102,spn_1024,spn_1028,spn_103,spn_1045,spn_105,spn_1056,spn_1059,spn_1067,spn_1068,spn_1072,spn_1078,spn_110,spn_111,spn_1172,spn_1176,spn_1209,spn_1231,spn_1236,spn_1247,spn_1279,spn_1322,spn_1323,spn_1325,spn_1326,spn_1327,spn_1328,spn_1347,spn_1481,spn_1482,spn_1483,spn_1569,spn_157,spn_1668,spn_1675,spn_168,spn_17096,spn_171,spn_175,spn_17590,spn_1761,spn_177,spn_1787,spn_1807,spn_1808,spn_1809,spn_1815,spn_184,spn_188,spn_190,spn_2017,spn_2023,spn_235,spn_236,spn_245,spn_247,spn_248,spn_256,spn_25780,spn_2579,spn_2623,spn_2629,spn_2659,spn_27,spn_2791,spn_2795,spn_2863,spn_2866,spn_29902,spn_3031,spn_3058,spn_3060,spn_3064,spn_3216,spn_3217,spn_3218,spn_3226,spn_3228,spn_3241,spn_3242,spn_3245,spn_3246,spn_3249,spn_3251,spn_3253,spn_32894,spn_3360,spn_3361,spn_3362,spn_3363,spn_3364,spn_3464,spn_3480,spn_3482,spn_3490,spn_3509,spn_3510,spn_3511,spn_3513,spn_3514,spn_35527,spn_3556,spn_3584,spn_3597,spn_36017,spn_3605,spn_3610,spn_3663,spn_3697,spn_3698,spn_37,spn_3703,spn_3720,spn_37265,spn_3821,spn_3936,spn_4094,spn_4096,spn_411,spn_412,spn_42190,spn_4276,spn_43088,spn_4334,spn_4340,spn_4342,spn_4344,spn_4346,spn_4354,spn_4360,spn_4363,spn_4364,spn_4375,spn_4376,spn_444,spn_4607,spn_46262,spn_47284,spn_4752,spn_4765,spn_4794,spn_4796,spn_4811,spn_5019,spn_5024,spn_5031,spn_50353,spn_5113,spn_512,spn_51923,spn_520200,spn_520203,spn_520298,spn_520302,spn_520330,spn_523530,spn_523531,spn_523543,spn_524033,spn_524037,spn_524287,spn_5298,spn_5319,spn_5394,spn_53958,spn_5396,spn_5397,spn_5442,spn_5443,spn_5444,spn_54478,spn_5491,spn_5571,spn_558,spn_5585,spn_563,spn_56503,spn_5742,spn_5743,spn_576,spn_578,spn_5848,spn_5851,spn_5853,spn_5862,spn_596,spn_609,spn_611,spn_612,spn_614,spn_624,spn_627,spn_629,spn_630,spn_632,spn_636,spn_639,spn_641,spn_647,spn_649,spn_651,spn_652,spn_65287,spn_653,spn_654,spn_655,spn_65535,spn_656,spn_677,spn_70,spn_723,spn_768,spn_77,spn_789,spn_790,spn_791,spn_792,spn_793,spn_794,spn_795,spn_797,spn_798,spn_799,spn_800,spn_801,spn_802,spn_803,spn_806,spn_807,spn_81,spn_810,spn_811,spn_829,spn_84,spn_862,spn_886,spn_905,spn_907,spn_91,spn_917,spn_929,spn_94,spn_95,spn_96,spn_97,spn_98,fmi_0,fmi_1,fmi_2,fmi_3,fmi_4,fmi_5,fmi_6,fmi_7,fmi_8,fmi_9,fmi_10,fmi_11,fmi_12,fmi_13,fmi_14,fmi_15,fmi_16,fmi_17,fmi_18,fmi_19,fmi_20,fmi_21,fmi_23,fmi_31,lamp_status_0,lamp_status_2,lamp_status_9,lamp_status_11,lamp_status_255,lamp_status_511,lamp_status_617,lamp_status_1023,lamp_status_1279,lamp_status_2035,lamp_status_2047,lamp_status_4351,lamp_status_5119,lamp_status_5375,lamp_status_6143,lamp_status_16639,lamp_status_16895,lamp_status_17407,lamp_status_17663,lamp_status_18419,lamp_status_18431,lamp_status_21503,lamp_status_22527,lamp_status_50175,lamp_status_51199,lamp_status_62463,lamp_status_63487,lamp_status_65535,ecu_model_0USA13_13_0415_2238A,ecu_model_20412511P07,ecu_model_6X1u10D1500000000,ecu_model_6X1u13D1500000000,ecu_model_6X1u17D1500000000,ecu_model_CECU3B-NAMUX4,ecu_model_EC60-adv,ecu_model_EC80ESP,ecu_model_EEO-xxF112C,ecu_model_MX,ecu_model_________Y043718,ecu_model_unknown
0,0,0,2,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,65.16582,78862.886425,16.85423,41.592694,True,113.0,True,64.68085,3276.75,23.091571,26.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,1,1,126,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,65.16582,78990.877785,0.766101,41.592694,True,102.2,True,0.0,3276.75,23.091571,1.74,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,2,2,1,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,65.16582,79274.466482,0.0,41.592694,True,109.4,True,27.56364,3276.75,23.091571,2.9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,3,3,1,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,65.16582,79274.466482,0.0,41.592694,True,109.4,True,27.56364,3276.75,23.091571,2.9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,4,4,1,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,65.16582,79275.391085,4.530565,41.592694,True,100.4,True,28.79667,3276.75,23.091571,6.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [3]:
codes = codes.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])

In [4]:
#Define predictors
X = codes.drop('target', axis=1)

#Define target
y = codes['target']


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state = 20, stratify=y)



In [6]:
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [7]:
oversampler = SMOTE(k_neighbors=10, n_jobs=-1, random_state=42)

In [8]:
X_smote, y_smote = oversampler.fit_resample(X_train, y_train)

## 4-hr window, Random Forest 

In [9]:
rf = RandomForestClassifier(n_estimators=150, max_depth=2, random_state=0).fit(X_smote, y_smote)

In [10]:
y_pred = rf.predict(X_test)

In [11]:
accuracy_score(y_test, y_pred)

0.8461832791276066

In [12]:
confusion_matrix(y_test, y_pred)

array([[26511,  4806],
       [   18,    27]], dtype=int64)

In [13]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92     31317
           1       0.01      0.60      0.01        45

    accuracy                           0.85     31362
   macro avg       0.50      0.72      0.46     31362
weighted avg       1.00      0.85      0.92     31362



In [14]:
feature_importances = pd.DataFrame({'feature': X.columns, 'importance': rf.feature_importances_})

feature_importances.sort_values('importance', ascending = False).head(10)

Unnamed: 0,feature,importance
6,distance_ltd,0.093943
0,Unnamed: 0.1,0.085312
20,speed,0.052918
304,lamp_status_1023,0.050904
14,fuel_ltd,0.049409
40,spn_111,0.040431
13,fuel_level,0.037343
22,throttle,0.037017
290,fmi_17,0.036973
11,engine_rpm,0.035938
