In [1]:
import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
import os
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Read the fault/diagnostic records and the target table from the working directory
# (same two files, read in the same order).
faults_diagnostics, target = (
    pd.read_csv(csv_name) for csv_name in ('faults_diagnostics.csv', 'target.csv')
)

In [3]:
# Convert EventTimeStamp to a datetime dtype

In [3]:
# Parse the raw timestamp values, then write the parsed Series back over the column.
parsed_event_times = pd.to_datetime(faults_diagnostics['EventTimeStamp'])
faults_diagnostics['EventTimeStamp'] = parsed_event_times

In [4]:
# Keep only the rows whose `active` value compares equal to True.
active_mask = faults_diagnostics['active'].eq(True)
faults_diagnostics = faults_diagnostics.loc[active_mask]

In [6]:
# Inspect the column names of the filtered frame.
faults_diagnostics.columns

Index(['Unnamed: 0', 'FaultId', 'AcceleratorPedal', 'BarometricPressure',
       'CruiseControlActive', 'CruiseControlSetSpeed', 'DistanceLtd',
       'EngineCoolantTemperature', 'EngineLoad', 'EngineOilPressure',
       'EngineOilTemperature', 'EngineRpm', 'EngineTimeLtd', 'FuelLevel',
       'FuelLtd', 'FuelRate', 'FuelTemperature', 'IgnStatus',
       'IntakeManifoldTemperature', 'LampStatus', 'ParkingBrake', 'Speed',
       'SwitchedBatteryVoltage', 'Throttle', 'TurboBoostPressure', 'RecordID',
       'ESS_Id', 'EventTimeStamp', 'eventDescription', 'ecuSoftwareVersion',
       'ecuSerialNumber', 'ecuModel', 'ecuMake', 'ecuSource', 'spn', 'fmi',
       'active', 'activeTransitionCount', 'EquipmentID', 'MCTNumber',
       'Latitude', 'Longitude', 'LocationTimeStamp'],
      dtype='object')

In [12]:
# NOTE(review): faults_diagnostics_Final is first assigned by the target/faults merge in
# the following cell, so on a fresh kernel (Restart & Run All) this cell raises NameError.
# Move it after that merge.
faults_diagnostics_Final.shape

(546420, 45)

In [10]:
# Inner-join the target table with the fault rows: target.RecordID is matched to FaultId.
faults_diagnostics_Final = pd.merge(
    target,
    faults_diagnostics,
    how='inner',
    left_on='RecordID',
    right_on='FaultId',
)

In [8]:
# Combine the two code columns into a single "<spn>-<fmi>" string per row.
spn_text = faults_diagnostics_Final['spn'].astype(str)
fmi_text = faults_diagnostics_Final['fmi'].astype(str)
faults_diagnostics_Final['spnFmi'] = spn_text.str.cat(fmi_text, sep='-')

In [9]:
# Drop the columns listed in the next cell.

In [10]:
# Columns removed from the merged frame. `columns=` already targets the column axis,
# so no separate axis argument is passed.
columns_to_remove = [
    'spn',
    'fmi',
    'FaultId',
    'Unnamed: 0',
    'RecordID_x',
    'ecuModel',
    'LocationTimeStamp',
    'CruiseControlSetSpeed',
    'RecordID_y',
    'SwitchedBatteryVoltage',
    'Throttle',
    'ParkingBrake',
    'ecuSoftwareVersion',
    'ecuSerialNumber',
    'LampStatus',
    'MCTNumber',
    'eventDescription',
    'active',
    'ESS_Id',
]
faults_diagnostics_Final = faults_diagnostics_Final.drop(columns=columns_to_remove)

In [11]:
# Constant 1 on every row. The rolling aggregations further down sum this column
# per window, and the summed value is then used as one of the merge keys.
faults_diagnostics_Final['Merge_ID'] = 1

In [12]:
# One-hot encode the categorical columns; every dummy column is prefixed with the
# name of the column it was derived from.
categorical_columns = ['CruiseControlActive', 'IgnStatus', 'ecuMake', 'spnFmi']
faults_diagnostics_Final = pd.get_dummies(
    faults_diagnostics_Final,
    columns=categorical_columns,
    prefix=categorical_columns,
)
faults_diagnostics_Final

Unnamed: 0,target,AcceleratorPedal,BarometricPressure,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,...,spnFmi_96-3,spnFmi_96-4,spnFmi_96-9,spnFmi_97-15,spnFmi_97-16,spnFmi_97-3,spnFmi_97-4,spnFmi_976-9,spnFmi_98-18,spnFmi_98-5
0,False,0.0,14.2100,423178.70000,100.4,11.0,0.00,96.74375,0.000,1632.20,...,0,0,0,0,0,0,0,0,0,0
1,False,9.2,14.3550,368419.30000,181.4,18.0,36.54,201.08750,1008.000,6960.80,...,0,0,0,0,0,0,0,0,0,0
2,False,9.2,14.3550,368419.30000,181.4,18.0,36.54,201.08750,1008.000,6960.80,...,0,0,0,0,0,0,0,0,0,0
3,False,48.0,14.4275,470381.40000,181.4,30.0,38.28,196.53130,1514.500,9480.00,...,0,0,0,0,0,0,0,0,0,0
4,False,82.8,14.2825,278736.70000,188.6,80.0,39.44,210.03130,1711.375,6292.20,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546415,False,0.0,14.6450,391932.60000,181.4,11.0,22.62,197.60000,597.375,8016.75,...,1,0,0,0,0,0,0,0,0,0
546416,False,0.0,14.3550,457529.70000,181.4,11.0,19.72,207.21880,600.250,13047.05,...,0,0,0,0,0,0,0,0,0,0
546417,False,100.0,14.5000,423937.90000,185.0,51.0,37.12,211.49370,1310.250,10722.70,...,0,0,0,0,0,0,0,0,0,0
546418,False,0.0,14.3550,465925.40000,186.8,62.0,41.18,212.84380,1340.750,9326.75,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# Collect the name of every column that contains "spnFmi" (the one-hot code columns).
newlist_1 = [
    column_name
    for column_name in faults_diagnostics_Final.columns
    if "spnFmi" in column_name
]

In [14]:
# Add the grouping key, the row counter and the timestamp needed by the rolling window.
newlist_1.extend(['EquipmentID', 'Merge_ID', 'EventTimeStamp'])

In [15]:
# Display the selected column names.
newlist_1

['spnFmi_0-0',
 'spnFmi_100-1',
 'spnFmi_100-18',
 'spnFmi_100-2',
 'spnFmi_100-3',
 'spnFmi_100-4',
 'spnFmi_101-0',
 'spnFmi_101-15',
 'spnFmi_101-16',
 'spnFmi_101-2',
 'spnFmi_101-3',
 'spnFmi_101-4',
 'spnFmi_102-10',
 'spnFmi_102-14',
 'spnFmi_102-15',
 'spnFmi_102-16',
 'spnFmi_102-17',
 'spnFmi_102-18',
 'spnFmi_102-2',
 'spnFmi_102-20',
 'spnFmi_102-3',
 'spnFmi_102-31',
 'spnFmi_102-4',
 'spnFmi_102-5',
 'spnFmi_1023-5',
 'spnFmi_1024-0',
 'spnFmi_1028-9',
 'spnFmi_103-1',
 'spnFmi_103-10',
 'spnFmi_103-16',
 'spnFmi_103-18',
 'spnFmi_103-4',
 'spnFmi_103-7',
 'spnFmi_103-9',
 'spnFmi_1043-2',
 'spnFmi_1045-2',
 'spnFmi_1045-7',
 'spnFmi_105-0',
 'spnFmi_105-17',
 'spnFmi_105-18',
 'spnFmi_105-2',
 'spnFmi_105-3',
 'spnFmi_105-5',
 'spnFmi_1056-2',
 'spnFmi_1056-4',
 'spnFmi_1056-5',
 'spnFmi_1059-2',
 'spnFmi_1067-11',
 'spnFmi_1067-2',
 'spnFmi_1067-7',
 'spnFmi_1068-2',
 'spnFmi_107-3',
 'spnFmi_107-4',
 'spnFmi_1071-5',
 'spnFmi_1072-1',
 'spnFmi_1072-3',
 'spnFmi_1072-5'

In [16]:
# Order rows chronologically (ascending is the default) ahead of the time-based
# rolling windows computed on EventTimeStamp.
faults_diagnostics_Final = faults_diagnostics_Final.sort_values(by='EventTimeStamp')

In [17]:
# For each EquipmentID, sum the selected columns over a trailing 5-day window
# measured on EventTimeStamp.
rolling_inputs = faults_diagnostics_Final[newlist_1]
grouped_by_equipment = rolling_inputs.groupby('EquipmentID')
faults_diagnostics_rolling_1 = grouped_by_equipment.rolling('5D', on='EventTimeStamp').sum()

In [18]:
# Display the per-EquipmentID 5-day rolling sums.
faults_diagnostics_rolling_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Merge_ID,spnFmi_0-0,spnFmi_100-1,spnFmi_100-18,spnFmi_100-2,spnFmi_100-3,spnFmi_100-4,spnFmi_101-0,spnFmi_101-15,spnFmi_101-16,...,spnFmi_96-4,spnFmi_96-9,spnFmi_97-15,spnFmi_97-16,spnFmi_97-3,spnFmi_97-4,spnFmi_976-9,spnFmi_98-18,spnFmi_98-5,EventTimeStamp
EquipmentID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
301,24464,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-05-11 13:11:20
301,25357,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-05-13 08:22:32
301,27715,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-05-18 09:34:05
301,29792,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-05-21 13:57:35
301,33526,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-05-28 13:31:41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2377,545568,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-02-28 04:56:18
2377,545942,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-03-03 07:47:01
2377,546419,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-03-06 14:14:13
2380,546080,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2020-03-04 10:06:10


In [19]:
# Names of the "spnFmi" one-hot columns, which are removed from the frame next.
new_list_2 = [
    column_name
    for column_name in faults_diagnostics_Final.columns
    if "spnFmi" in column_name
]

In [20]:
# Remove the spnFmi one-hot columns; their rolling sums are already held in
# faults_diagnostics_rolling_1.
faults_diagnostics_Final = faults_diagnostics_Final.drop(new_list_2, axis='columns')

In [21]:
# Frame without the key/bookkeeping columns; only its column names are used, to
# build the aggregation dictionary.
key_columns = ['Merge_ID', 'EquipmentID', 'EventTimeStamp']
dropped_columns = faults_diagnostics_Final.drop(columns=key_columns)

In [22]:
def _last_in_window(window):
    """Return the last value of a rolling window, selected by position.

    `.iloc[-1]` is used instead of `window[-1]`: plain `[]` on a Series is a
    label lookup, which raises KeyError on an integer index and relies on a
    deprecated positional fallback on other index types.
    """
    return window.iloc[-1]


# Map every remaining column to "take the last value in the window".
agg_dict = {
    column_name: _last_in_window for column_name in dropped_columns.columns.tolist()
}

In [23]:
# Preview the frame after the spnFmi one-hot columns were removed.
faults_diagnostics_Final.head()

Unnamed: 0,target,AcceleratorPedal,BarometricPressure,DistanceLtd,EngineCoolantTemperature,EngineLoad,EngineOilPressure,EngineOilTemperature,EngineRpm,EngineTimeLtd,...,CruiseControlActive_False,CruiseControlActive_True,CruiseControlActive_unknown,IgnStatus_False,IgnStatus_True,ecuMake_Cummins,ecuMake_Eaton,ecuMake_PACCAR,ecuMake_Volvo,ecuMake_unknown
531698,False,0.0,14.4275,274765.4,183.2,15.0,25.52,190.85,597.625,5673.1,...,1,0,0,0,1,0,0,1,0,0
531699,False,0.0,14.4275,274765.4,183.2,15.0,25.52,190.85,597.625,5673.1,...,1,0,0,0,1,0,0,1,0,0
531700,False,23.2,14.355,418316.5,170.6,6.0,41.18,173.1875,1279.75,8909.25,...,1,0,0,0,1,0,0,0,0,1
531705,False,48.0,14.7175,55971.24,168.8,37.0,22.62,188.375,600.0,1096.5,...,1,0,0,0,1,0,1,0,0,0
531706,False,0.0,14.4275,306348.1,186.8,14.0,19.14,208.0625,599.625,8180.75,...,1,0,0,0,1,0,0,1,0,0


In [24]:
# Merge_ID is summed within each window rather than reduced to its last value.
agg_dict.update({'Merge_ID': 'sum'})

In [25]:
# Apply the per-column aggregations over the same trailing 5-day window per EquipmentID.
equipment_windows = faults_diagnostics_Final.groupby('EquipmentID').rolling('5D', on='EventTimeStamp')
test_2 = equipment_windows.agg(agg_dict)

In [26]:
# Combine the rolling spnFmi sums with the last-value aggregates.
# NOTE(review): the join keys are not verified to be unique. If two rows of one
# EquipmentID share the same EventTimeStamp and Merge_ID, this inner merge pairs
# every matching row with every other and multiplies rows. Confirm that is acceptable.
faults_diagnostics_rolling_2 = pd.merge(
    faults_diagnostics_rolling_1,
    test_2,
    how='inner',
    on=['EquipmentID', 'EventTimeStamp', 'Merge_ID'],
)

In [29]:
# Move the current index into regular columns.
# NOTE(review): presumably this turns EquipmentID back into a column, since later
# cells select 'EquipmentID' by column name. Verify against the merge result.
faults_diagnostics_rolling_2 = faults_diagnostics_rolling_2.reset_index()

In [30]:
# Label frame: the target plus the equipment and time keys.
target_columns = ['target', 'EquipmentID', 'EventTimeStamp']
target_1 = faults_diagnostics_rolling_2.loc[:, target_columns]

In [None]:
# reset_index() without drop=True keeps the previous index as a new column of target_1.
target_1 = target_1.reset_index()

In [None]:
# Feature frame: everything except the label, the timestamp and the row counter.
# EquipmentID is kept because the train/test split selects rows by it.
non_feature_columns = ['target', 'EventTimeStamp', 'Merge_ID']
Predictors = faults_diagnostics_rolling_2.drop(columns=non_feature_columns)

In [None]:
# NOTE(review): without drop=True the previous index is inserted as a column, so it
# becomes part of the predictor frame that is saved and split below. Confirm that a
# row-index column is intended as a feature.
Predictors = Predictors.reset_index()

In [None]:
# Persist the predictor frame; the DataFrame index itself is not written.
Predictors.to_csv('Predictors.csv', index = False)

In [32]:
# Persist the label frame. Unlike Predictors.csv, the DataFrame index is written as
# an extra leading column here (index=True).
target_1.to_csv('target_1.csv', index = True)

In [None]:
import random

# Split by truck (EquipmentID) so that every row of a given truck ends up on the
# same side of the train/test boundary.
SPLIT_SEED = 42        # fixed seed: the split is identical after a kernel restart
N_TRAIN_TRUCKS = 831   # number of shuffled trucks assigned to training

# Unique equipment ids, in order of first appearance.
trucks = target_1['EquipmentID'].drop_duplicates().to_list()

# A dedicated seeded generator is used so the global `random` state is untouched.
random.Random(SPLIT_SEED).shuffle(trucks)

trainTrucks = trucks[:N_TRAIN_TRUCKS]
testTrucks = trucks[N_TRAIN_TRUCKS:]

# EquipmentID is only the split key, so it is dropped from all four outputs.
# NOTE(review): yTrain/yTest still contain every other column of target_1, not just
# 'target'. Confirm downstream code selects the label column.
xTrain = Predictors.loc[Predictors['EquipmentID'].isin(trainTrucks)].drop(columns='EquipmentID')
xTest = Predictors.loc[Predictors['EquipmentID'].isin(testTrucks)].drop(columns='EquipmentID')
yTrain = target_1.loc[target_1['EquipmentID'].isin(trainTrucks)].drop(columns='EquipmentID')
yTest = target_1.loc[target_1['EquipmentID'].isin(testTrucks)].drop(columns='EquipmentID')

In [None]:
# Summarize dtypes and non-null counts of the filtered fault frame.
faults_diagnostics.info()

In [None]:
# Display the filtered fault frame.
faults_diagnostics