In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
plt.style.use('ggplot')


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error

from sklearn.metrics import mean_absolute_percentage_error, r2_score
from sklearn.ensemble import RandomForestClassifier

from scipy.stats import norm


from IPython.core.display import HTML

%matplotlib inline

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

In [3]:
clean_data = pd.read_csv('data/cleaned_faults_diagnostic.csv', low_memory=False)
clean_data.head()


Unnamed: 0,equipment_id,record_id,ess_id,event_timestamp,event_description,ecu_software_version,ecu_serial_number,ecu_model,ecu_make,ecu_source,spn,fmi,active,active_transition_count,mct_number,latitude,longitude,location_timestamp,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate
0,1327,42689,2241022,2015-05-04 15:38:35,High Voltage (Aftertreatment 1 Particulate Tra...,unknown,unknown,unknown,unknown,49,50353,0,True,2,105383198,35.98875,-83.579583,2015-05-04 15:39:11,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,,78862.886425,16.85423,,True,113.0,2,True,64.68085,3276.75,,26.39,False,False
1,1327,45667,2296851,2015-05-07 06:52:14,Condition Exists Cruise Control Enable Switch,unknown,unknown,unknown,unknown,49,596,31,True,126,105383198,40.1975,-74.661435,2015-05-07 06:52:50,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,,78990.877785,0.766101,,True,102.2,255,True,0.0,3276.75,,1.74,False,False
2,1327,58809,2507532,2015-05-19 12:02:55,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.49125,-86.458842,2015-05-19 12:03:32,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False
3,1327,65879,2610228,2015-05-26 08:11:45,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.490787,-86.433842,2015-05-21 14:40:18,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False
4,1327,65939,2611189,2015-05-26 08:44:10,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.523703,-86.440787,2015-05-26 08:52:22,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,,79275.391085,4.530565,,True,100.4,17407,True,28.79667,3276.75,,6.67,False,False


In [4]:
clean_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 547766 entries, 0 to 547765
Data columns (total 43 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   equipment_id                 547766 non-null  object 
 1   record_id                    547766 non-null  int64  
 2   ess_id                       547766 non-null  int64  
 3   event_timestamp              547766 non-null  object 
 4   event_description            547766 non-null  object 
 5   ecu_software_version         547691 non-null  object 
 6   ecu_serial_number            547691 non-null  object 
 7   ecu_model                    547691 non-null  object 
 8   ecu_make                     547691 non-null  object 
 9   ecu_source                   547766 non-null  int64  
 10  spn                          547766 non-null  int64  
 11  fmi                          547766 non-null  int64  
 12  active                       547766 non-null  bool   
 13 

In [5]:
#determine the number of 5246 derates out of the original dataset
# clean_data_derates = clean_data[clean_data['spn']==5246]
# clean_data_derates

It appears there are 496 derates in the original dataset

In [6]:
#clean_data_derates.shape
#496 derates, 43 columns

In [7]:
#set aside a holdout dataset, ensuring representative 5246 derates
test_time = '2015-09-15 00:00:00'
holdout = clean_data.loc[clean_data['event_timestamp'] <= test_time]
train = clean_data.loc[clean_data['event_timestamp'] > test_time]

#9/15/2015 resulted in 104,537 rows (19%) and 51 derates (10.3%)

In [8]:
holdout.shape

(104537, 43)

In [9]:
#holdout[holdout['spn']== 5246].count()
#51 5246 derates

We've created a holdout set consisting of ~19% of the original dataset and 10.3% of the original dataset's derates

In [10]:
#remove unnecessary columns
train = train.drop(columns = ['ess_id', 'ecu_software_version','ecu_serial_number','ecu_source', 'active', 'location_timestamp'])

In [11]:
#insert column which inserts timestamp for each 5246 derate
train['time_of_derate'] = train.loc[train['spn'] == 5246, 'time_of_derate'] = train.loc[train['spn'] == 5246, 'event_timestamp']
train

Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate
1450,1340,205348,2015-09-16 08:35:53,High (Severity Medium) Catalyst Tank Heater,unknown,unknown,50353,0,2,105399896,37.137546,-85.975972,100.0,14.5000,True,64.6226,535207.6,190.4,98.0,35.38,213.5750,1344.875,10315.50,26.0,81558.233872,19.218580,,True,105.8,2,True,64.379880,3276.75,,28.13,False,False,
1451,1340,207783,2015-09-17 17:13:55,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,37.303333,-81.093009,100.0,13.6300,True,64.6226,535974.7,201.2,100.0,35.38,221.7312,1481.000,10331.40,53.6,81674.733747,18.016590,,True,122.0,17407,True,51.554390,3276.75,,30.45,False,False,
1452,1340,214786,2015-09-23 13:09:46,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,33.682685,-95.572546,0.0,14.4275,True,64.6226,537668.3,204.8,10.0,17.40,212.1125,650.750,10363.80,61.2,81922.527132,0.779310,,True,141.8,17407,True,1.980621,3276.75,,1.16,False,False,
1453,1340,215141,2015-09-23 17:14:53,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,32.744398,-96.808287,0.0,14.4275,True,64.6226,537780.4,190.4,16.0,18.56,204.8562,648.875,10366.50,52.8,81937.320767,1.096317,,True,134.6,17407,True,0.000000,3276.75,,0.00,False,False,
1454,1340,218825,2015-09-27 07:47:19,Low (Severity Low) Engine Coolant Level,6X1u10D1500000000,CMMNS,111,17,2,105399896,36.938472,-80.991018,0.0,13.6300,True,64.6226,539272.0,78.8,31.0,41.18,75.2000,651.125,10394.50,72.4,82175.471872,2.417182,,True,73.4,1023,True,0.000000,3276.75,,0.00,False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547757,310,1083465,2018-11-21 20:57:59,Incorrect Data J1939 Network #1 Primary Vehicl...,EC60-adv,BNDWS,639,2,127,105411467,35.262500,-86.398425,32.4,14.4275,True,64.6226,288981.4,179.6,12.0,41.18,195.4063,1286.500,8654.55,36.8,39031.288597,1.915253,32.0,True,80.6,1279,True,4.728246,3276.75,100.0,0.87,False,False,
547758,310,1204894,2019-10-09 23:32:35,Incorrect Data J1939 Network #1 Primary Vehicl...,EC60-adv,BNDWS,639,2,127,105442911,35.799722,-86.386851,46.0,14.4275,True,0.0000,324571.2,179.6,27.0,41.18,203.1687,1469.875,9843.65,44.4,43956.512334,4.279601,32.0,True,105.8,1279,True,5.252528,3276.75,100.0,2.61,False,False,
547759,310,1214347,2019-11-12 00:16:41,Incorrect Data Wheel Sensor ABS Axle 1 Left,EC60-adv,BNDWS,789,2,127,105455566,35.273101,-86.397314,0.0,14.3550,True,0.0000,327738.3,183.2,34.0,17.98,208.8500,580.625,9940.25,61.2,44385.924005,1.254821,32.0,True,75.2,1279,True,1.524301,3276.75,100.0,1.16,False,False,
547760,310,1214372,2019-11-12 00:16:41,Incorrect Data Wheel Sensor ABS Axle 1 Left,EC60-adv,BNDWS,789,2,127,105455566,35.273101,-86.397314,0.0,14.3550,True,0.0000,327738.3,183.2,34.0,17.98,208.8500,580.625,9940.25,61.2,44385.924005,1.254821,32.0,True,75.2,1279,True,1.524301,3276.75,100.0,1.16,False,False,


In [12]:
#convert time columns to datetime
train[['event_timestamp', 'time_of_derate']]=train[["event_timestamp", "time_of_derate"]].apply(pd.to_datetime, format='%Y-%m-%d %H:%M:%S.%f')

In [13]:
train['spn']=train['spn'].astype("str")

In [14]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 443229 entries, 1450 to 547761
Data columns (total 38 columns):
 #   Column                       Non-Null Count   Dtype         
---  ------                       --------------   -----         
 0   equipment_id                 443229 non-null  object        
 1   record_id                    443229 non-null  int64         
 2   event_timestamp              443229 non-null  datetime64[ns]
 3   event_description            443229 non-null  object        
 4   ecu_model                    443154 non-null  object        
 5   ecu_make                     443154 non-null  object        
 6   spn                          443229 non-null  object        
 7   fmi                          443229 non-null  int64         
 8   active_transition_count      443229 non-null  int64         
 9   mct_number                   443229 non-null  int64         
 10  latitude                     443229 non-null  float64       
 11  longitude              

In [15]:
#conduct backfill on time_of_next_derate column
train['time_of_derate'] = train['time_of_derate'].fillna(method='bfill')

In [16]:
train['time_until_next_derate'] = train['time_of_derate'] - train['event_timestamp']
train.head()

Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate,time_until_next_derate
1450,1340,205348,2015-09-16 08:35:53,High (Severity Medium) Catalyst Tank Heater,unknown,unknown,50353,0,2,105399896,37.137546,-85.975972,100.0,14.5,True,64.6226,535207.6,190.4,98.0,35.38,213.575,1344.875,10315.5,26.0,81558.233872,19.21858,,True,105.8,2,True,64.37988,3276.75,,28.13,False,False,2015-09-23 10:28:29,7 days 01:52:36
1451,1340,207783,2015-09-17 17:13:55,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,37.303333,-81.093009,100.0,13.63,True,64.6226,535974.7,201.2,100.0,35.38,221.7312,1481.0,10331.4,53.6,81674.733747,18.01659,,True,122.0,17407,True,51.55439,3276.75,,30.45,False,False,2015-09-23 10:28:29,5 days 17:14:34
1452,1340,214786,2015-09-23 13:09:46,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,33.682685,-95.572546,0.0,14.4275,True,64.6226,537668.3,204.8,10.0,17.4,212.1125,650.75,10363.8,61.2,81922.527132,0.77931,,True,141.8,17407,True,1.980621,3276.75,,1.16,False,False,2015-09-23 10:28:29,-1 days +21:18:43
1453,1340,215141,2015-09-23 17:14:53,High (Severity Medium) Catalyst Tank Heater,6X1u10D1500000000,CMMNS,3363,16,1,105399896,32.744398,-96.808287,0.0,14.4275,True,64.6226,537780.4,190.4,16.0,18.56,204.8562,648.875,10366.5,52.8,81937.320767,1.096317,,True,134.6,17407,True,0.0,3276.75,,0.0,False,False,2015-09-23 10:28:29,-1 days +17:13:36
1454,1340,218825,2015-09-27 07:47:19,Low (Severity Low) Engine Coolant Level,6X1u10D1500000000,CMMNS,111,17,2,105399896,36.938472,-80.991018,0.0,13.63,True,64.6226,539272.0,78.8,31.0,41.18,75.2,651.125,10394.5,72.4,82175.471872,2.417182,,True,73.4,1023,True,0.0,3276.75,,0.0,False,False,2015-09-23 10:28:29,-4 days +02:41:10


In [17]:
#train.info()

There are some rows where time of derate and time until next derate are nulls.  This is because the equipment never
had a derate.  Five equipment ids and 2600 rows (1588 of which are with 1 equipment_id):  309,307,310,308,306

In [18]:
# no_derates = train[train['time_of_derate'].isnull()]
# no_derates.head(10)

In [19]:
#no_derates['equipment_id'].value_counts()

In [20]:
train['time_until_next_derate'] = train['time_until_next_derate'] / np.timedelta64(1, 'h')

In [21]:
#insert a column called 'target' which assigns a value.  First used <8 hrs, then 4 hrs
train['target'] = train['time_until_next_derate'].apply(lambda x: "1" if x > 1 and x <= 4 else "0")
 

In [22]:
#train.head()

In [23]:
train.target.describe()
#using 4 hrs; 443229 / 442815
#using 8 hrs; 443229 / 442398

count     443229
unique         2
top            0
freq      442815
Name: target, dtype: object

In [24]:
#train_csv = train.to_csv('data/train.csv')

Create full df with hot-encoding of most variables, and target


In [25]:
train_codes=pd.get_dummies(data=train, columns=['spn','fmi','lamp_status', 'ecu_model'])

In [26]:
train_codes = train_codes.drop(columns=['equipment_id','record_id','event_timestamp', 'mct_number','latitude','longitude', 'ecu_make', 'event_description',
                                        '5246_derate', '1569_derate','time_of_derate', 'time_until_next_derate', 'spn_5246'])

In [27]:
column_means = train_codes.mean()
train_codes = train_codes.fillna(column_means)

In [28]:
train_codes.head()

Unnamed: 0,active_transition_count,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,target,spn_0,spn_100,spn_101,spn_102,spn_1023,spn_1024,spn_1028,spn_103,spn_1043,spn_1045,spn_105,spn_1056,spn_1059,spn_1067,spn_1068,spn_107,spn_1071,spn_1072,spn_1075,spn_1078,spn_108,spn_1081,spn_110,spn_111,spn_1127,spn_114863,spn_116,spn_1172,spn_1176,spn_118,spn_1209,spn_1213,spn_1231,spn_1235,spn_1236,spn_1239,spn_1247,spn_125,spn_127,spn_1321,spn_1322,spn_1323,spn_1324,spn_1325,spn_1326,spn_1327,spn_1328,spn_1347,spn_1349,spn_13600,spn_139296,spn_1464,spn_1481,spn_1482,spn_1483,spn_1487,spn_153,spn_153931,spn_1569,spn_157,spn_158,spn_16,spn_160,spn_1612,spn_1659,spn_1668,spn_167,spn_1675,spn_168,spn_17096,spn_171,spn_173,spn_174,spn_175,spn_17590,spn_1761,spn_177,spn_1787,spn_1807,spn_1808,spn_1809,spn_1815,spn_184,spn_188,spn_190,spn_191,spn_196608,spn_2000,spn_2017,spn_2023,spn_2029,spn_228,spn_235,spn_236,spn_237,spn_245,spn_247,spn_248,spn_251,spn_252,spn_255,spn_256,spn_25780,spn_2579,spn_2623,spn_2629,spn_2630,spn_2659,spn_27,spn_2791,spn_2795,spn_2863,spn_2866,spn_2912,spn_2917,spn_29902,spn_3031,spn_3058,spn_3060,spn_3064,spn_32000,spn_3216,spn_3217,spn_3218,spn_3222,spn_3226,spn_3227,spn_3228,spn_3241,spn_3242,spn_3245,spn_3246,spn_3249,spn_3251,spn_3253,spn_33,spn_335040,spn_3360,spn_3361,spn_3362,spn_3363,spn_3364,spn_3464,spn_3480,spn_3482,spn_3490,spn_3509,spn_3510,spn_3511,spn_3512,spn_3513,spn_3515,spn_3521,spn_35527,spn_3556,spn_3583,spn_3597,spn_36017,spn_3605,spn_3610,spn_3663,spn_3695,spn_3696,spn_3697,spn_3698,spn_37,spn_3703,spn_3720,spn_38,spn_3821,spn_39093,spn_3936,spn_4094,spn_4095,spn_4096,spn_411,spn_412,spn_4219,spn_4220,spn_4276,spn_43088,spn_4331,spn_4334,spn_4339,spn_4340,spn_4342,spn_4344,spn_4346,spn_4349,spn_4354,spn_4356,spn_4360,spn_4363,spn_4364,spn_4375,spn_4376,spn_4380,spn_4382,spn_441,spn_442,spn_444,spn_47284,spn_4752,spn_4765,spn_4766,spn_4792,spn_4794,spn_4795,spn_4796,spn_4811,spn_4812,spn_4813,spn_5018,spn_5019,spn_5024,spn_5031,spn_50353,spn_5052,spn_51,spn_5109,spn_5110,spn_5111,spn_5112,spn_5113,spn_5114,spn_5115,...,spn_520413,spn_520953,spn_521032,spn_522,spn_523530,spn_523531,spn_523543,spn_524033,spn_524037,spn_524071,spn_524287,spn_5245,spn_525,spn_5298,spn_5319,spn_5321,spn_5357,spn_5392,spn_5394,spn_5395,spn_5396,spn_5397,spn_5442,spn_5443,spn_5444,spn_5485,spn_5491,spn_5569,spn_5571,spn_5579,spn_558,spn_5585,spn_560,spn_5614,spn_5615,spn_5616,spn_5625,spn_563,spn_5742,spn_5743,spn_5745,spn_5746,spn_576,spn_577,spn_578,spn_583,spn_5835,spn_5848,spn_5851,spn_5853,spn_5862,spn_5902,spn_5903,spn_5909,spn_5939,spn_5941,spn_5942,spn_5953,spn_596,spn_603,spn_609,spn_611,spn_612,spn_614,spn_6145,spn_6146,spn_6147,spn_6148,spn_624,spn_627,spn_628,spn_629,spn_630,spn_632,spn_633,spn_636,spn_639,spn_641,spn_647,spn_649,spn_651,spn_652,spn_653,spn_65302,spn_65303,spn_654,spn_655,spn_65535,spn_656,spn_6713,spn_6773,spn_6780,spn_6802,spn_70,spn_705,spn_709,spn_723,spn_729,spn_7321,spn_7323,spn_74,spn_75,spn_751,spn_752,spn_76339,spn_767,spn_768,spn_77,spn_78,spn_781,spn_78132,spn_7827,spn_7847,spn_7854,spn_788,spn_789,spn_790,spn_791,spn_792,spn_793,spn_794,spn_795,spn_796,spn_797,spn_798,spn_799,spn_800,spn_801,spn_802,spn_803,spn_805,spn_806,spn_807,spn_81,spn_810,spn_829,spn_830,spn_84,spn_862,spn_88121,spn_886,spn_904,spn_905,spn_906,spn_907,spn_91,spn_917,spn_92,spn_929,spn_9295,spn_932,spn_933,spn_934,spn_937,spn_938,spn_939,spn_94,spn_940,spn_941,spn_95,spn_96,spn_97,spn_976,spn_98,fmi_0,fmi_1,fmi_2,fmi_3,fmi_4,fmi_5,fmi_6,fmi_7,fmi_8,fmi_9,fmi_10,fmi_11,fmi_12,fmi_13,fmi_14,fmi_15,fmi_16,fmi_17,fmi_18,fmi_19,fmi_20,fmi_21,fmi_22,fmi_23,fmi_29,fmi_31,lamp_status_0,lamp_status_2,lamp_status_9,lamp_status_11,lamp_status_255,lamp_status_511,lamp_status_544,lamp_status_1023,lamp_status_1279,lamp_status_2035,lamp_status_2047,lamp_status_4351,lamp_status_5119,lamp_status_5375,lamp_status_6143,lamp_status_11801,lamp_status_16639,lamp_status_16895,lamp_status_17407,lamp_status_17663,lamp_status_18419,lamp_status_18431,lamp_status_20735,lamp_status_21503,lamp_status_22515,lamp_status_22527,lamp_status_28436,lamp_status_50175,lamp_status_50431,lamp_status_51199,lamp_status_55295,lamp_status_62463,lamp_status_63487,lamp_status_65535,ecu_model_0USA10_13_0405_2237A,ecu_model_0USA13_13_0415_2238A,ecu_model_202.35.0,ecu_model_6L u13D0890000000,ecu_model_6U13D13,ecu_model_6X1u10D1500000000,ecu_model_6X1u13D1500000000,ecu_model_6X1u17D1500000000,ecu_model_6X1u20D1500000000,ecu_model_CE,ecu_model_CECU3-NAMUX3,ecu_model_CECU3B-NAMUX4,ecu_model_E0031,ecu_model_EC60-adv,ecu_model_EC80ESP,ecu_model_EC80ESP AM000036,ecu_model_EC80ESP+,ecu_model_EEO-xxF112C,ecu_model_FAOM-xx810S-EC3,ecu_model_Gen 4 Boot Loader,ecu_model_MX,ecu_model_MX16U13D13,ecu_model_MX16U15D13,ecu_model_Y044053,ecu_model_Y049568,ecu_model_unknown
1450,2,100.0,14.5,True,64.6226,535207.6,190.4,98.0,35.38,213.575,1344.875,10315.5,26.0,81558.233872,19.21858,35.563064,True,105.8,True,64.37988,3276.75,76.969963,28.13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1451,1,100.0,13.63,True,64.6226,535974.7,201.2,100.0,35.38,221.7312,1481.0,10331.4,53.6,81674.733747,18.01659,35.563064,True,122.0,True,51.55439,3276.75,76.969963,30.45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1452,1,0.0,14.4275,True,64.6226,537668.3,204.8,10.0,17.4,212.1125,650.75,10363.8,61.2,81922.527132,0.77931,35.563064,True,141.8,True,1.980621,3276.75,76.969963,1.16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1453,1,0.0,14.4275,True,64.6226,537780.4,190.4,16.0,18.56,204.8562,648.875,10366.5,52.8,81937.320767,1.096317,35.563064,True,134.6,True,0.0,3276.75,76.969963,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1454,2,0.0,13.63,True,64.6226,539272.0,78.8,31.0,41.18,75.2,651.125,10394.5,72.4,82175.471872,2.417182,35.563064,True,73.4,True,0.0,3276.75,76.969963,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [29]:
train_codes.columns.values

array(['active_transition_count', 'accelerator_pedal',
       'barometric_pressure', 'cruise_control_active',
       'cruise_control_set_speed', 'distance_ltd',
       'engine_coolant_temperature', 'engine_load', 'engine_oil_pressure',
       'engine_oil_temperature', 'engine_rpm', 'engine_time_ltd',
       'fuel_level', 'fuel_ltd', 'fuel_rate', 'fuel_temperature',
       'ign_status', 'intake_manifold_temperature', 'parking_brake',
       'speed', 'switched_battery_voltage', 'throttle',
       'turbo_boost_pressure', 'target', 'spn_0', 'spn_100', 'spn_101',
       'spn_102', 'spn_1023', 'spn_1024', 'spn_1028', 'spn_103',
       'spn_1043', 'spn_1045', 'spn_105', 'spn_1056', 'spn_1059',
       'spn_1067', 'spn_1068', 'spn_107', 'spn_1071', 'spn_1072',
       'spn_1075', 'spn_1078', 'spn_108', 'spn_1081', 'spn_110',
       'spn_111', 'spn_1127', 'spn_114863', 'spn_116', 'spn_1172',
       'spn_1176', 'spn_118', 'spn_1209', 'spn_1213', 'spn_1231',
       'spn_1235', 'spn_1236', 'spn_1239

In [31]:
train_codes.to_csv('data/train_codes_4hr.csv')

I keep getting errors about multi-index so will try setting a rolling window column on pre-dummies df

In [None]:
#train_roll = train.groupby('equipment_id').rolling(window ='8h', on = 'event_timestamp')['spn'].sum() 


In [None]:
#train_roll_csv = train_roll.to_csv('data/train_roll.csv')

In [None]:
# will not create new dataset and apply get dummies to spn, and then try rolling windows again 
# train_spn = pd.get_dummies(data=train, columns=['spn'])
# train_spn.head()

In [None]:
#spn_cols = [ x for x in train_spn.columns if 'spn_' in x ] 

In [None]:
#train_spn.groupby('equipment_id').rolling(window='8h', on = 'time_of_derate')[spn_cols].sum() 


In [None]:
#train_spn_roll_csv = train_spn_roll.to_csv('data/train_spn_roll.csv')