In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 
plt.style.use('ggplot')



from sklearn.model_selection import train_test_split
from scipy.stats import norm
from IPython.core.display import HTML

%matplotlib inline

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)

In [2]:
holdout = pd.read_csv('data/holdout.csv', low_memory=False)
holdout.head()


Unnamed: 0.1,Unnamed: 0,equipment_id,record_id,ess_id,event_timestamp,event_description,ecu_software_version,ecu_serial_number,ecu_model,ecu_make,ecu_source,spn,fmi,active,active_transition_count,mct_number,latitude,longitude,location_timestamp,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate
0,0,1327,42689,2241022,2015-05-04 15:38:35,High Voltage (Aftertreatment 1 Particulate Tra...,unknown,unknown,unknown,unknown,49,50353,0,True,2,105383198,35.98875,-83.579583,2015-05-04 15:39:11,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,,78862.886425,16.85423,,True,113.0,2,True,64.68085,3276.75,,26.39,False,False
1,1,1327,45667,2296851,2015-05-07 06:52:14,Condition Exists Cruise Control Enable Switch,unknown,unknown,unknown,unknown,49,596,31,True,126,105383198,40.1975,-74.661435,2015-05-07 06:52:50,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,,78990.877785,0.766101,,True,102.2,255,True,0.0,3276.75,,1.74,False,False
2,2,1327,58809,2507532,2015-05-19 12:02:55,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.49125,-86.458842,2015-05-19 12:03:32,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False
3,3,1327,65879,2610228,2015-05-26 08:11:45,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.490787,-86.433842,2015-05-21 14:40:18,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False
4,4,1327,65939,2611189,2015-05-26 08:44:10,Abnormal Update Rate Aftertreatment 1 Intake NOx,04993120*00001782*082113134117*07700053*I0*BBZ*,79419774,6X1u10D1500000000,CMMNS,0,3216,9,True,1,105383198,35.523703,-86.440787,2015-05-26 08:52:22,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,,79275.391085,4.530565,,True,100.4,17407,True,28.79667,3276.75,,6.67,False,False


In [3]:
holdout.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104537 entries, 0 to 104536
Data columns (total 44 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   Unnamed: 0                   104537 non-null  int64  
 1   equipment_id                 104537 non-null  object 
 2   record_id                    104537 non-null  int64  
 3   ess_id                       104537 non-null  int64  
 4   event_timestamp              104537 non-null  object 
 5   event_description            104537 non-null  object 
 6   ecu_software_version         104537 non-null  object 
 7   ecu_serial_number            104537 non-null  object 
 8   ecu_model                    104537 non-null  object 
 9   ecu_make                     104537 non-null  object 
 10  ecu_source                   104537 non-null  int64  
 11  spn                          104537 non-null  int64  
 12  fmi                          104537 non-null  int64  
 13 

In [4]:
holdout.shape
#should be 104537, 43

(104537, 44)

In [5]:
#determine the number of 5246 derates, should be 51 
holdout_derates = holdout[holdout['spn']==5246].count()


In [6]:
holdout_derates.shape


(44,)

In [7]:
#remove unnecessary columns
holdout = holdout.drop(columns = ['ess_id', 'ecu_software_version','ecu_serial_number','ecu_source', 'active', 'location_timestamp'])

In [8]:
#insert column which inserts timestamp for each 5246 derate
holdout['time_of_derate'] = holdout.loc[holdout['spn'] == 5246, 'time_of_derate'] = holdout.loc[holdout['spn'] == 5246, 'event_timestamp']
holdout

Unnamed: 0.1,Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate
0,0,1327,42689,2015-05-04 15:38:35,High Voltage (Aftertreatment 1 Particulate Tra...,unknown,unknown,50353,0,2,105383198,35.988750,-83.579583,100.0,14.2825,True,62.13712,517711.80,183.2,90.0,33.64,216.6125,1359.125,10323.10,,78862.886425,16.854230,,True,113.0,2,True,64.680850,3276.75,,26.39,False,False,
1,1,1327,45667,2015-05-07 06:52:14,Condition Exists Cruise Control Enable Switch,unknown,unknown,596,31,126,105383198,40.197500,-74.661435,0.0,14.7900,True,64.62260,518554.70,185.0,14.0,25.52,165.8750,648.375,10338.70,,78990.877785,0.766101,,True,102.2,255,True,0.000000,3276.75,,1.74,False,False,
2,2,1327,58809,2015-05-19 12:02:55,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.491250,-86.458842,11.6,14.3550,True,64.62260,520309.60,185.0,0.0,31.90,203.8438,1083.125,10374.80,,79274.466482,0.000000,,True,109.4,17407,True,27.563640,3276.75,,2.90,False,False,
3,3,1327,65879,2015-05-26 08:11:45,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.490787,-86.433842,11.6,14.3550,True,64.62260,520309.60,185.0,0.0,31.90,203.8438,1083.125,10374.80,,79274.466482,0.000000,,True,109.4,17407,True,27.563640,3276.75,,2.90,False,False,
4,4,1327,65939,2015-05-26 08:44:10,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.523703,-86.440787,48.8,14.3550,True,64.62260,520314.60,161.6,45.0,38.86,154.5125,1170.000,10375.15,,79275.391085,4.530565,,True,100.4,17407,True,28.796670,3276.75,,6.67,False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104532,547416,310,174607,2015-08-20 10:19:55,Condition Exists Cruise Control Enable Switch,unknown,unknown,596,31,20,105442984,36.701111,-86.529861,0.8,14.3550,True,66.48672,96089.09,179.6,0.0,41.76,218.7500,1408.875,2232.65,54.0,13273.853097,0.000000,32.0,True,100.4,255,True,68.710060,3276.75,0.0,0.87,False,False,
104533,547762,R1762,4246,2015-02-24 13:45:06,Low (Severity Low) Catalyst Tank Level,6X1u13D1500000000,CMMNS,1761,17,43,105442816,41.254166,-85.088888,0.0,14.0650,True,66.48672,79796.12,185.0,83.0,42.34,222.6875,1333.250,1527.45,38.0,11368.512172,14.727640,32.0,True,82.4,1023,True,65.962430,3276.75,47.2,16.82,False,False,
104534,547763,R1762,4428,2015-02-24 15:31:17,Low (Severity Medium) Catalyst Tank Level,6X1u13D1500000000,CMMNS,1761,18,11,105442816,39.944444,-86.016990,0.0,14.1375,True,66.48672,79913.24,179.6,53.0,42.34,222.6875,1352.875,1529.25,22.0,11390.042194,9.166799,32.0,True,82.4,1023,True,66.836240,3276.75,67.2,8.99,False,False,
104535,547764,R1762,6439,2015-02-26 13:12:11,Low (Severity Medium) Catalyst Tank Level,6X1u13D1500000000,CMMNS,5848,9,1,105442816,39.952870,-81.936990,0.0,14.2100,True,66.48672,80838.70,134.6,11.0,38.86,148.7188,595.750,1547.55,22.4,11527.675833,0.660432,32.0,True,84.2,17407,True,2.058292,3276.75,0.0,0.29,False,False,


In [9]:
#convert time columns to datetime
holdout[['event_timestamp', 'time_of_derate']]=holdout[["event_timestamp", "time_of_derate"]].apply(pd.to_datetime, format='%Y-%m-%d %H:%M:%S.%f')

In [10]:
holdout['spn']=holdout['spn'].astype("str")

In [11]:
holdout.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104537 entries, 0 to 104536
Data columns (total 39 columns):
 #   Column                       Non-Null Count   Dtype         
---  ------                       --------------   -----         
 0   Unnamed: 0                   104537 non-null  int64         
 1   equipment_id                 104537 non-null  object        
 2   record_id                    104537 non-null  int64         
 3   event_timestamp              104537 non-null  datetime64[ns]
 4   event_description            104537 non-null  object        
 5   ecu_model                    104537 non-null  object        
 6   ecu_make                     104537 non-null  object        
 7   spn                          104537 non-null  object        
 8   fmi                          104537 non-null  int64         
 9   active_transition_count      104537 non-null  int64         
 10  mct_number                   104537 non-null  int64         
 11  latitude                  

In [12]:
#conduct backfill on time_of_next_derate column
holdout['time_of_derate'] = holdout['time_of_derate'].fillna(method='bfill')

In [13]:
holdout['time_until_next_derate'] = holdout['time_of_derate'] - holdout['event_timestamp']
holdout.head()

Unnamed: 0.1,Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate,time_until_next_derate
0,0,1327,42689,2015-05-04 15:38:35,High Voltage (Aftertreatment 1 Particulate Tra...,unknown,unknown,50353,0,2,105383198,35.98875,-83.579583,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,,78862.886425,16.85423,,True,113.0,2,True,64.68085,3276.75,,26.39,False,False,2015-02-25 13:53:08,-69 days +22:14:33
1,1,1327,45667,2015-05-07 06:52:14,Condition Exists Cruise Control Enable Switch,unknown,unknown,596,31,126,105383198,40.1975,-74.661435,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,,78990.877785,0.766101,,True,102.2,255,True,0.0,3276.75,,1.74,False,False,2015-02-25 13:53:08,-71 days +07:00:54
2,2,1327,58809,2015-05-19 12:02:55,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.49125,-86.458842,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False,2015-02-25 13:53:08,-83 days +01:50:13
3,3,1327,65879,2015-05-26 08:11:45,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.490787,-86.433842,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False,2015-02-25 13:53:08,-90 days +05:41:23
4,4,1327,65939,2015-05-26 08:44:10,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.523703,-86.440787,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,,79275.391085,4.530565,,True,100.4,17407,True,28.79667,3276.75,,6.67,False,False,2015-02-25 13:53:08,-90 days +05:08:58


In [14]:
holdout.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104537 entries, 0 to 104536
Data columns (total 40 columns):
 #   Column                       Non-Null Count   Dtype          
---  ------                       --------------   -----          
 0   Unnamed: 0                   104537 non-null  int64          
 1   equipment_id                 104537 non-null  object         
 2   record_id                    104537 non-null  int64          
 3   event_timestamp              104537 non-null  datetime64[ns] 
 4   event_description            104537 non-null  object         
 5   ecu_model                    104537 non-null  object         
 6   ecu_make                     104537 non-null  object         
 7   spn                          104537 non-null  object         
 8   fmi                          104537 non-null  int64          
 9   active_transition_count      104537 non-null  int64          
 10  mct_number                   104537 non-null  int64          
 11  latitude     

There are some rows where time of derate and time until next derate are nulls.  This is because the equipment never
had a derate.  17 equipment ids and 3951 rows:  

In [29]:
no_derates = holdout[holdout['time_of_derate'].isnull()]
no_derates

Unnamed: 0.1,Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate,time_until_next_derate,target
100586,534180,2206,1236126,2000-06-08 12:06:28,Low (Severity High) Transmission Air Tank Pres...,EEO-xxF112C,EATON,37,1,1,105351510,41.625833,-88.100462,0.0,14.3550,True,0.00000,124808.90,170.6,16.0,24.94,182.8062,608.750,4425.30,47.6,17626.880170,0.686850,,True,122.0,50175,True,0.000000,,100.0,0.00,False,False,NaT,,0
100587,534360,2212,1211462,2000-03-19 11:22:12,Low (Severity Medium) Transmission Air Tank Pr...,EEO-xxF112C,EATON,37,18,37,105306529,35.256712,-81.042129,20.4,14.4275,True,0.00000,118713.30,181.4,29.0,17.98,204.5188,635.750,2837.60,52.8,15958.501575,1.281238,,True,123.8,50175,True,0.000000,,100.0,0.29,False,False,NaT,,0
100588,534361,2212,1211464,2000-03-19 11:23:00,Low (Severity High) Transmission Air Tank Pres...,EEO-xxF112C,EATON,37,1,5,105306529,35.256712,-81.042129,0.0,14.4275,True,0.00000,118713.30,181.4,14.0,18.56,203.2250,600.375,2837.65,53.2,15958.633661,0.620806,,True,104.0,50175,True,0.660207,,100.0,2.03,False,False,NaT,,0
100589,534658,2218,1212455,2011-01-01 05:09:43,Low (Severity Medium) Transmission Air Tank Pr...,EEO-xxF112C,EATON,37,18,5,105435718,40.226851,-76.721296,0.0,14.5000,True,0.00000,119422.70,132.8,20.0,35.38,138.1438,601.250,2488.75,54.8,16047.791729,0.792519,,True,105.8,50175,True,0.000000,,100.0,0.29,False,False,NaT,,0
100590,534659,2218,1212442,2011-01-01 05:09:43,Low (Severity Medium) Transmission Air Tank Pr...,EEO-xxF112C,EATON,37,18,5,105435718,40.226851,-76.721296,0.0,14.5000,True,0.00000,119422.70,132.8,20.0,35.38,138.1438,601.250,2488.75,54.8,16047.791729,0.792519,,True,105.8,50175,True,0.000000,,100.0,0.29,False,False,NaT,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104532,547416,310,174607,2015-08-20 10:19:55,Condition Exists Cruise Control Enable Switch,unknown,unknown,596,31,20,105442984,36.701111,-86.529861,0.8,14.3550,True,66.48672,96089.09,179.6,0.0,41.76,218.7500,1408.875,2232.65,54.0,13273.853097,0.000000,32.0,True,100.4,255,True,68.710060,3276.75,0.0,0.87,False,False,NaT,,0
104533,547762,R1762,4246,2015-02-24 13:45:06,Low (Severity Low) Catalyst Tank Level,6X1u13D1500000000,CMMNS,1761,17,43,105442816,41.254166,-85.088888,0.0,14.0650,True,66.48672,79796.12,185.0,83.0,42.34,222.6875,1333.250,1527.45,38.0,11368.512172,14.727640,32.0,True,82.4,1023,True,65.962430,3276.75,47.2,16.82,False,False,NaT,,0
104534,547763,R1762,4428,2015-02-24 15:31:17,Low (Severity Medium) Catalyst Tank Level,6X1u13D1500000000,CMMNS,1761,18,11,105442816,39.944444,-86.016990,0.0,14.1375,True,66.48672,79913.24,179.6,53.0,42.34,222.6875,1352.875,1529.25,22.0,11390.042194,9.166799,32.0,True,82.4,1023,True,66.836240,3276.75,67.2,8.99,False,False,NaT,,0
104535,547764,R1762,6439,2015-02-26 13:12:11,Low (Severity Medium) Catalyst Tank Level,6X1u13D1500000000,CMMNS,5848,9,1,105442816,39.952870,-81.936990,0.0,14.2100,True,66.48672,80838.70,134.6,11.0,38.86,148.7188,595.750,1547.55,22.4,11527.675833,0.660432,32.0,True,84.2,17407,True,2.058292,3276.75,0.0,0.29,False,False,NaT,,0


In [16]:
no_derates['equipment_id'].value_counts()

305      1751
302       963
309       568
304       448
307       143
310        18
301        17
308        14
306        11
2336        5
R1762       3
2218        2
2236        2
2283        2
2212        2
R1764       1
2206        1
Name: equipment_id, dtype: int64

In [17]:
#convert the time_until_next_derate to hours
holdout['time_until_next_derate'] = holdout['time_until_next_derate'] / np.timedelta64(1, 'h')

In [18]:
#insert a column called 'target' which assigns a value.  Use 2 hours as the window period
holdout['target'] = holdout['time_until_next_derate'].apply(lambda x: "1" if x > 1 and x <= 2 else "0")
 

In [19]:
holdout.head()

Unnamed: 0.1,Unnamed: 0,equipment_id,record_id,event_timestamp,event_description,ecu_model,ecu_make,spn,fmi,active_transition_count,mct_number,latitude,longitude,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,lamp_status,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,5246_derate,1569_derate,time_of_derate,time_until_next_derate,target
0,0,1327,42689,2015-05-04 15:38:35,High Voltage (Aftertreatment 1 Particulate Tra...,unknown,unknown,50353,0,2,105383198,35.98875,-83.579583,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,,78862.886425,16.85423,,True,113.0,2,True,64.68085,3276.75,,26.39,False,False,2015-02-25 13:53:08,-1633.7575,0
1,1,1327,45667,2015-05-07 06:52:14,Condition Exists Cruise Control Enable Switch,unknown,unknown,596,31,126,105383198,40.1975,-74.661435,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,,78990.877785,0.766101,,True,102.2,255,True,0.0,3276.75,,1.74,False,False,2015-02-25 13:53:08,-1696.985,0
2,2,1327,58809,2015-05-19 12:02:55,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.49125,-86.458842,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False,2015-02-25 13:53:08,-1990.163056,0
3,3,1327,65879,2015-05-26 08:11:45,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.490787,-86.433842,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,,79274.466482,0.0,,True,109.4,17407,True,27.56364,3276.75,,2.9,False,False,2015-02-25 13:53:08,-2154.310278,0
4,4,1327,65939,2015-05-26 08:44:10,Abnormal Update Rate Aftertreatment 1 Intake NOx,6X1u10D1500000000,CMMNS,3216,9,1,105383198,35.523703,-86.440787,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,,79275.391085,4.530565,,True,100.4,17407,True,28.79667,3276.75,,6.67,False,False,2015-02-25 13:53:08,-2154.850556,0


In [20]:
holdout.target.describe()
#Note there are only 50 rows that meet the 2-hour parameter

count     104537
unique         2
top            0
freq      104487
Name: target, dtype: object

In [21]:
#train_csv = train.to_csv('data/train.csv')

Create full df with hot-encoding of most variables, and target


In [22]:
holdout_codes=pd.get_dummies(data=holdout, columns=['spn','fmi','lamp_status', 'ecu_model'])

In [23]:
holdout_codes = holdout_codes.drop(columns=['equipment_id','record_id','event_timestamp', 'mct_number','latitude','longitude', 'ecu_make', 'event_description',
                                        '5246_derate', '1569_derate','time_of_derate', 'time_until_next_derate', 'spn_5246'])

In [24]:
holdout_codes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104537 entries, 0 to 104536
Columns: 338 entries, Unnamed: 0 to ecu_model_unknown
dtypes: bool(3), float64(19), int64(2), object(1), uint8(313)
memory usage: 49.0+ MB


In [25]:
#there are columns where the fill forward/fill backward did not fully populate

In [26]:
# replace remaining nulls with each column's mean
column_means = holdout_codes.mean()
holdout_codes = holdout_codes.fillna(column_means)

In [27]:
holdout_codes.head()

Unnamed: 0.1,Unnamed: 0,active_transition_count,accelerator_pedal,barometric_pressure,cruise_control_active,cruise_control_set_speed,distance_ltd,engine_coolant_temperature,engine_load,engine_oil_pressure,engine_oil_temperature,engine_rpm,engine_time_ltd,fuel_level,fuel_ltd,fuel_rate,fuel_temperature,ign_status,intake_manifold_temperature,parking_brake,speed,switched_battery_voltage,throttle,turbo_boost_pressure,target,spn_0,spn_100,spn_101,spn_102,spn_1024,spn_1028,spn_103,spn_1045,spn_105,spn_1056,spn_1059,spn_1067,spn_1068,spn_1072,spn_1078,spn_110,spn_111,spn_1172,spn_1176,spn_1209,spn_1231,spn_1236,spn_1247,spn_1279,spn_1322,spn_1323,spn_1325,spn_1326,spn_1327,spn_1328,spn_1347,spn_1481,spn_1482,spn_1483,spn_1569,spn_157,spn_1668,spn_1675,spn_168,spn_17096,spn_171,spn_175,spn_17590,spn_1761,spn_177,spn_1787,spn_1807,spn_1808,spn_1809,spn_1815,spn_184,spn_188,spn_190,spn_2017,spn_2023,spn_235,spn_236,spn_245,spn_247,spn_248,spn_256,spn_25780,spn_2579,spn_2623,spn_2629,spn_2659,spn_27,spn_2791,spn_2795,spn_2863,spn_2866,spn_29902,spn_3031,spn_3058,spn_3060,spn_3064,spn_3216,spn_3217,spn_3218,spn_3226,spn_3228,spn_3241,spn_3242,spn_3245,spn_3246,spn_3249,spn_3251,spn_3253,spn_32894,spn_3360,spn_3361,spn_3362,spn_3363,spn_3364,spn_3464,spn_3480,spn_3482,spn_3490,spn_3509,spn_3510,spn_3511,spn_3513,spn_3514,spn_35527,spn_3556,spn_3584,spn_3597,spn_36017,spn_3605,spn_3610,spn_3663,spn_3697,spn_3698,spn_37,spn_3703,spn_3720,spn_37265,spn_3821,spn_3936,spn_4094,spn_4096,spn_411,spn_412,spn_42190,spn_4276,spn_43088,spn_4334,spn_4340,spn_4342,spn_4344,spn_4346,spn_4354,spn_4360,spn_4363,spn_4364,spn_4375,spn_4376,spn_444,spn_4607,spn_46262,spn_47284,spn_4752,spn_4765,spn_4794,spn_4796,spn_4811,spn_5019,spn_5024,spn_5031,spn_50353,spn_5113,spn_512,spn_51923,spn_520200,spn_520203,spn_520298,spn_520302,spn_520330,spn_523530,spn_523531,spn_523543,spn_524033,spn_524037,spn_524287,spn_5298,spn_5319,spn_5394,spn_53958,spn_5396,spn_5397,spn_5442,spn_5443,spn_5444,spn_54478,spn_5491,spn_5571,spn_558,spn_5585,spn_563,spn_56503,spn_5742,spn_5743,spn_576,spn_578,spn_5848,spn_5851,spn_5853,spn_5862,spn_596,spn_609,spn_611,spn_612,spn_614,spn_624,spn_627,spn_629,spn_630,spn_632,spn_636,spn_639,spn_641,spn_647,spn_649,spn_651,spn_652,spn_65287,spn_653,spn_654,spn_655,spn_65535,spn_656,spn_677,spn_70,spn_723,spn_768,spn_77,spn_789,spn_790,spn_791,spn_792,spn_793,spn_794,spn_795,spn_797,spn_798,spn_799,spn_800,spn_801,spn_802,spn_803,spn_806,spn_807,spn_81,spn_810,spn_811,spn_829,spn_84,spn_862,spn_886,spn_905,spn_907,spn_91,spn_917,spn_929,spn_94,spn_95,spn_96,spn_97,spn_98,fmi_0,fmi_1,fmi_2,fmi_3,fmi_4,fmi_5,fmi_6,fmi_7,fmi_8,fmi_9,fmi_10,fmi_11,fmi_12,fmi_13,fmi_14,fmi_15,fmi_16,fmi_17,fmi_18,fmi_19,fmi_20,fmi_21,fmi_23,fmi_31,lamp_status_0,lamp_status_2,lamp_status_9,lamp_status_11,lamp_status_255,lamp_status_511,lamp_status_617,lamp_status_1023,lamp_status_1279,lamp_status_2035,lamp_status_2047,lamp_status_4351,lamp_status_5119,lamp_status_5375,lamp_status_6143,lamp_status_16639,lamp_status_16895,lamp_status_17407,lamp_status_17663,lamp_status_18419,lamp_status_18431,lamp_status_21503,lamp_status_22527,lamp_status_50175,lamp_status_51199,lamp_status_62463,lamp_status_63487,lamp_status_65535,ecu_model_0USA13_13_0415_2238A,ecu_model_20412511P07,ecu_model_6X1u10D1500000000,ecu_model_6X1u13D1500000000,ecu_model_6X1u17D1500000000,ecu_model_CECU3B-NAMUX4,ecu_model_EC60-adv,ecu_model_EC80ESP,ecu_model_EEO-xxF112C,ecu_model_MX,ecu_model_________Y043718,ecu_model_unknown
0,0,2,100.0,14.2825,True,62.13712,517711.8,183.2,90.0,33.64,216.6125,1359.125,10323.1,65.16582,78862.886425,16.85423,41.592694,True,113.0,True,64.68085,3276.75,23.091571,26.39,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,1,126,0.0,14.79,True,64.6226,518554.7,185.0,14.0,25.52,165.875,648.375,10338.7,65.16582,78990.877785,0.766101,41.592694,True,102.2,True,0.0,3276.75,23.091571,1.74,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,2,1,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,65.16582,79274.466482,0.0,41.592694,True,109.4,True,27.56364,3276.75,23.091571,2.9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,3,1,11.6,14.355,True,64.6226,520309.6,185.0,0.0,31.9,203.8438,1083.125,10374.8,65.16582,79274.466482,0.0,41.592694,True,109.4,True,27.56364,3276.75,23.091571,2.9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
4,4,1,48.8,14.355,True,64.6226,520314.6,161.6,45.0,38.86,154.5125,1170.0,10375.15,65.16582,79275.391085,4.530565,41.592694,True,100.4,True,28.79667,3276.75,23.091571,6.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [28]:
holdout_codes.to_csv('data/holdout_codes_2hr.csv')