In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 

In [14]:
# Read the source table and keep every row, dropping only the last two columns.
total = (
    pd.read_csv('./total_v0.csv')
    .iloc[:, :-2]
)

In [15]:
'''Base feature engineering on `total`.

Fuel rate is calculated with the formula:
    fuel rate [L/H] = (MAF * 3600) / AFR / FD
where AFR = air-to-fuel ratio and FD = fuel density; both are constants
(14.7 and 820 in the code below).

Fuel consumption is calculated with the formula:
    fuel consumption [L/100KM] = fuel rate / speed * 100
'''


def _add_base_features(frame):
    """Add FUEL_RATE plus square-root, power and product columns to `frame`.

    Columns are written into `frame` in place, one at a time, in the order
    listed in `recipe`, so the resulting column order is fixed. Nothing is
    returned.
    """
    air_fuel_ratio = 14.7  # AFR term of the fuel-rate formula
    fuel_density = 820     # FD term of the fuel-rate formula
    frame['FUEL_RATE'] = (frame['MAF'] * 3600) / air_fuel_ratio / fuel_density

    # Each entry is (new column, operation, operand):
    #   'sqrt' -> np.sqrt of the operand column
    #   an int -> np.power of the operand column with that exponent
    #   'mul'  -> left-to-right product of the operand columns
    recipe = [
        ('FUEL_RATE_SQRT', 'sqrt', 'FUEL_RATE'),
        ('MAF_SQRT', 'sqrt', 'MAF'),
        ('ENGINE_RPM_SQRT', 'sqrt', 'ENGINE_RPM'),
        ('ENGINE_RPM_PWR_2', 2, 'ENGINE_RPM'),
        ('ENGINE_RPM_PWR_3', 3, 'ENGINE_RPM'),
        ('MAF_PWR_2', 2, 'MAF'),
        ('AIR_INTAKE_TEMP_SQRT', 'sqrt', 'AIR_INTAKE_TEMP'),
        ('AIR_INTAKE_TEMP_PWR_2', 2, 'AIR_INTAKE_TEMP'),
        ('AIR_INTAKE_TEMP_PWR_3', 3, 'AIR_INTAKE_TEMP'),
        ('SPEED_SQRT', 'sqrt', 'SPEED'),
        ('SPEED_PWR_2', 2, 'SPEED'),
        ('SPEED_PWR_3', 3, 'SPEED'),
        ('THROTTLE_POS_SQRT', 'sqrt', 'THROTTLE_POS'),
        ('ENGINE_LOAD_SQRT', 'sqrt', 'ENGINE_LOAD'),
        ('ENGINE_LOAD_PWR_2', 2, 'ENGINE_LOAD'),
        ('SPEED_X_AIT', 'mul', ('SPEED', 'AIR_INTAKE_TEMP')),
        ('AIT_X_EL', 'mul', ('AIR_INTAKE_TEMP', 'ENGINE_LOAD')),
        ('TIMING_ADVANCE_SQRT', 'sqrt', 'TIMING_ADVANCE'),
        ('FUEL_LEVEL_SQRT', 'sqrt', 'FUEL_LEVEL'),
        ('FUEL_LEVEL_PWR_2', 2, 'FUEL_LEVEL'),
        ('FUEL_LEVEL_PWR_3', 3, 'FUEL_LEVEL'),
        ('INTAKE_MANIFOLD_PRESSURE_SQRT', 'sqrt', 'INTAKE_MANIFOLD_PRESSURE'),
        ('INTAKE_MANIFOLD_PRESSURE_PWR_2', 2, 'INTAKE_MANIFOLD_PRESSURE'),
        ('SPEED_EL_AIT', 'mul', ('AIR_INTAKE_TEMP', 'ENGINE_LOAD', 'SPEED')),
    ]
    for target, operation, operand in recipe:
        if operation == 'sqrt':
            frame[target] = np.sqrt(frame[operand])
        elif operation == 'mul':
            # Multiply strictly left to right, matching a chained a * b * c.
            product = frame[operand[0]]
            for factor in operand[1:]:
                product = product * frame[factor]
            frame[target] = product
        else:
            frame[target] = np.power(frame[operand], operation)


_add_base_features(total)



In [16]:
# adding more features -> 95 features
def _add_interaction_features(frame):
    """Add ENGINE_LOAD_PWR_3, the product columns and FUEL_CONSUMPTION to `frame`.

    Columns are written into `frame` in place, in the order listed below, so
    later products can reuse earlier ones (e.g. '28' is built from '1').
    Nothing is returned.
    """
    frame['ENGINE_LOAD_PWR_3'] = np.power(frame['ENGINE_LOAD'], 3)

    # (new column, factor columns); factors are multiplied left to right.
    products = [
        ('SPEED_X_ENGINE_LOAD', ('SPEED', 'ENGINE_LOAD')),
        ('1', ('SPEED_SQRT', 'TIMING_ADVANCE')),
        ('2', ('AIR_INTAKE_TEMP_SQRT', 'MAF')),
        ('3', ('ENGINE_LOAD_SQRT', 'SPEED_X_AIT')),
        ('4', ('SPEED_SQRT', 'AIR_INTAKE_TEMP_SQRT')),
        ('5', ('AIR_INTAKE_TEMP_SQRT', 'ENGINE_LOAD_SQRT')),
        ('6', ('SPEED_SQRT', 'ENGINE_LOAD_SQRT')),
        ('7', ('AIR_INTAKE_TEMP_PWR_2', 'SPEED_PWR_2')),
        ('8', ('AIR_INTAKE_TEMP_PWR_2', 'ENGINE_LOAD_SQRT')),
        ('9', ('ENGINE_LOAD_SQRT', 'SPEED_PWR_2')),
        ('10', ('AIR_INTAKE_TEMP_PWR_3', 'SPEED_PWR_3')),
        ('11', ('AIR_INTAKE_TEMP_PWR_3', 'ENGINE_LOAD_SQRT')),
        ('12', ('ENGINE_LOAD_SQRT', 'SPEED_PWR_3')),
        ('13', ('SPEED_SQRT', 'AIR_INTAKE_TEMP', 'TIMING_ADVANCE')),
        ('14', ('SPEED_SQRT', 'THROTTLE_POS', 'TIMING_ADVANCE')),
        ('15', ('SPEED_SQRT', 'AIT_X_EL', 'SPEED_X_AIT')),
        ('16', ('TIMING_ADVANCE', 'SPEED_PWR_2')),
        ('17', ('THROTTLE_POS', 'SPEED_SQRT')),
        ('18', ('TIMING_ADVANCE', 'AIR_INTAKE_TEMP_SQRT')),
        ('19', ('TIMING_ADVANCE', 'SPEED_PWR_3')),
        ('20', ('THROTTLE_POS', 'AIR_INTAKE_TEMP_PWR_3')),
        ('21', ('TIMING_ADVANCE', 'ENGINE_RPM')),
        ('22', ('SPEED_SQRT', 'AIR_INTAKE_TEMP', 'ENGINE_RPM')),
        ('23', ('SPEED_SQRT', 'AIR_INTAKE_TEMP', 'THROTTLE_POS')),
        ('24', ('SPEED_SQRT', 'AIR_INTAKE_TEMP_PWR_2')),
        ('25', ('SPEED_SQRT', 'AIR_INTAKE_TEMP_PWR_3')),
        ('26', ('SPEED_SQRT', 'THROTTLE_POS_SQRT')),
        ('27', ('SPEED_PWR_3', 'AIR_INTAKE_TEMP_PWR_2')),
        ('28', ('1', '1')),
        ('29', ('5', '2')),
        ('30', ('3', '4')),
        ('31', ('13', '14')),
        ('32', ('15', '18')),
        ('33', ('17', '18')),
        ('34', ('4', '5')),
        ('35', ('6', '7')),
        ('36', ('8', '9')),
        ('37', ('20', '27')),
        ('38', ('22', '23')),
        ('39', ('24', '25')),
        ('40', ('10', '11')),
        ('41', ('27', '26')),
        ('42', ('26', '25')),
        ('43', ('26', '24')),
        ('44', ('26', '23')),
        ('45', ('9', '4')),
        ('46', ('4', '12')),
        ('47', ('4', '18')),
        ('48', ('10', '23')),
        ('49', ('8', '1')),
        ('50', ('20', '11')),
        ('51', ('1', '12')),
        ('52', ('24', '13')),
        ('53', ('10', '14')),
    ]
    for target, factors in products:
        product = frame[factors[0]]
        for factor in factors[1:]:
            product = product * frame[factor]
        frame[target] = product

    # fuel consumption = fuel rate / speed * 100
    # NOTE(review): any row with SPEED == 0 divides by zero here, which pandas
    # evaluates to inf (or NaN for 0 / 0) rather than raising — confirm whether
    # such rows exist and how downstream code is expected to treat them.
    frame['FUEL_CONSUMPTION'] = (frame['FUEL_RATE'] / frame['SPEED']) * 100


_add_interaction_features(total)

In [17]:
# Print a summary of `total`: index range, column names, non-null counts and dtypes.
total.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17105 entries, 0 to 17104
Data columns (total 96 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   BAROMETRIC_PRESSURE(KPA)        17105 non-null  float64
 1   ENGINE_COOLANT_TEMP             17105 non-null  float64
 2   FUEL_LEVEL                      17105 non-null  float64
 3   ENGINE_LOAD                     17105 non-null  float64
 4   AMBIENT_AIR_TEMP                17105 non-null  float64
 5   ENGINE_RPM                      17105 non-null  float64
 6   INTAKE_MANIFOLD_PRESSURE        17105 non-null  float64
 7   MAF                             17105 non-null  float64
 8   AIR_INTAKE_TEMP                 17105 non-null  float64
 9   SPEED                           17105 non-null  float64
 10  SHORT TERM FUEL TRIM BANK 1     17105 non-null  float64
 11  ENGINE_RUNTIME                  17105 non-null  float64
 12  THROTTLE_POS                    

In [18]:
# Positional split of `total`: the first 10000 rows form the training set and
# all remaining rows form the test set. No shuffling is performed in this cell.
# NOTE(review): the *_shuffled names suggest the rows were already shuffled
# upstream (before total_v0.csv was written) — confirm.
train_shuffled = total.iloc[:10000]
test_shuffled = total.iloc[10000:]

In [19]:
# Write the full engineered frame and both splits to CSV files (paths are
# relative to the current working directory), leaving out the index column.
total.to_csv('total_v3.csv', index=False)
train_shuffled.to_csv('train_shuffled_v3.csv', index=False)
test_shuffled.to_csv('test_shuffled_v3.csv', index=False)