In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the raw dataset from 'data.csv' (path is relative to the working directory).
data = pd.read_csv('data.csv')
# Bare expression: renders the loaded frame as the cell output.
data

Unnamed: 0,Fuel_consumption,Accelerator_Pedal_value,Throttle_position_signal,Short_Term_Fuel_Trim_Bank1,Intake_air_pressure,Filtered_Accelerator_Pedal_value,Absolute_throttle_position,Engine_soacking_time,Inhibition_of_engine_fuel_cut_off,Engine_in_fuel_cut_off,...,Acceleration_speed_-_Longitudinal,Indication_of_brake_switch_ON/OFF,Master_cylinder_pressure,Calculated_road_gradient,Acceleration_speed_-_Lateral,Steering_wheel_speed,Steering_wheel_angle,Time(s),Class,PathOrder
0,268.8,0.0,5.2,0.0,33,0,13.3,3,0,0,...,-8.5,1,325.5,0.0,-8.8,0,-3.4,1,A,1
1,243.2,0.0,6.1,0.0,40,0,13.7,3,0,0,...,0.1,1,0.9,0.0,-0.2,0,-3.6,2,A,1
2,217.6,0.0,5.2,0.0,41,0,13.7,3,0,0,...,0.1,1,0.9,0.0,-0.2,0,-3.6,3,A,1
3,204.8,0.0,4.7,0.0,38,0,13.3,3,0,0,...,0.1,1,0.9,0.0,-0.2,0,-3.6,4,A,1
4,217.6,0.0,5.7,0.0,40,0,13.7,3,0,0,...,0.1,1,0.9,0.0,-0.2,0,-3.5,5,A,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94375,345.6,0.0,6.6,7.0,0,0,14.5,6,0,0,...,-0.2,2,2.3,0.0,0.0,0,-13.2,2564,D,2
94376,345.6,0.0,6.6,7.0,0,0,14.5,6,0,0,...,0.1,2,8.7,0.0,-0.1,0,-13.0,2565,D,2
94377,345.6,0.0,6.6,7.0,0,0,14.5,6,0,0,...,-0.2,2,12.6,0.0,0.0,0,-13.2,2566,D,2
94378,332.8,0.0,5.7,6.3,0,0,14.1,6,0,0,...,-0.2,2,13.0,0.0,0.0,0,-13.3,2567,D,2


In [3]:
# Discard every row that contains at least one missing value.
data = data.dropna(axis=0, how='any')

In [4]:
# Show the dtype of every column in the cleaned frame.
data.dtypes

Fuel_consumption                                float64
Accelerator_Pedal_value                         float64
Throttle_position_signal                        float64
Short_Term_Fuel_Trim_Bank1                      float64
Intake_air_pressure                               int64
Filtered_Accelerator_Pedal_value                  int64
Absolute_throttle_position                      float64
Engine_soacking_time                              int64
Inhibition_of_engine_fuel_cut_off                 int64
Engine_in_fuel_cut_off                            int64
Fuel_Pressure                                     int64
Long_Term_Fuel_Trim_Bank1                       float64
Engine_speed                                      int64
Engine_torque_after_correction                  float64
Torque_of_friction                              float64
Flywheel_torque_(after_torque_interventions)    float64
Current_spark_timing                            float64
Engine_coolant_temperature                      

In [5]:
from sklearn.preprocessing import LabelEncoder

# Replace the 'Class' labels with integer codes; the values are cast to str
# first so the encoder always sees a single, uniform type.
data = data.assign(
    Class=LabelEncoder().fit_transform(data['Class'].astype(str))
)


In [6]:
# Pairwise Pearson correlation between all columns of the frame.
corr_matrix = data.corr(method='pearson')

In [7]:
# Rank every column by the magnitude of its correlation with the target,
# strongest first (the sign of the correlation is discarded by abs()).
corr_with_fuel_consumption = (
    corr_matrix.loc[:, 'Fuel_consumption']
    .abs()
    .sort_values(ascending=False)
)

# Show the 20 strongest correlations
print(corr_with_fuel_consumption.iloc[:20])

Fuel_consumption                                1.000000
Engine_torque_after_correction                  0.884005
Engine_torque                                   0.882200
Absolute_throttle_position                      0.823044
Throttle_position_signal                        0.815271
Calculated_LOAD_value                           0.779371
Flywheel_torque_(after_torque_interventions)    0.770214
Flywheel_torque                                 0.768785
Engine_speed                                    0.731023
Accelerator_Pedal_value                         0.680555
Intake_air_pressure                             0.631606
Maximum_indicated_engine_torque                 0.597687
Torque_converter_turbine_speed_-_Unfiltered     0.589829
Torque_converter_speed                          0.589325
Acceleration_speed_-_Longitudinal               0.582281
Minimum_indicated_engine_torque                 0.538849
Wheel_velocity_front_left-hand                  0.439027
Wheel_velocity_front_right-hand

In [8]:

# Predictor columns fed to the model, in the order the model will see them.
features = [
    'Absolute_throttle_position',
    'Engine_speed',
    'Current_Gear',
    'Steering_wheel_angle',
    'Steering_wheel_speed',
    'Acceleration_speed_-_Longitudinal',
    'Calculated_LOAD_value',
    'Engine_coolant_temperature',
    'Activation_of_Air_compressor',
    'Intake_air_pressure',
    'Engine_torque',
    'Vehicle_speed',
    'Accelerator_Pedal_value',
    'Indication_of_brake_switch_ON/OFF',
]

# Feature matrix and regression target taken from the cleaned frame.
X = data.loc[:, features]
y = data.loc[:, 'Fuel_consumption']

In [9]:
# Bare expression: renders the selected feature matrix as the cell output.
X

Unnamed: 0,Absolute_throttle_position,Engine_speed,Current_Gear,Steering_wheel_angle,Steering_wheel_speed,Acceleration_speed_-_Longitudinal,Calculated_LOAD_value,Engine_coolant_temperature,Activation_of_Air_compressor,Intake_air_pressure,Engine_torque,Vehicle_speed,Accelerator_Pedal_value,Indication_of_brake_switch_ON/OFF
0,13.3,929,0,-3.4,0,-8.5,23.9,95,0,33,5.5,0,0.0,1
1,13.7,726,0,-3.6,0,0.1,30.6,95,0,40,7.0,0,0.0,1
2,13.7,685,0,-3.6,0,0.1,31.8,95,0,41,7.0,0,0.0,1
3,13.3,675,0,-3.6,0,0.1,29.0,95,0,38,7.0,0,0.0,1
4,13.7,716,0,-3.5,0,0.1,30.2,95,1,40,8.2,0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94375,14.5,669,14,-13.2,0,-0.2,45.5,86,1,0,20.3,0,0.0,2
94376,14.5,669,14,-13.0,0,0.1,46.3,86,1,0,20.3,0,0.0,2
94377,14.5,667,0,-13.2,0,-0.2,45.9,86,1,0,20.3,0,0.0,2
94378,14.1,657,0,-13.3,0,-0.2,44.3,86,1,0,18.0,0,0.0,2


In [10]:
# Outlier
def detect_outliers_iqr(data, feature):
  """Return the rows of ``data`` whose ``feature`` value is NOT an IQR outlier.

  Despite the name, the function filters rather than flags: it keeps the rows
  whose value lies inside the Tukey fences ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` and
  drops the rest. The input frame is not modified.

  Parameters
  ----------
  data : pandas.DataFrame
      Frame to filter.
  feature : str
      Name of the numeric column the fences are computed on.

  Returns
  -------
  pandas.DataFrame
      The subset of ``data`` (original index preserved) inside the fences.
      Rows where ``feature`` is NaN are dropped, since NaN never compares
      as being inside the range.
  """
  q1 = data[feature].quantile(0.25)
  q3 = data[feature].quantile(0.75)
  iqr = q3 - q1
  lower_bound = q1 - 1.5 * iqr
  upper_bound = q3 + 1.5 * iqr
  # The fences are inclusive. With strict comparisons a column whose IQR is 0
  # (constant, or a flag that is almost always the same value) has
  # lower_bound == upper_bound, so no value can satisfy both tests and every
  # row would be discarded.
  return data[data[feature].between(lower_bound, upper_bound)]

# Apply outlier detection to each feature
# Each pass rebinds `data` to the rows that survived the filter for that
# feature, so the filters compound: later features are evaluated on the rows
# left over from the earlier ones.
# NOTE(review): `X` and `y` were extracted from `data` before this loop runs
# and are what the train/test split consumes, so this filtering does not reach
# the data the model is trained on. Rebuild `X`/`y` from the filtered `data`
# if the outlier removal is meant to take effect.
for feature in features:
  data = detect_outliers_iqr(data, feature)



In [11]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [12]:
# Gradient-boosted regression baseline: fit on the training split, then
# score the predictions for the held-out split.

from xgboost import XGBRegressor

xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X=X_train, y=y_train)
y_pred_xgb = xgb_model.predict(X_test)

# The two metrics are independent of each other; both compare the held-out
# targets with the model's predictions.
r2_xgb = r2_score(y_true=y_test, y_pred=y_pred_xgb)
mse_xgb = mean_squared_error(y_true=y_test, y_pred=y_pred_xgb)

print('XGBoost Regressor:')
print('Mean Squared Error:', mse_xgb)
print('R^2 Score:', r2_xgb)


XGBoost Regressor:
Mean Squared Error: 8067.118497340233
R^2 Score: 0.9858503510989212


In [14]:
# One hypothetical driving sample, keyed by feature name. Every value is a
# plain scalar (the sample is a single row); the numbers are illustrative.
new_data = {
    'Absolute_throttle_position': 50,  # Mid-range throttle position (example)
    'Engine_speed': 2500,  # Highway cruising speed RPM (example)
    'Current_Gear': 4,  # Estimate based on speed (replace with your car's gear)
    'Steering_wheel_angle': 5,  # Small steering angle for straight driving (example)
    'Steering_wheel_speed': 100,  # Slow turning motion (example)
    'Acceleration_speed_-_Longitudinal': 1,  # Slight acceleration (example)
    'Calculated_LOAD_value': 40,  # Engine load dependent, consult manual (example)
    'Engine_coolant_temperature': 90,  # Typical operating temperature (example)
    'Activation_of_Air_compressor': 0,  # Air compressor likely off (example)
    'Intake_air_pressure': 100,  # Common air pressure value (example)
    'Engine_torque': 30,  # Engine power and RPM dependent, consult manual (example)
    'Vehicle_speed': 80,  # Highway cruising speed (example)
    'Accelerator_Pedal_value': 60,  # Moderate pedal press (example)
    'Indication_of_brake_switch_ON/OFF': 0  # Brakes likely not applied (example)
}

# Wrap the dict in a list to get a one-row frame, then select the columns by
# `features` so the column order matches the training matrix regardless of the
# key order above; a feature missing from the dict raises KeyError here
# instead of reaching the model.
new_data_df = pd.DataFrame([new_data])[features]
predicted_fuel_consumption = xgb_model.predict(new_data_df)

print("Predicted Fuel Consumption (MCC):")
print(predicted_fuel_consumption[0])

Predicted Fuel Consumption (MCC):
969.64703


In [None]:
import pickle

# Serialise the fitted model to 'xgboost_model.pkl' in the working directory;
# the context manager closes the file even if pickling fails.
with open('xgboost_model.pkl', mode='wb') as model_file:
  pickle.dump(xgb_model, model_file)
