In [None]:
import pandas as pd
import numpy as np
import joblib


In [None]:
df = pd.read_csv("CO2 Emissions_Canada.csv")

print(df.head())
print(df.columns)


    Make       Model Vehicle Class  Engine Size(L)  Cylinders Transmission  \
0  ACURA         ILX       COMPACT             2.0          4          AS5   
1  ACURA         ILX       COMPACT             2.4          4           M6   
2  ACURA  ILX HYBRID       COMPACT             1.5          4          AV7   
3  ACURA     MDX 4WD   SUV - SMALL             3.5          6          AS6   
4  ACURA     RDX AWD   SUV - SMALL             3.5          6          AS6   

  Fuel Type  Fuel Consumption City (L/100 km)  \
0         Z                               9.9   
1         Z                              11.2   
2         Z                               6.0   
3         Z                              12.7   
4         Z                              12.1   

   Fuel Consumption Hwy (L/100 km)  Fuel Consumption Comb (L/100 km)  \
0                              6.7                               8.5   
1                              7.7                               9.6   
2                   

In [None]:
np.random.seed(42)

# Simulate vehicle age (0–15 years)
df['Vehicle Age'] = np.random.randint(0, 16, size=len(df))

# Simulate mileage based on age (realistic assumption)
df['Mileage'] = df['Vehicle Age'] * np.random.randint(8000, 15000, size=len(df))


In [None]:
important_features = [
    'Engine Size(L)',
    'Cylinders',
    'Fuel Consumption City (L/100 km)',
    'Fuel Consumption Hwy (L/100 km)',
    'Fuel Consumption Comb (L/100 km)',
    'Fuel Type',
    'Vehicle Age',
    'Mileage'
]

X = df[important_features]
y = df['CO2 Emissions(g/km)']


In [None]:
X = pd.get_dummies(X, columns=['Fuel Type'], drop_first=True)

print(X.head())


   Engine Size(L)  Cylinders  Fuel Consumption City (L/100 km)  \
0             2.0          4                               9.9   
1             2.4          4                              11.2   
2             1.5          4                               6.0   
3             3.5          6                              12.7   
4             3.5          6                              12.1   

   Fuel Consumption Hwy (L/100 km)  Fuel Consumption Comb (L/100 km)  \
0                              6.7                               8.5   
1                              7.7                               9.6   
2                              5.8                               5.9   
3                              9.1                              11.1   
4                              8.7                              10.6   

   Vehicle Age  Mileage  Fuel Type_E  Fuel Type_N  Fuel Type_X  Fuel Type_Z  
0            6    59808        False        False        False         True  
1            3

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [17]:
from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor(
    n_estimators=200,
    random_state=42
)

rf_model.fit(X_train, y_train)


In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
y_pred = rf_model.predict(X_test)

# Calculate metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print results
print("Model Evaluation Results")
print("------------------------")
print("MAE :", mae)
print("MSE :", mse)
print("RMSE:", rmse)
print("R²  :", r2)


Model Evaluation Results
------------------------
MAE : 2.249116242705613
MSE : 16.32538365355596
RMSE: 4.040468246819415
R²  : 0.9952537348712317


In [None]:
joblib.dump(rf_model, "rf_model.pkl")
joblib.dump(X.columns, "model_columns.pkl")

print("Model and columns saved successfully")


Model and columns saved successfully


In [None]:
model_columns = joblib.load("model_columns.pkl")

user_input = pd.DataFrame(
    [[3.5, 6, 12, 8, 10, 'Z', 2, 10000]],
    columns=[
        'Engine Size(L)',
        'Cylinders',
        'Fuel Consumption City (L/100 km)',
        'Fuel Consumption Hwy (L/100 km)',
        'Fuel Consumption Comb (L/100 km)',
        'Fuel Type',
        'Vehicle Age',
        'Mileage'
    ]
)

user_input = pd.get_dummies(user_input, drop_first=True)
user_input = user_input.reindex(columns=model_columns, fill_value=0)

prediction = rf_model.predict(user_input)[0]
print("Predicted CO₂:", prediction)


Predicted CO₂: 246.6
