# 🌍 CO2 Emissions Estimation from Vehicle Specs

This notebook demonstrates how to predict vehicle CO2 emissions using features like engine size, fuel consumption, and fuel type. We use a regression model (Random Forest) with feature scaling and categorical encoding.

In [None]:
# 📦 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
# Silence every warning category for the rest of the session. This also hides
# deprecation/future warnings emitted by pandas and scikit-learn.
# NOTE(review): consider narrowing this to specific categories so that API
# deprecations stay visible.
warnings.filterwarnings("ignore")


## 📂 Load Dataset

In [None]:
# Read the emissions table from a CSV located next to the notebook.
# Change the file name/path below if the data lives somewhere else.
df = pd.read_csv(filepath_or_buffer="co2_emissions.csv")

# Preview the first five rows to sanity-check the load.
df.head(5)

## 🧾 Dataset Overview

In [None]:
# Print a concise summary of the frame: column names, non-null counts, dtypes
# and memory usage. Useful for spotting missing values and text columns.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; non-numeric columns are left out by default.
df.describe()

## 📊 Data Visualization and Correlations

In [None]:
# Columns shown in the scatter-matrix: three predictors plus the target
# ("CO2Emissions").
pairplot_columns = ['EngineSize', 'FuelConsumption_Comb', 'Cylinders', 'CO2Emissions']

# One scatter plot per column pair, with per-column distributions on the diagonal.
sns.pairplot(df[pairplot_columns])

# y > 1 places the figure-level title just above the top row of axes.
plt.suptitle("Pairwise Relationships", y=1.02)
plt.show()

In [None]:
# Pearson correlation is only defined for numeric data. The frame also carries
# at least one categorical column ("FuelType", one-hot encoded further down),
# and pandas >= 2.0 raises a ValueError when DataFrame.corr() meets text
# columns instead of silently dropping them. Selecting the numeric columns
# first keeps this cell working on both old and new pandas versions.
numeric_df = df.select_dtypes(include="number")

plt.figure(figsize=(10, 6))
# annot=True writes each coefficient into its cell; "coolwarm" is a diverging
# palette, so negative and positive correlations get opposite colours.
sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()

## 🛠️ Feature and Target Separation

In [None]:
# Column groups consumed by the preprocessing step: numeric columns are scaled,
# categorical columns are one-hot encoded.
numeric_features = [
    "EngineSize",
    "Cylinders",
    "FuelConsumption_City",
    "FuelConsumption_Hwy",
    "FuelConsumption_Comb",
]
categorical_features = ["FuelType"]

# Target is the emissions column; the feature frame is everything else.
y = df.loc[:, "CO2Emissions"]
X = df.drop("CO2Emissions", axis="columns")


## 🔀 Train/Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 🤖 Model Training: Random Forest Regressor

In [None]:

# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones. Columns of X that are in neither list are dropped, because
# ColumnTransformer defaults to remainder="drop".
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric_features),
    # handle_unknown="ignore": a category that never appeared in the training
    # split (e.g. a rare fuel type that landed only in the test split) is
    # encoded as all zeros instead of raising an error at predict time.
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
])

# Chain preprocessing and the regressor so the transformers are fitted on the
# training split only and the same fitted transforms are reused at predict
# time (no leakage from the test split).
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", RandomForestRegressor(n_estimators=100, random_state=42))
])

# Fit on the training split, then predict emissions for the held-out rows.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)


## 📈 Model Evaluation

In [None]:

# Mean absolute error: average size of the prediction error, in target units.
mae = mean_absolute_error(y_test, y_pred)

# Root mean squared error. The `squared=False` shortcut of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6, so take the square
# root explicitly; this works on every scikit-learn version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Coefficient of determination: share of target variance explained by the model.
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")


## 🧪 Actual vs Predicted CO2 Emissions

In [None]:

# Scatter of true vs. predicted emissions on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(y_test, y_pred, alpha=0.4)

# Dashed red diagonal = perfect prediction; points close to it are well predicted.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], "r--")

ax.set_xlabel("Actual CO2 Emissions")
ax.set_ylabel("Predicted CO2 Emissions")
ax.set_title("Actual vs Predicted CO2 Emissions")
ax.grid(True)
plt.show()
