# Carbon footprint Prediction

In [38]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [17]:
df = pd.read_csv('data/carbon_footprint_india.csv')
df.head()

Unnamed: 0,Year,State/UT,Sector,CO₂ Emissions (Million Tonnes),CH₄ Emissions (Million Tonnes CO₂e),N₂O Emissions (Million Tonnes CO₂e),Energy Consumption (TWh),Electricity Generation (TWh),Fossil Fuel Consumption (MTOE),Vehicle Count (Million Units),Industrial Output (GDP Contribution in ₹ Cr),Agricultural Land Usage (Million Hectares),Deforestation Rate (sq. km per year),Waste Generation (Million Tonnes),Renewable Energy Share (%)
0,2006,Gujarat,Energy,224.26,14.9,9.02,40.25,279.66,107.25,47.16,9144.38,48.15,107.42,47.2,6.88
1,2019,Jharkhand,Waste,132.17,4.2,6.28,152.16,188.59,218.93,35.95,36044.41,46.64,27.46,35.83,39.76
2,2014,Meghalaya,Transport,341.46,12.53,9.38,269.18,81.57,17.66,30.39,36520.51,42.7,31.41,27.45,18.72
3,2010,Kerala,Energy,71.69,47.87,13.96,373.22,174.96,14.73,8.88,71955.77,37.77,51.19,91.14,69.71
4,2007,Tripura,Agriculture,430.54,15.94,9.69,416.05,86.6,162.34,23.88,68045.72,27.01,58.79,61.55,59.24


In [18]:
GWP_CH4 = 28  # Methane GWP
GWP_N2O = 265  # Nitrous Oxide GWP

# Calculate total carbon footprint
df["Carbon Footprint (Million Tonnes CO₂e)"] = (
    df["CO₂ Emissions (Million Tonnes)"] +
    (df["CH₄ Emissions (Million Tonnes CO₂e)"] * GWP_CH4) +
    (df["N₂O Emissions (Million Tonnes CO₂e)"] * GWP_N2O)
)

In [19]:
df.head()

Unnamed: 0,Year,State/UT,Sector,CO₂ Emissions (Million Tonnes),CH₄ Emissions (Million Tonnes CO₂e),N₂O Emissions (Million Tonnes CO₂e),Energy Consumption (TWh),Electricity Generation (TWh),Fossil Fuel Consumption (MTOE),Vehicle Count (Million Units),Industrial Output (GDP Contribution in ₹ Cr),Agricultural Land Usage (Million Hectares),Deforestation Rate (sq. km per year),Waste Generation (Million Tonnes),Renewable Energy Share (%),Carbon Footprint (Million Tonnes CO₂e)
0,2006,Gujarat,Energy,224.26,14.9,9.02,40.25,279.66,107.25,47.16,9144.38,48.15,107.42,47.2,6.88,3031.76
1,2019,Jharkhand,Waste,132.17,4.2,6.28,152.16,188.59,218.93,35.95,36044.41,46.64,27.46,35.83,39.76,1913.97
2,2014,Meghalaya,Transport,341.46,12.53,9.38,269.18,81.57,17.66,30.39,36520.51,42.7,31.41,27.45,18.72,3178.0
3,2010,Kerala,Energy,71.69,47.87,13.96,373.22,174.96,14.73,8.88,71955.77,37.77,51.19,91.14,69.71,5111.45
4,2007,Tripura,Agriculture,430.54,15.94,9.69,416.05,86.6,162.34,23.88,68045.72,27.01,58.79,61.55,59.24,3444.71


In [20]:
df.drop(columns=['Year'], inplace=True)

In [21]:
df = pd.get_dummies(df, columns=['Sector'], drop_first=True)

In [22]:
df = pd.get_dummies(df, columns=['State/UT'], drop_first=True)

In [23]:
df.head()

Unnamed: 0,CO₂ Emissions (Million Tonnes),CH₄ Emissions (Million Tonnes CO₂e),N₂O Emissions (Million Tonnes CO₂e),Energy Consumption (TWh),Electricity Generation (TWh),Fossil Fuel Consumption (MTOE),Vehicle Count (Million Units),Industrial Output (GDP Contribution in ₹ Cr),Agricultural Land Usage (Million Hectares),Deforestation Rate (sq. km per year),...,State/UT_Puducherry,State/UT_Punjab,State/UT_Rajasthan,State/UT_Sikkim,State/UT_Tamil Nadu,State/UT_Telangana,State/UT_Tripura,State/UT_Uttar Pradesh,State/UT_Uttarakhand,State/UT_West Bengal
0,224.26,14.9,9.02,40.25,279.66,107.25,47.16,9144.38,48.15,107.42,...,0,0,0,0,0,0,0,0,0,0
1,132.17,4.2,6.28,152.16,188.59,218.93,35.95,36044.41,46.64,27.46,...,0,0,0,0,0,0,0,0,0,0
2,341.46,12.53,9.38,269.18,81.57,17.66,30.39,36520.51,42.7,31.41,...,0,0,0,0,0,0,0,0,0,0
3,71.69,47.87,13.96,373.22,174.96,14.73,8.88,71955.77,37.77,51.19,...,0,0,0,0,0,0,0,0,0,0
4,430.54,15.94,9.69,416.05,86.6,162.34,23.88,68045.72,27.01,58.79,...,0,0,0,0,0,0,1,0,0,0


In [24]:
X = df.drop(columns=['Carbon Footprint (Million Tonnes CO₂e)'])
Y = df['Carbon Footprint (Million Tonnes CO₂e)']

In [25]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [27]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [33]:
linear_model = LinearRegression()
linear_model.fit(X_train_scaled, Y_train)

LinearRegression()

In [34]:
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train_scaled, Y_train)

DecisionTreeRegressor(random_state=42)

In [35]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, Y_train)

RandomForestRegressor(random_state=42)

In [37]:
Y_pred = linear_model.predict(X_test_scaled)
Y_pred

array([5051.33, 3530.23, 4850.84, ..., 5981.59, 2636.33, 6164.88])

In [39]:
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

In [40]:
print(f"{mae}, {mse}, {r2}")

2.7732767193811012e-12, 1.1888663047441193e-23, 1.0


In [41]:
Y_pred = tree_model.predict(X_test_scaled)
Y_pred

array([5116.22, 3602.14, 4972.44, ..., 6017.98, 2672.67, 6208.79])

In [42]:
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

In [43]:
print(f"{mae}, {mse}, {r2}")

82.5738, 10840.0223363, 0.9955045329961458


In [44]:
Y_pred = rf_model.predict(X_test_scaled)
Y_pred

array([5036.0842, 3495.7596, 4800.9821, ..., 5977.0251, 2643.6432,
       6187.766 ])

In [45]:
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

In [46]:
print(f"{mae}, {mse}, {r2}")

39.487226199999974, 2520.7744731802, 0.998954609306441
