In [None]:
import pandas as pd


from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Naive baseline: predict each country's latest-year CO2 emissions with that
# country's mean CO2 emissions over all years, then score with RMSE and MAE.

# Column being predicted by the baseline.
co2_col = 'CO2 Emissions (Tons/Capita)'

# Indicators that are averaged within every (Year, Country) pair.
indicator_cols = [
    'Avg Temperature (°C)',
    co2_col,
    'Sea Level Rise (mm)',
    'Rainfall (mm)',
    'Population',
    'Renewable Energy (%)',
    'Extreme Weather Events',
    'Forest Area (%)',
]

df = pd.read_csv('climate_change_dataset.csv')

# Collapse duplicate (Year, Country) records into a single averaged row.
df = df.groupby(['Year', 'Country'], as_index=False).agg(
    dict.fromkeys(indicator_cols, 'mean')
)
df = df.sort_values(by=['Country', 'Year']).reset_index(drop=True)

# Per-country mean of CO2 over every available year, broadcast to each row.
# NOTE(review): this mean includes the latest year that is scored below, so
# the baseline sees part of its own target — confirm this is intended.
df['SMA_CO2'] = df.groupby('Country')[co2_col].transform('mean')

# Both the prediction and the ground truth are taken from the latest year.
latest_year = df['Year'].max()
latest_rows = df[df['Year'] == latest_year]

baseline_pred = latest_rows.groupby('Country')['SMA_CO2'].mean().reset_index()
actual_values = latest_rows.groupby('Country')[co2_col].mean().reset_index()

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(actual_values[co2_col], baseline_pred['SMA_CO2']))

# Calculate MAE
mae = mean_absolute_error(actual_values[co2_col], baseline_pred['SMA_CO2'])

print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")

RMSE: 3.17
MAE: 2.41


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Fit a linear regression of CO2 emissions on the climate indicators plus
# one-hot encoded country, and score it on a random 20% hold-out split.

target = 'CO2 Emissions (Tons/Capita)'
features = [
    'Avg Temperature (°C)',
    'Sea Level Rise (mm)',
    'Rainfall (mm)',
    'Renewable Energy (%)',
    'Extreme Weather Events',
    'Forest Area (%)',
    'Population',
]

df = pd.read_csv('climate_change_dataset.csv')

# Country/year-ordered view of the raw data; not consumed by the model below.
df_sorted = df.sort_values(['Country', 'Year'])

# Per-country mean CO2 and the latest-year baseline derived from it.
# Neither is part of the feature matrix built further down.
df['SMA_CO2'] = df.groupby('Country')[target].transform('mean')
latest_year = df['Year'].max()
baseline_pred = df[df['Year'] == latest_year].groupby('Country')['SMA_CO2'].mean()

# One-hot encode the country, dropping the first level as the reference.
df = pd.get_dummies(df, columns=['Country'], drop_first=True)
country_dummies = list(df.filter(regex='Country_'))

X = df[features + country_dummies]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred)
lr_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'MAE: {lr_mae:.2f}')
print(f'RMSE: {lr_rmse:.2f}')




MAE: 4.90
RMSE: 5.68


In [None]:
from sklearn.ensemble import RandomForestRegressor

import pandas as pd
import numpy as np

# Train a 200-tree random forest on the training split, report hold-out
# metrics, then list the forest's feature importances from largest to smallest.
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

rf_mae = mean_absolute_error(y_test, y_pred_rf)
rf_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
rf_r2 = r2_score(y_test, y_pred_rf)
print(f'MAE: {rf_mae:.2f}')
print(f'RMSE: {rf_rmse:.2f}')
print(f'R²: {rf_r2:.2f}\n')

# Pair every training column with the importance the forest assigned to it.
importances = rf.feature_importances_
feature_names = X_train.columns
feat_importances = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
)
feat_importances_sorted = feat_importances.sort_values(
    by='Importance', ascending=False
)

print(feat_importances_sorted)



MAE: 5.03
RMSE: 5.89
R²: -0.06

                   Feature  Importance
0     Avg Temperature (°C)    0.141665
6               Population    0.141468
3     Renewable Energy (%)    0.136188
2            Rainfall (mm)    0.132682
5          Forest Area (%)    0.127663
1      Sea Level Rise (mm)    0.112285
4   Extreme Weather Events    0.083166
17          Country_Russia    0.012530
19              Country_UK    0.010972
20             Country_USA    0.010829
7        Country_Australia    0.010495
18    Country_South Africa    0.010263
12         Country_Germany    0.009462
13           Country_India    0.008576
10           Country_China    0.008235
14       Country_Indonesia    0.008090
11          Country_France    0.007670
16          Country_Mexico    0.007589
15           Country_Japan    0.007139
8           Country_Brazil    0.006902
9           Country_Canada    0.006130


In [None]:
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np

# Tune a random forest with a 3-fold grid search over tree count and depth,
# score the winning model on the hold-out split, and list ITS importances.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20]
}
grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
# With GridSearchCV's default refit=True, best_estimator_ is the forest refit
# on the whole training split using the best parameter combination.
best_rf = grid_search.best_estimator_

y_pred_tuned = best_rf.predict(X_test)
print(f"MAE: {mean_absolute_error(y_test, y_pred_tuned):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_tuned)):.2f}")
print(f"R²: {r2_score(y_test, y_pred_tuned):.2f}\n")

# Fix: the importances must come from the tuned forest evaluated above.
# The original read them from `rf`, the untuned forest of an earlier cell,
# so this table merely repeated that cell's numbers.
importances = best_rf.feature_importances_

feature_names = X_train.columns

feat_importances = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
})

feat_importances_sorted = feat_importances.sort_values(by='Importance', ascending=False)

print(feat_importances_sorted)


MAE: 4.96
RMSE: 5.78
R²: -0.02

                   Feature  Importance
0     Avg Temperature (°C)    0.141665
6               Population    0.141468
3     Renewable Energy (%)    0.136188
2            Rainfall (mm)    0.132682
5          Forest Area (%)    0.127663
1      Sea Level Rise (mm)    0.112285
4   Extreme Weather Events    0.083166
17          Country_Russia    0.012530
19              Country_UK    0.010972
20             Country_USA    0.010829
7        Country_Australia    0.010495
18    Country_South Africa    0.010263
12         Country_Germany    0.009462
13           Country_India    0.008576
10           Country_China    0.008235
14       Country_Indonesia    0.008090
11          Country_France    0.007670
16          Country_Mexico    0.007589
15           Country_Japan    0.007139
8           Country_Brazil    0.006902
9           Country_Canada    0.006130


In [None]:
from sklearn.ensemble import GradientBoostingRegressor


import pandas as pd
import numpy as np

# Train a gradient-boosting regressor (200 stages, learning rate 0.01),
# report hold-out metrics, and list the boosted model's feature importances.
gb = GradientBoostingRegressor(n_estimators=200, learning_rate=0.01, random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)


print(f'MAE: {mean_absolute_error(y_test, y_pred_gb):.2f}')
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_gb)):.2f}')
print(f'R²: {r2_score(y_test, y_pred_gb):.2f}\n')

# Fix: read the importances from the gradient-boosting model fitted in this
# cell. The original used `rf`, the random forest from an earlier cell, so the
# table did not describe the model whose metrics are printed above.
importances = gb.feature_importances_

feature_names = X_train.columns

feat_importances = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
})

feat_importances_sorted = feat_importances.sort_values(by='Importance', ascending=False)

print(feat_importances_sorted)



MAE: 4.97
RMSE: 5.76
R²: -0.02

                   Feature  Importance
0     Avg Temperature (°C)    0.141665
6               Population    0.141468
3     Renewable Energy (%)    0.136188
2            Rainfall (mm)    0.132682
5          Forest Area (%)    0.127663
1      Sea Level Rise (mm)    0.112285
4   Extreme Weather Events    0.083166
17          Country_Russia    0.012530
19              Country_UK    0.010972
20             Country_USA    0.010829
7        Country_Australia    0.010495
18    Country_South Africa    0.010263
12         Country_Germany    0.009462
13           Country_India    0.008576
10           Country_China    0.008235
14       Country_Indonesia    0.008090
11          Country_France    0.007670
16          Country_Mexico    0.007589
15           Country_Japan    0.007139
8           Country_Brazil    0.006902
9           Country_Canada    0.006130


In [None]:
import pandas as pd
import numpy as np

# Tune gradient boosting with a 5-fold grid search, score the winning model
# on the hold-out split, and list that model's feature importances.
param_grid_gb = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 1.0]
}

gb_grid = GridSearchCV(
    GradientBoostingRegressor(random_state=42),
    param_grid_gb,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1  # run the candidate fits in parallel on all available cores
)
gb_grid.fit(X_train, y_train)
best_gb = gb_grid.best_estimator_

y_pred_tuned = best_gb.predict(X_test)
print(f"Tuned MAE: {mean_absolute_error(y_test, y_pred_tuned):.2f}")
print(f"Tuned RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_tuned)):.2f}")
# Fix: R² is reported alongside MAE/RMSE like in the other model cells; the
# original omitted this line.
print(f"Tuned R²: {r2_score(y_test, y_pred_tuned):.2f}\n")

# Fix: take the importances from the tuned gradient-boosting model. The
# original read them from `rf`, the random forest of an earlier cell.
importances = best_gb.feature_importances_

feature_names = X_train.columns

feat_importances = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
})

feat_importances_sorted = feat_importances.sort_values(by='Importance', ascending=False)

print(feat_importances_sorted)


Tuned MAE: 4.93
Tuned RMSE: 5.70
Tuned R²: 0.00

                   Feature  Importance
0     Avg Temperature (°C)    0.141665
6               Population    0.141468
3     Renewable Energy (%)    0.136188
2            Rainfall (mm)    0.132682
5          Forest Area (%)    0.127663
1      Sea Level Rise (mm)    0.112285
4   Extreme Weather Events    0.083166
17          Country_Russia    0.012530
19              Country_UK    0.010972
20             Country_USA    0.010829
7        Country_Australia    0.010495
18    Country_South Africa    0.010263
12         Country_Germany    0.009462
13           Country_India    0.008576
10           Country_China    0.008235
14       Country_Indonesia    0.008090
11          Country_France    0.007670
16          Country_Mexico    0.007589
15           Country_Japan    0.007139
8           Country_Brazil    0.006902
9           Country_Canada    0.006130


In [None]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np

# Average the predictions of a linear model, a random forest and a
# gradient-boosting model, report hold-out metrics, and summarise which
# features the ensemble's tree-based members rely on.
voting = VotingRegressor([
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor(random_state=42)),
    ('gb', GradientBoostingRegressor(random_state=42))
])
voting.fit(X_train, y_train)
y_pred_voting = voting.predict(X_test)

print(f'MAE: {mean_absolute_error(y_test, y_pred_voting):.2f}')
print(f'RMSE: {np.sqrt(mean_squared_error(y_test, y_pred_voting)):.2f}')
# Fix: R² is reported alongside MAE/RMSE like in the other model cells; the
# original omitted this line.
print(f'R²: {r2_score(y_test, y_pred_voting):.2f}\n')

# Fix: the original read importances from `rf`, a separate forest fitted in an
# earlier cell that is not part of this ensemble. VotingRegressor exposes no
# feature_importances_ of its own and LinearRegression has none either, so the
# table uses the mean of the importances of the ensemble's fitted 'rf' and
# 'gb' members. This is a descriptive summary, not an exact attribution for
# the averaged prediction.
tree_members = [voting.named_estimators_[name] for name in ('rf', 'gb')]
importances = np.mean(
    [member.feature_importances_ for member in tree_members], axis=0
)

feature_names = X_train.columns

feat_importances = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
})

feat_importances_sorted = feat_importances.sort_values(by='Importance', ascending=False)

print(feat_importances_sorted)



MAE: 4.97
RMSE: 5.79
R²: -0.03

                   Feature  Importance
0     Avg Temperature (°C)    0.141665
6               Population    0.141468
3     Renewable Energy (%)    0.136188
2            Rainfall (mm)    0.132682
5          Forest Area (%)    0.127663
1      Sea Level Rise (mm)    0.112285
4   Extreme Weather Events    0.083166
17          Country_Russia    0.012530
19              Country_UK    0.010972
20             Country_USA    0.010829
7        Country_Australia    0.010495
18    Country_South Africa    0.010263
12         Country_Germany    0.009462
13           Country_India    0.008576
10           Country_China    0.008235
14       Country_Indonesia    0.008090
11          Country_France    0.007670
16          Country_Mexico    0.007589
15           Country_Japan    0.007139
8           Country_Brazil    0.006902
9           Country_Canada    0.006130
