In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
# from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [2]:
# Baseline experiment: fit Linear Regression, Random Forest and XGBoost on
# year + greenhouse-gas emissions, score them on a held-out split, then project
# temperatures for 2014-2100 under an assumed emissions-growth scenario.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

rf_model = RandomForestRegressor(n_estimators=2000, random_state=42)
rf_model.fit(X_train, y_train)

xgb_model = XGBRegressor(n_estimators=2000, learning_rate=0.01, max_depth=4, random_state=42)
xgb_model.fit(X_train, y_train)


# Model evaluation on the held-out split
lin_y_pred = lin_model.predict(X_test)
rf_y_pred = rf_model.predict(X_test)
xgb_y_pred = xgb_model.predict(X_test)

lin_mse = mean_squared_error(y_test, lin_y_pred)
rf_mse = mean_squared_error(y_test, rf_y_pred)
xgb_mse = mean_squared_error(y_test, xgb_y_pred)

lin_rmse = np.sqrt(lin_mse)
rf_rmse = np.sqrt(rf_mse)
xgb_rmse = np.sqrt(xgb_mse)

lin_r2 = r2_score(y_test, lin_y_pred)
rf_r2 = r2_score(y_test, rf_y_pred)
xgb_r2 = r2_score(y_test, xgb_y_pred)

print(f'Linear Regression Mean Squared Error: {lin_mse}')
print(f'Random Forest Mean Squared Error: {rf_mse}')
print(f'XGBoost Mean Squared Error: {xgb_mse}')

print(f'Linear Regression Root Mean Squared Error: {lin_rmse}')
print(f'Random Forest Root Mean Squared Error: {rf_rmse}')
print(f'XGBoost Root Mean Squared Error: {xgb_rmse}')

print(f'Linear Regression R-squared: {lin_r2}')
print(f'Random Forest R-squared: {rf_r2}')
print(f'XGBoost R-squared: {xgb_r2}')

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)
years_ahead = future_years.flatten() - 2014  # 0 for 2014, 1 for 2015, ...

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()
future_features[:, 1] = base_no2 * (1 + growth_rate) ** years_ahead  # N2O
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** years_ahead  # CH4
future_features[:, 3] = base_co2 * (1 + growth_rate) ** years_ahead  # CO2

# The models were fitted on a DataFrame, so predict on one with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
future_features_df = pd.DataFrame(future_features, columns=features)

# Predictions
# NOTE(review): the projected years presumably lie beyond the training data's
# Year range (confirm), and tree ensembles cannot extrapolate outside the range
# they were fitted on -- treat the RF/XGBoost projections as illustrative only.
lin_future_temperatures = lin_model.predict(future_features_df)
rf_future_temperatures = rf_model.predict(future_features_df)
xgb_future_temperatures = xgb_model.predict(future_features_df)

# Find the first year in which each projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
breach_threshold = pre_industrial_temp + 2

lin_breached = lin_future_temperatures >= breach_threshold
rf_breached = rf_future_temperatures >= breach_threshold
xgb_breached = xgb_future_temperatures >= breach_threshold

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
lin_breach_year = future_years[np.argmax(lin_breached)][0] if lin_breached.any() else None
rf_breach_year = future_years[np.argmax(rf_breached)][0] if rf_breached.any() else None
xgb_breach_year = future_years[np.argmax(xgb_breached)][0] if xgb_breached.any() else None

# Output results
print(f'Future Years: {future_years.flatten()}')
print(f'Linear Regression future temperatures: {lin_future_temperatures}')
print(f'Random Forest future temperatures: {rf_future_temperatures}')
print(f'XGBoost future temperatures: {xgb_future_temperatures}')

last_year = future_years[-1, 0]
for model_name, breach_year in [('Linear Regression', lin_breach_year),
                                ('Random Forest', rf_breach_year),
                                ('XGBoost', xgb_breach_year)]:
    if breach_year is None:
        print(f'{model_name}: The global temperature is not projected to breach the 2°C mark by {last_year}')
    else:
        print(f'{model_name}: The global temperature is expected to breach the 2°C mark in the year {breach_year}')


Linear Regression Mean Squared Error: 56.71443921640465
Random Forest Mean Squared Error: 7.142229155316167
XGBoost Mean Squared Error: 26.27684427416718
Linear Regression Root Mean Squared Error: 7.530898964692373
Random Forest Root Mean Squared Error: 2.672494930830771
XGBoost Root Mean Squared Error: 5.12609444647357
Linear Regression R-squared: 0.017506308283300687
Random Forest R-squared: 0.8762714542037912
XGBoost R-squared: 0.5447925767354884
Future Years: [2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027
 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041
 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055
 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069
 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083
 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097
 2098 2099 2100]
Linear Regression future temperatures: [19.46588923 19.47593111 19.48589603 19.49578243 1



In [None]:
# Model evaluation: score the three already-fitted models on the held-out split.
# This cell creates no models itself; it uses lin_model, rf_model, xgb_model,
# X_test and y_test as left in the kernel by a previously executed cell.
lin_y_pred, rf_y_pred, xgb_y_pred = (
    fitted.predict(X_test) for fitted in (lin_model, rf_model, xgb_model)
)

# Mean squared error per model
lin_mse, rf_mse, xgb_mse = (
    mean_squared_error(y_test, predicted)
    for predicted in (lin_y_pred, rf_y_pred, xgb_y_pred)
)

# Root mean squared error (same units as the target)
lin_rmse, rf_rmse, xgb_rmse = (np.sqrt(mse) for mse in (lin_mse, rf_mse, xgb_mse))

# Coefficient of determination per model
lin_r2, rf_r2, xgb_r2 = (
    r2_score(y_test, predicted)
    for predicted in (lin_y_pred, rf_y_pred, xgb_y_pred)
)

# Report one metric at a time, models always in the order LR, RF, XGBoost
print(f'Linear Regression Mean Squared Error: {lin_mse}',
      f'Random Forest Mean Squared Error: {rf_mse}',
      f'XGBoost Mean Squared Error: {xgb_mse}',
      sep='\n')

print(f'Linear Regression Root Mean Squared Error: {lin_rmse}',
      f'Random Forest Root Mean Squared Error: {rf_rmse}',
      f'XGBoost Root Mean Squared Error: {xgb_rmse}',
      sep='\n')

print(f'Linear Regression R-squared: {lin_r2}',
      f'Random Forest R-squared: {rf_r2}',
      f'XGBoost R-squared: {xgb_r2}',
      sep='\n')

## HYPERPARAMETER TUNING - RANDOM FOREST

In [5]:


# Random Forest tuning run: refit with the chosen n_estimators, score the model
# on the held-out split, then project temperatures for 2014-2100.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training -- n_estimators is the hyperparameter varied in this cell
rf_model = RandomForestRegressor(n_estimators=5, random_state=42)
rf_model.fit(X_train, y_train)

# Model evaluation on the held-out split
rf_y_pred = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_y_pred)
rf_rmse = np.sqrt(rf_mse)
rf_r2 = r2_score(y_test, rf_y_pred)

print(f'Random Forest Mean Squared Error: {rf_mse}')
print()
print(f'Random Forest Root Mean Squared Error: {rf_rmse}')
print()
print(f'Random Forest R-squared: {rf_r2}')
print()

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)
years_ahead = future_years.flatten() - 2014  # 0 for 2014, 1 for 2015, ...

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()
future_features[:, 1] = base_no2 * (1 + growth_rate) ** years_ahead  # N2O
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** years_ahead  # CH4
future_features[:, 3] = base_co2 * (1 + growth_rate) ** years_ahead  # CO2

# The model was fitted on a DataFrame, so predict on one with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
future_features_df = pd.DataFrame(future_features, columns=features)

# Predictions
rf_future_temperatures = rf_model.predict(future_features_df)

# Find the first year in which the projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
print()
print(pre_industrial_temp)
print()
breach_threshold = pre_industrial_temp + 2
rf_breached = rf_future_temperatures >= breach_threshold

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
rf_breach_year = future_years[np.argmax(rf_breached)][0] if rf_breached.any() else None

# Output results
print(f'Random Forest future temperatures: {rf_future_temperatures}')
print()
if rf_breach_year is None:
    print(f'Random Forest: The global temperature is not projected to breach the 2°C mark by {future_years[-1, 0]}')
else:
    print(f'Random Forest: The global temperature is expected to breach the 2°C mark in the year ----------- {rf_breach_year}')


Random Forest Mean Squared Error: 10.560095004936366

Random Forest Root Mean Squared Error: 3.249629979695591

Random Forest R-squared: 0.8170619886288504


17.833763596191048

Random Forest future temperatures: [19.81188154 19.40625676 16.18101348 14.36139893 14.36139893 14.38333256
 12.86826187 13.8515577  14.96433775 14.99991427 13.90940013 12.68007778
 14.88387601 17.18097879 17.06178182 17.08333106 17.08333106 17.11304394
 17.11304394 17.11304394 14.96657066 14.96657066 16.26204765 15.48024443
 15.48024443 16.67045371 19.11962433 18.45798916 19.6821564  19.77394911
 19.3443189  23.24804946 23.08270078 23.08270078 23.3357203  24.69745394
 24.62354577 24.62354577 24.12617004 19.84258835 19.84258835 19.68898586
 23.28268148 23.28268148 23.53714494 23.53714494 23.5433599  23.47403359
 23.47403359 23.32162012 23.74817714 23.28811731 23.11584377 22.88496877
 18.39245637 18.49512753 14.51890325 10.92699441 10.92699441 10.92699441
  9.4999398  13.03910446  9.57155833  9.57155833  9.35851



## HYPERPARAMETER TUNING TEST - XGBOOST

In [4]:


# XGBoost tuning run: refit with the chosen hyperparameters, score the model on
# the held-out split, then project temperatures for 2014-2100.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
# The linear model is refitted here as well; its results are computed below but
# not printed by this cell.
lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

# n_estimators, learning_rate and max_depth are the hyperparameters varied in this cell
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
xgb_model.fit(X_train, y_train)

# Model evaluation on the held-out split
xgb_y_pred = xgb_model.predict(X_test)
xgb_mse = mean_squared_error(y_test, xgb_y_pred)
xgb_rmse = np.sqrt(xgb_mse)
xgb_r2 = r2_score(y_test, xgb_y_pred)

# Two blank lines precede the scores (layout kept from the original cell)
print()
print()
print(f'XGBoost Root Mean Squared Error: {xgb_rmse}')
print()
print(f'XGBoost R-squared: {xgb_r2}')
print()

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)
years_ahead = future_years.flatten() - 2014  # 0 for 2014, 1 for 2015, ...

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()
future_features[:, 1] = base_no2 * (1 + growth_rate) ** years_ahead  # N2O
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** years_ahead  # CH4
future_features[:, 3] = base_co2 * (1 + growth_rate) ** years_ahead  # CO2

# The models were fitted on a DataFrame, so predict on one with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
future_features_df = pd.DataFrame(future_features, columns=features)

# Predictions
lin_future_temperatures = lin_model.predict(future_features_df)
xgb_future_temperatures = xgb_model.predict(future_features_df)

# Find the first year in which each projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
breach_threshold = pre_industrial_temp + 2

lin_breached = lin_future_temperatures >= breach_threshold
xgb_breached = xgb_future_temperatures >= breach_threshold

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
lin_breach_year = future_years[np.argmax(lin_breached)][0] if lin_breached.any() else None
xgb_breach_year = future_years[np.argmax(xgb_breached)][0] if xgb_breached.any() else None

# Output results
print(f'XGBoost future temperatures: {xgb_future_temperatures}')
print()
if xgb_breach_year is None:
    print(f'XGBoost: The global temperature is not projected to breach the 2°C mark by {future_years[-1, 0]}')
else:
    print(f'XGBoost: The global temperature is expected to breach the 2°C mark in the year ---------------- {xgb_breach_year}')
print()



XGBoost Root Mean Squared Error: 5.06429871725066

XGBoost R-squared: 0.5557015915138803

XGBoost future temperatures: [16.906061 16.931013 15.909924 16.30284  16.30284  16.070976 15.911932
 15.210163 14.896375 14.896375 14.607156 14.607156 14.607156 14.607156
 14.862129 14.402308 14.402308 14.402308 14.402308 15.544984 15.544984
 16.021948 15.930583 15.930583 15.930583 15.930583 15.930583 17.311258
 17.311258 17.311258 19.121298 19.121298 18.901842 19.528421 19.528421
 19.528421 19.979939 19.979939 19.979939 20.258636 20.258636 21.114784
 21.114784 21.395882 21.395882 21.395882 21.395882 21.395882 21.926645
 21.926645 21.926645 22.051746 22.051746 21.399664 21.399664 21.399664
 21.399664 20.300856 16.56694  16.56694  16.56694  16.785538 16.785538
 15.635115 15.635115 15.635115 15.635115 15.635115 15.635115 15.635115
 14.939956 14.939956 14.721703 14.721703 14.721703 14.721703 16.025358
 16.025358 16.025358 17.626476 17.626476 17.626476 17.626476 17.626476
 17.180502 17.180502 16.833



## PARAMETER SELECTION USING GRIDSEARCH - DO NOT RUN

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Hyperparameter selection: 3-fold grid search for Random Forest and XGBoost,
# followed by held-out evaluation and a 2014-2100 projection using the best
# estimator of each search. Every grid below contains several dozen
# combinations with up to 2000 trees each, so expect a long run time.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear Regression model
lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

# Random Forest with Grid Search CV (4 * 4 * 3 = 48 combinations, 3 folds each)
rf_params = {
    'n_estimators': [800, 1200, 1700, 2000],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10]
}
rf_model = RandomForestRegressor(random_state=42)
rf_grid_search = GridSearchCV(estimator=rf_model, param_grid=rf_params, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')
rf_grid_search.fit(X_train, y_train)
rf_best_model = rf_grid_search.best_estimator_

# XGBoost with Grid Search CV (4 * 4 * 4 = 64 combinations, 3 folds each)
xgb_params = {
    'n_estimators': [1000, 1500, 1700, 2000],
    'learning_rate': [0.01, 0.05, 0.1, 0.3],
    'max_depth': [3, 4, 5, 6]
}
xgb_model = XGBRegressor(random_state=42)
xgb_grid_search = GridSearchCV(estimator=xgb_model, param_grid=xgb_params, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')
xgb_grid_search.fit(X_train, y_train)
xgb_best_model = xgb_grid_search.best_estimator_

# Model evaluation on the held-out split
lin_y_pred = lin_model.predict(X_test)
rf_y_pred = rf_best_model.predict(X_test)
xgb_y_pred = xgb_best_model.predict(X_test)

lin_mse = mean_squared_error(y_test, lin_y_pred)
rf_mse = mean_squared_error(y_test, rf_y_pred)
xgb_mse = mean_squared_error(y_test, xgb_y_pred)

lin_r2 = r2_score(y_test, lin_y_pred)
rf_r2 = r2_score(y_test, rf_y_pred)
xgb_r2 = r2_score(y_test, xgb_y_pred)

print(f'Linear Regression Mean Squared Error: {lin_mse}, R2: {lin_r2}')
print(f'Random Forest Mean Squared Error: {rf_mse}, R2: {rf_r2}')
print(f'XGBoost Mean Squared Error: {xgb_mse}, R2: {xgb_r2}')

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)
years_ahead = future_years.flatten() - 2014  # 0 for 2014, 1 for 2015, ...

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()
future_features[:, 1] = base_no2 * (1 + growth_rate) ** years_ahead  # N2O
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** years_ahead  # CH4
future_features[:, 3] = base_co2 * (1 + growth_rate) ** years_ahead  # CO2

# The models were fitted on a DataFrame, so predict on one with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
future_features_df = pd.DataFrame(future_features, columns=features)

# Predictions
lin_future_temperatures = lin_model.predict(future_features_df)
rf_future_temperatures = rf_best_model.predict(future_features_df)
xgb_future_temperatures = xgb_best_model.predict(future_features_df)

# Find the first year in which each projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
breach_threshold = pre_industrial_temp + 2

lin_breached = lin_future_temperatures >= breach_threshold
rf_breached = rf_future_temperatures >= breach_threshold
xgb_breached = xgb_future_temperatures >= breach_threshold

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
lin_breach_year = future_years[np.argmax(lin_breached)][0] if lin_breached.any() else None
rf_breach_year = future_years[np.argmax(rf_breached)][0] if rf_breached.any() else None
xgb_breach_year = future_years[np.argmax(xgb_breached)][0] if xgb_breached.any() else None

# Output results
print(f'Future Years: {future_years.flatten()}')
print(f'Linear Regression future temperatures: {lin_future_temperatures}')
print(f'Random Forest future temperatures: {rf_future_temperatures}')
print(f'XGBoost future temperatures: {xgb_future_temperatures}')

last_year = future_years[-1, 0]
for model_name, breach_year in [('Linear Regression', lin_breach_year),
                                ('Random Forest', rf_breach_year),
                                ('XGBoost', xgb_breach_year)]:
    if breach_year is None:
        print(f'{model_name}: The global temperature is not projected to breach the 2°C mark by {last_year}')
    else:
        print(f'{model_name}: The global temperature is expected to breach the 2°C mark in the year {breach_year}')


### testing RF - TEMP ARRAY TESTING

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Random Forest check with a vectorised future-feature matrix: fit, score on the
# held-out split, then project temperatures for 2014-2100.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
rf_model = RandomForestRegressor(n_estimators=50, random_state=42)
rf_model.fit(X_train, y_train)

# Model evaluation on the held-out split
rf_y_pred = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_y_pred)
rf_rmse = np.sqrt(rf_mse)
rf_r2 = r2_score(y_test, rf_y_pred)

print(f'Random Forest Mean Squared Error: {rf_mse}')
print(f'Random Forest Root Mean Squared Error: {rf_rmse}')
print(f'Random Forest R-squared: {rf_r2}')

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()  # Set the year

# Apply exponential growth rates to the emissions
future_features[:, 1] = base_no2 * (1 + growth_rate) ** (future_years.flatten() - 2014)
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** (future_years.flatten() - 2014)
future_features[:, 3] = base_co2 * (1 + growth_rate) ** (future_years.flatten() - 2014)

# The model was fitted on a DataFrame, so predict on one with the same column
# names; a bare ndarray makes scikit-learn warn about missing feature names.
future_features_df = pd.DataFrame(future_features, columns=features)

# Predictions
rf_future_temperatures = rf_model.predict(future_features_df)

# Find the first year in which the projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
rf_breached = rf_future_temperatures >= pre_industrial_temp + 2

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
rf_breach_year = future_years[np.argmax(rf_breached)][0] if rf_breached.any() else None

# Output results
print(f'Random Forest future temperatures: {rf_future_temperatures}')
if rf_breach_year is None:
    print(f'Random Forest: The global temperature is not projected to breach the 2°C mark by {future_years[-1, 0]}')
else:
    print(f'Random Forest: The global temperature is expected to breach the 2°C mark in the year {rf_breach_year}')


Random Forest Mean Squared Error: 7.468405349948641
Random Forest Root Mean Squared Error: 2.732838332201274
Random Forest R-squared: 0.8706209345470286
Random Forest future temperatures: [16.03540204 15.92064193 16.08735788 15.81735617 14.99322199 14.98437802
 15.16332084 15.42684629 15.458882   15.535816   15.71813902 15.0418169
 15.90468146 15.48370989 15.48908697 14.94001925 14.07507231 13.47250697
 13.49475396 14.30726404 12.96452729 12.58882665 13.03766243 13.68972072
 12.77496758 14.14308495 14.63595657 13.42841963 15.08295439 15.44977101
 16.83300327 18.23338623 18.63065581 19.50433509 19.95380678 20.37808947
 21.18472762 22.30339212 23.79135323 23.45672073 23.89387827 23.19109125
 23.47455896 22.92634677 22.76204566 22.15587938 21.43178428 23.28119305
 23.28741585 22.90985036 21.84570261 22.03927906 21.58233988 21.9037282
 17.84136247 18.04089933 16.30022618 16.04653883 14.30584529 14.73107514
 14.11351063 14.15053788 13.9184348  13.7479308  13.61615138 13.52524832
 13.9323832



In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Random Forest check that predicts on a DataFrame carrying the training column
# names: fit, score on the held-out split, then project temperatures for 2014-2100.

# Load the dataset and drop every row that has a missing value
data = pd.read_csv('merged_data_inner.csv')
data = data.dropna()

# Feature selection
features = ['Year', 'Annual nitrous oxide emissions in CO₂ equivalents',
            'Annual methane emissions in CO₂ equivalents', 'Annual CO₂ emissions']
X = data[features]
y = data['AverageTemperature']

# Train-test split (80/20; the fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
rf_model = RandomForestRegressor(n_estimators=50, random_state=42)
rf_model.fit(X_train, y_train)

# Model evaluation on the held-out split
rf_y_pred = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_y_pred)
rf_rmse = np.sqrt(rf_mse)
rf_r2 = r2_score(y_test, rf_y_pred)

print(f'Random Forest Mean Squared Error: {rf_mse}')
print(f'Random Forest Root Mean Squared Error: {rf_rmse}')
print(f'Random Forest R-squared: {rf_r2}')

# Predict future temperatures
future_years = np.arange(2014, 2101).reshape(-1, 1)

# Hypothetical scenario: every emission series grows 2% per year (for
# illustration), starting from its mean over the whole cleaned dataset.
growth_rate = 0.02
base_no2 = data['Annual nitrous oxide emissions in CO₂ equivalents'].mean()  # nitrous oxide (N2O)
base_ch4 = data['Annual methane emissions in CO₂ equivalents'].mean()
base_co2 = data['Annual CO₂ emissions'].mean()

# One row per future year, columns in the same order as `features`
future_features = np.zeros((future_years.shape[0], len(features)))
future_features[:, 0] = future_years.flatten()  # Set the year

# Apply exponential growth rates to the emissions
future_features[:, 1] = base_no2 * (1 + growth_rate) ** (future_years.flatten() - 2014)
future_features[:, 2] = base_ch4 * (1 + growth_rate) ** (future_years.flatten() - 2014)
future_features[:, 3] = base_co2 * (1 + growth_rate) ** (future_years.flatten() - 2014)

# Convert to DataFrame and ensure column names match the training data
future_features_df = pd.DataFrame(future_features, columns=features)

# Check future_features to ensure correctness
print(future_features_df.head())

# Predictions
rf_future_temperatures = rf_model.predict(future_features_df)

# Find the first year in which the projection reaches 2 degrees above the
# mean temperature of all pre-1900 rows
pre_industrial_temp = data[data['Year'] < 1900]['AverageTemperature'].mean()
rf_breached = rf_future_temperatures >= pre_industrial_temp + 2

# np.argmax returns 0 for an all-False mask, which would wrongly report 2014 as
# the breach year; only look a year up when the threshold is actually reached.
rf_breach_year = future_years[np.argmax(rf_breached)][0] if rf_breached.any() else None

# Output results
print(f'Random Forest future temperatures: {rf_future_temperatures}')
if rf_breach_year is None:
    print(f'Random Forest: The global temperature is not projected to breach the 2°C mark by {future_years[-1, 0]}')
else:
    print(f'Random Forest: The global temperature is expected to breach the 2°C mark in the year {rf_breach_year}')


Random Forest Mean Squared Error: 7.468405349948641
Random Forest Root Mean Squared Error: 2.732838332201274
Random Forest R-squared: 0.8706209345470286
     Year  Annual nitrous oxide emissions in CO₂ equivalents  \
0  2014.0                                       6.331687e+06   
1  2015.0                                       6.458321e+06   
2  2016.0                                       6.587487e+06   
3  2017.0                                       6.719237e+06   
4  2018.0                                       6.853622e+06   

   Annual methane emissions in CO₂ equivalents  Annual CO₂ emissions  
0                                 2.609767e+07          8.666723e+07  
1                                 2.661963e+07          8.840057e+07  
2                                 2.715202e+07          9.016858e+07  
3                                 2.769506e+07          9.197195e+07  
4                                 2.824896e+07          9.381139e+07  
Random Forest future temperatures: [