# Renewable Energy Consumption

## Data Preprocessing


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [8]:
# Read the raw dataset into a DataFrame. The location is held in a single
# named constant so it is easy to find and change.
DATA_PATH = "/dataset.csv"
data = pd.read_csv(DATA_PATH)


In [10]:
# One-hot encode the categorical 'Sector' column.
# sparse_output=False makes fit_transform return a dense array that can be
# wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
sector_encoded = encoder.fit_transform(data[['Sector']])

# Reuse data's index: this frame is later joined column-wise with columns of
# `data`, and pd.concat(axis=1) aligns rows by index label, not by position.
# A default RangeIndex here would misalign rows (and introduce NaNs) if `data`
# ever carries a non-default index, e.g. after rows have been filtered out.
sector_encoded_df = pd.DataFrame(
    sector_encoded,
    columns=encoder.get_feature_names_out(['Sector']),
    index=data.index,
)

In [11]:
# Separate the regression target from the predictors.
target = data['Total Renewable Energy']

# Predictors = every original column except the raw 'Sector' label and the
# target, plus the one-hot encoded sector indicators appended column-wise.
# NOTE(review): the recorded Linear Regression result (R2 of 1.000) hints that
# the remaining columns may determine the target almost exactly -- confirm that
# no component columns of the total are leaking into the features.
features_without_sector = data.drop(columns=['Sector', 'Total Renewable Energy'])
data_preprocessed = pd.concat([features_without_sector, sector_encoded_df], axis=1)


In [12]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data_preprocessed,
    target,
    test_size=0.2,
    random_state=42,
)

## Model Training

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

In [14]:
# Baseline regressors with default hyperparameters. The two tree ensembles
# share one seed so their fits are repeatable.
RANDOM_STATE = 42

linear_model = LinearRegression()
rf_model = RandomForestRegressor(random_state=RANDOM_STATE)
gb_model = GradientBoostingRegressor(random_state=RANDOM_STATE)

In [15]:
# Fit each baseline on the training split, in the same order as before:
# linear regression, random forest, gradient boosting.
for estimator in (linear_model, rf_model, gb_model):
    estimator.fit(X_train, y_train)

## Model Evaluation

In [16]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [18]:
# Prediction and evaluation
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on a held-out set.

    Args:
        model: Object exposing ``predict(X)``.
        X_test: Feature rows to predict on.
        y_test: True target values corresponding to ``X_test``.

    Returns:
        A ``(rmse, r2)`` tuple: the root of the mean squared error between
        ``y_test`` and the predictions, and the R^2 score of the predictions.
    """
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return np.sqrt(mse), r2_score(y_test, y_pred)



In [19]:
# Held-out RMSE / R2 for the three untuned baselines, evaluated in the order
# linear regression, random forest, gradient boosting.
(rmse_linear, r2_linear), (rmse_rf, r2_rf), (rmse_gb, r2_gb) = (
    evaluate_model(fitted, X_test, y_test)
    for fitted in (linear_model, rf_model, gb_model)
)

## Hyperparameter Tuning

In [20]:
# Random Forest

from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters: 3 * 3 * 4 * 3 = 108 combinations.
# max_features uses 1.0 (consider every feature at each split) in place of the
# former 'auto' option. For regressors 'auto' meant exactly that, but it was
# deprecated in scikit-learn 1.1 and removed in 1.3, where every candidate
# using it fails to fit.
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_features': [1.0, 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Exhaustive 3-fold cross-validated search over the grid, fitted on the
# training split only. The scorer is the negated MSE, so the highest score
# corresponds to the lowest error.
grid_search_rf = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid_rf,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)

# best_estimator_ is the winning configuration refit on all of X_train
# (GridSearchCV's default refit=True); score it on the held-out test set.
best_rf_model = grid_search_rf.best_estimator_
best_rf_rmse, best_rf_r2 = evaluate_model(best_rf_model, X_test, y_test)

Fitting 3 folds for each of 108 candidates, totalling 324 fits


### Gradient Boosting

In [21]:
# Candidate hyperparameters for gradient boosting: 3 * 3 * 3 * 3 = 81
# combinations.
param_grid_gb = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.8, 0.9, 1.0],
    max_depth=[3, 5, 7],
)

# 3-fold cross-validated grid search on the training split, ranked by negated
# MSE (higher score means lower error), using all available cores.
grid_search_gb = GridSearchCV(
    estimator=gb_model,
    param_grid=param_grid_gb,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)
grid_search_gb.fit(X_train, y_train)

# Take the best configuration found by the search and score it on the
# held-out test set.
best_gb_model = grid_search_gb.best_estimator_
best_gb_rmse, best_gb_r2 = evaluate_model(best_gb_model, X_test, y_test)

Fitting 3 folds for each of 81 candidates, totalling 243 fits


## Model Performance

In [22]:
# Gather the held-out metrics of every model, keyed by its display name.
# Insertion order is the order in which rows are printed below.
scores = [
    ("Linear Regression", rmse_linear, r2_linear),
    ("Random Forest (default)", rmse_rf, r2_rf),
    ("Gradient Boosting (default)", rmse_gb, r2_gb),
    ("Random Forest (tuned)", best_rf_rmse, best_rf_r2),
    ("Gradient Boosting (tuned)", best_gb_rmse, best_gb_r2),
]
model_performance = {
    label: {"RMSE": rmse, "R2": r2} for label, rmse, r2 in scores
}

# One summary line per model.
for model, metrics in model_performance.items():
    print(f"{model} - RMSE: {metrics['RMSE']:.3f}, R2: {metrics['R2']:.3f}")

# The winner is the entry with the smallest test RMSE (first one on ties).
best_model, best_model_metrics = min(
    model_performance.items(),
    key=lambda entry: entry[1]['RMSE'],
)
print(f"\nBest Model: {best_model}")
print(f"Best RMSE: {best_model_metrics['RMSE']:.3f}")
print(f"Best R2: {best_model_metrics['R2']:.3f}")

# Hyperparameters selected by each grid search, for reference.
print("\nBest Random Forest Parameters:", grid_search_rf.best_params_)
print("Best Gradient Boosting Parameters:", grid_search_gb.best_params_)


Linear Regression - RMSE: 0.375, R2: 1.000
Random Forest (default) - RMSE: 2.063, R2: 0.999
Gradient Boosting (default) - RMSE: 2.787, R2: 0.998
Random Forest (tuned) - RMSE: 3.637, R2: 0.997
Gradient Boosting (tuned) - RMSE: 2.109, R2: 0.999

Best Model: Linear Regression
Best RMSE: 0.375
Best R2: 1.000

Best Random Forest Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_split': 2, 'n_estimators': 200}
Best Gradient Boosting Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.8}
