In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import chardet

In [10]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold,GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [3]:
def load_csv_autodetect(path):
    """Read a CSV file using the text encoding that chardet detects for it.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    # The complete file is handed to chardet, so detection is based on
    # every byte rather than a leading sample.
    with open(path, 'rb') as f:
        detected = chardet.detect(f.read())
    return pd.read_csv(path, encoding=detected['encoding'])


# Input files, kept as module-level names so later cells can refer to them.
file_path = 'IRENA_RenewableEnergy_Statistics_2000-2022.csv'
file_path_1 = 'organised_Gen.csv'
file_path_2 = '02 modern-renewable-energy-consumption.csv'

# One call per dataset replaces three copies of the detect-then-read code.
df_irena = load_csv_autodetect(file_path)
df_us_data = load_csv_autodetect(file_path_1)
df_world_data = load_csv_autodetect(file_path_2)

In [8]:
def run_rf(X_train, y_train, n_iter=10, random_state=42):
    """Tune a RandomForestRegressor with a randomized hyper-parameter search.

    Candidate settings are sampled from a fixed grid of ``n_estimators``,
    ``max_depth`` and ``min_samples_split`` values and scored with 5-fold
    cross-validation on negative mean squared error.

    Parameters
    ----------
    X_train : array-like or sparse matrix
        Training features, passed straight to ``RandomizedSearchCV.fit``.
    y_train : array-like
        Training targets.
    n_iter : int, default 10
        Number of parameter settings sampled from the grid.
    random_state : int, default 42
        Seed used for both the forest and the candidate sampling, so that
        repeated runs evaluate the same candidates.

    Returns
    -------
    pandas.DataFrame
        The search's ``cv_results_`` with added ``MSE`` and ``RMSE`` columns,
        sorted by ascending ``RMSE`` and limited to the 10 best rows.
    """
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10],
    }

    forest_regressor = RandomForestRegressor(random_state=random_state)

    # Without a seed on the search itself, a different subset of the grid is
    # drawn on every run and the ranking table cannot be reproduced.
    rand_search = RandomizedSearchCV(
        estimator=forest_regressor,
        param_distributions=param_grid,
        n_iter=n_iter,
        scoring='neg_mean_squared_error',
        cv=5,
        n_jobs=-1,
        random_state=random_state,
    )

    rand_search.fit(X_train, y_train)

    results = pd.DataFrame(rand_search.cv_results_)
    # The scorer reports negated MSE, so flip the sign before taking the root.
    results["MSE"] = -results["mean_test_score"]
    results["RMSE"] = np.sqrt(results["MSE"])

    # Lowest RMSE first; keep at most the 10 best candidates.
    return results.sort_values("RMSE").head(10)

In [11]:
# Keep only the rows whose energy source is the "Total" aggregate.
df_total_only = df_us_data[df_us_data["ENERGY SOURCE"] == "Total"].reset_index(drop=True)

# Drop the first column by position.
# NOTE(review): presumably a leftover index column from the CSV export — confirm.
df_total_only = df_total_only.iloc[:, 1:]

# Separate the regression target from the predictors.
X = df_total_only.drop(columns=["GENERATION (Megawatthours)"])
y = df_total_only["GENERATION (Megawatthours)"]

# Object-typed columns are one-hot encoded; all other columns pass through.
categorical_cols = X.select_dtypes(include=["object"]).columns.tolist()

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

# Split BEFORE fitting the encoder so that it only learns categories from the
# training rows; fitting on the full matrix would leak information about the
# held-out rows into preprocessing. The row partition depends only on the
# number of rows and random_state, so it matches a split of the encoded matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train = preprocessor.fit_transform(X_train_raw)
# Categories unseen during fitting are encoded as all zeros (handle_unknown='ignore').
X_test = preprocessor.transform(X_test_raw)

# Full matrix encoded with the train-fitted encoder, kept under its original name.
X_encoded = preprocessor.transform(X)

rf_results = run_rf(X_train, y_train)

In [12]:
rf_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_min_samples_split,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,MSE,RMSE
6,32.600152,0.28937,0.26721,0.019061,100,5,,"{'n_estimators': 100, 'min_samples_split': 5, ...",-830721800000.0,-625720100000.0,-823614900000.0,-1130252000000.0,-704586200000.0,-822978900000.0,171700200000.0,1,822978900000.0,907181.9
8,39.645146,2.765444,0.156838,0.013727,50,2,,"{'n_estimators': 50, 'min_samples_split': 2, '...",-890927300000.0,-631077800000.0,-846824500000.0,-1110098000000.0,-777764000000.0,-851338400000.0,156507100000.0,2,851338400000.0,922680.0
2,44.606803,0.564003,0.451069,0.053303,200,10,,"{'n_estimators': 200, 'min_samples_split': 10,...",-874767600000.0,-723538500000.0,-934375900000.0,-1395637000000.0,-732425500000.0,-932148900000.0,245571500000.0,3,932148900000.0,965478.6
3,22.145603,0.528482,0.215264,0.016914,100,10,,"{'n_estimators': 100, 'min_samples_split': 10,...",-871945500000.0,-736675500000.0,-953707100000.0,-1417650000000.0,-741779200000.0,-944351500000.0,250402400000.0,4,944351500000.0,971777.5
1,11.71516,0.092842,0.135105,0.022317,50,10,,"{'n_estimators': 50, 'min_samples_split': 10, ...",-869322600000.0,-733570300000.0,-937267600000.0,-1452198000000.0,-765515200000.0,-951574700000.0,260667600000.0,5,951574700000.0,975486.9
4,9.38423,0.082321,0.079541,0.006405,100,10,20.0,"{'n_estimators': 100, 'min_samples_split': 10,...",-1715601000000.0,-1582801000000.0,-1765809000000.0,-2238200000000.0,-1507281000000.0,-1761938000000.0,255315000000.0,6,1761938000000.0,1327380.0
7,2.228367,0.038044,0.019385,0.001597,50,10,10.0,"{'n_estimators': 50, 'min_samples_split': 10, ...",-3444162000000.0,-3431767000000.0,-3533125000000.0,-4136688000000.0,-3298594000000.0,-3568867000000.0,293630100000.0,7,3568867000000.0,1889145.0
0,2.115846,0.056368,0.030959,0.004467,100,2,5.0,"{'n_estimators': 100, 'min_samples_split': 2, ...",-9192126000000.0,-8683420000000.0,-8838647000000.0,-9416961000000.0,-9217032000000.0,-9069637000000.0,268284600000.0,8,9069637000000.0,3011584.0
5,4.11228,0.045603,0.058667,0.011875,200,10,5.0,"{'n_estimators': 200, 'min_samples_split': 10,...",-9228684000000.0,-8675986000000.0,-8805492000000.0,-9466687000000.0,-9198488000000.0,-9075067000000.0,291249800000.0,9,9075067000000.0,3012485.0
9,1.029596,0.027324,0.014855,0.00279,50,5,5.0,"{'n_estimators': 50, 'min_samples_split': 5, '...",-9197237000000.0,-8719644000000.0,-8752025000000.0,-9508292000000.0,-9232721000000.0,-9081984000000.0,302624200000.0,10,9081984000000.0,3013633.0
