# This code is an implementation of XGBoost learning method to predict turbine power. This is part of ISEN 619 class project and a Wind Turbine Competition. 

$\color{Green}{\textbf{Author}}$: Youssef Hebaish

$\color{Green}{\textbf{Date}}$: 04/11/2023

## Loading necessary libraries and traininng datasets

In [47]:
# Loading necessary libraries
import xgboost as xgb
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
from sklearn.utils import shuffle
import joblib  # Import joblib directly
import os
from tqdm.notebook import tqdm
import time
import itertools
from xgboost import plot_tree



In [44]:
# Load datasets

# There are two main types of paths added because I used two different devices: 1) My Macbook, and 2) My work machine. 
N_kel_train_path = r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Datasets\Training\Kelmarsh_training_data"
N_pen_train_path = r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Datasets\Training\Penmanshiel_training_data"
N_kel_test_path = r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Datasets\Testing\Kelmarsh_test_data"
N_pen_test_path = r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Datasets\Testing\Penmanshiel_test_data"


kel_train_path = '/Users/hebaish/Library/CloudStorage/OneDrive-TexasA&MUniversity/A&M/Academics/Spring 23/ISEN 619/Project/Datasets/Training/Kelmarsh_training_data'
pen_train_path = '/Users/hebaish/Library/CloudStorage/OneDrive-TexasA&MUniversity/A&M/Academics/Spring 23/ISEN 619/Project/Datasets/Training/Penmanshiel_training_data'
kel_test_path = '/Users/hebaish/Library/CloudStorage/OneDrive-TexasA&MUniversity/A&M/Academics/Spring 23/ISEN 619/Project/Datasets/Testing/Kelmarsh_test_data'
pen_test_path = '/Users/hebaish/Library/CloudStorage/OneDrive-TexasA&MUniversity/A&M/Academics/Spring 23/ISEN 619/Project/Datasets/Testing/Penmanshiel_test_data'

folder_paths = [kel_train_path, pen_train_path, kel_test_path, pen_test_path]
N_folder_paths = [N_kel_train_path, N_pen_train_path, N_kel_test_path, N_pen_test_path]

# Preparing training and test dataset
for folder_path in N_folder_paths:
    # loop through all files in folder_path
    for file in os.listdir(folder_path):
        if file.endswith('.csv'):
            # load the CSV file as a pandas dataframe
            df = pd.read_csv(os.path.join(folder_path, file))
            
            # Converting time to day/night and encoding it as 0/1
            # Shuffling training datasets
            if 'training' in file:
                df = df.dropna()
                df['time'] = pd.to_datetime(df['time'], format='%m/%d/%Y %H:%M')
                df['month'] = df['time'].dt.month
                df['Day.Night'] = df['time'].dt.hour.apply(lambda x: 1 if x<6 or x>=18 else 0)
                df = df.drop('time', axis = 1)
                df = shuffle(df)
                pass
            
            # Changing month in test datasets to 1-12 encoding
            # Changing day/night to 0/1 encoding
            elif 'test' in file:
                df = df.drop('time', axis = 1)
                month_map = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6, 'July': 7,
                             'August': 8, 'September': 9, 'October': 10, 'November': 11, 'December': 12}

                # Use the map() method to convert the month_text column to numeric format
                df['month'] = df['month'].map(month_map)

                # Define a dictionary to map 'Day' to 0 and 'Night' to 1
                day_night_map = {'Day': 0, 'Night': 1}

                # Use the map() method to convert the day_night column to numeric format
                df['Day.Night'] = df['Day.Night'].map(day_night_map)
                pass
            
            else:
                pass
            
            
            # use the filename (without the ".csv" extension) to name the dataframe variable
            df_name = file[:-4] 
            globals()[df_name] = df # store the dataframe in a variable with the same name as the filename

In [1]:
def get_df_name(df):
    name =[x for x in globals() if globals()[x] is df][0]
    return name

## Optimizing an XGBoost model for Penmanshiel with hyperparameters tuning

In [4]:
%pip install jupyter_contrib_nbextensions
%jupyter contrib nbextension install --user
%jupyter nbextension enable varInspector/main

Note: you may need to restart the kernel to use updated packages.


UsageError: Line magic function `%jupyter` not found.


In [6]:
Pen_training = [Pen_df1_training, Pen_df2_training, Pen_df4_training,
                Pen_df5_training, Pen_df6_training, Pen_df7_training, 
                Pen_df8_training, Pen_df9_training, Pen_df10_training, 
                Pen_df11_training, Pen_df12_training, Pen_df13_training, 
                Pen_df14_training, Pen_df15_training]

n_estimators = [600,700,800,900,1000]
learning_rate = [0.05, 0.1]
max_depth =  [6, 7, 8, 9]
subsample = [0.7, 0.8, 0.9]
colsample_bytree =  [0.7, 0.8, 0.9]

params_list = []

# Get all possible combinations of the values in the five arrays
for n, rate, depth, sub, colsample in itertools.product(n_estimators, learning_rate, max_depth, subsample, colsample_bytree):
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'learning_rate': rate,
        'max_depth': depth,
        'subsample': sub,
        'colsample_bytree': colsample,
        'n_estimators': n
    }
    params_list.append(params)

val_rmse = []

for param in tqdm(params_list, desc=' Parameters', position=0):
    for Pen_df in tqdm(Pen_training, desc=' Dataset', position=1, leave=False):
        rmse = []
        X = Pen_df.drop('power', axis=1)
        y = Pen_df['power']
        train_X = X.iloc[:30000]
        train_y = y.iloc[:30000]
        val_X = X.iloc[30000:]
        val_y = y.iloc[30000:]
        model = xgb.XGBRegressor(**param)
                        
        # Fit the model to the training data
        model.fit(train_X, train_y)
        
        # Evaluate the model on the testing data
        y_pred = model.predict(val_X)

        rmse.append(np.sqrt(mean_squared_error(val_y, y_pred)))
    val_rmse.append(np.mean(rmse))
    #print(f'Tested parameters for: \n {param}')
    #print(f'RMSE for tested parameters: {round(np.mean(rmse),4)}')



 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

In [7]:
print(f'The lowest validation RMSE is {min(val_rmse)}')
print(f'parameters corresponding to the lowest validation RMSE are: \n {params_list[val_rmse.index(min(val_rmse))]}')

The lowest validation RMSE is 0.02982723138661624
parameters corresponding to the lowest validation RMSE are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.7, 'n_estimators': 600}


In [8]:
val_rmse

[0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.02982723138661624,
 0.0298272

In [20]:
Pen_training = [Pen_df1_training, Pen_df2_training, Pen_df4_training,
                Pen_df5_training, Pen_df6_training, Pen_df7_training, 
                Pen_df8_training, Pen_df9_training, Pen_df10_training, 
                Pen_df11_training, Pen_df12_training, Pen_df13_training, 
                Pen_df14_training, Pen_df15_training]

n_estimators = [600,700,800,900,1000]
learning_rate = [0.05, 0.1]
max_depth =  [6, 7, 8, 9]
subsample = [0.7, 0.8, 0.9]
colsample_bytree =  [0.7, 0.8, 0.9]

params_list = []

# Get all possible combinations of the values in the five arrays
for n, rate, depth, sub, colsample in itertools.product(n_estimators, learning_rate, max_depth, subsample, colsample_bytree):
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'learning_rate': rate,
        'max_depth': depth,
        'subsample': sub,
        'colsample_bytree': colsample,
        'n_estimators': n
    }
    params_list.append(params)

datasets = []
Pen_params = []
Pen_errs = []

i = 0    
for Pen_df in tqdm(Pen_training, desc=' Dataset', position=0):
    print(f'Tuning for {get_df_name(Pen_training[i])}')
    df_rmse = 1
    j = 1
    for param in tqdm(params_list, desc=' Parameters', position=1, leave=False):

        X = Pen_df.drop('power', axis=1)
        y = Pen_df['power']
        train_X = X.iloc[:30000]
        train_y = y.iloc[:30000]
        val_X = X.iloc[30000:]
        val_y = y.iloc[30000:]
        model = xgb.XGBRegressor(**param)
                        
        # Fit the model to the training data
        model.fit(train_X, train_y)
        
        # Evaluate the model on the testing data
        y_pred = model.predict(val_X)

        err = np.sqrt(mean_squared_error(val_y, y_pred))
        
        if err < df_rmse:
            df_rmse = err
            current_params = param
            print(f'{j}- New error for {get_df_name(Pen_training[i])}: {df_rmse}')
        j+=1
        #else:
            #print(f'No change')
    
    print(f'Optimal parameters for {get_df_name(Pen_training[i])} are: \n {current_params}')
    print('==========================================================================')
    Pen_params.append(current_params)
    Pen_errs.append(df_rmse)
    datasets.append(get_df_name(Pen_training[i]))
    i+=1

 Dataset:   0%|          | 0/14 [00:00<?, ?it/s]

Tuning for Pen_df1_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df1_training: 0.028240351902033164
2- New error for Pen_df1_training: 0.028190891208974316
3- New error for Pen_df1_training: 0.028026556294441948
4- New error for Pen_df1_training: 0.027977813078462112
5- New error for Pen_df1_training: 0.027895637325279617
9- New error for Pen_df1_training: 0.02788918021126689
39- New error for Pen_df1_training: 0.02762462194630814
111- New error for Pen_df1_training: 0.027604844245562886
183- New error for Pen_df1_training: 0.02760238769681806
255- New error for Pen_df1_training: 0.027591060368722987
327- New error for Pen_df1_training: 0.027587340415267954
330- New error for Pen_df1_training: 0.027585740334952126
Optimal parameters for Pen_df1_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df2_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df2_training: 0.02749416593936685
2- New error for Pen_df2_training: 0.027205625352347596
3- New error for Pen_df2_training: 0.027072310834063217
8- New error for Pen_df2_training: 0.026922112615868802
35- New error for Pen_df2_training: 0.02690212006679611
36- New error for Pen_df2_training: 0.02688676115799492
80- New error for Pen_df2_training: 0.026851526621255548
152- New error for Pen_df2_training: 0.02679904596154214
224- New error for Pen_df2_training: 0.026765122061008336
296- New error for Pen_df2_training: 0.02674679359046396
Optimal parameters for Pen_df2_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.9, 'colsample_bytree': 0.8, 'n_estimators': 1000}
Tuning for Pen_df4_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df4_training: 0.024987540447459152
2- New error for Pen_df4_training: 0.024862075263410548
3- New error for Pen_df4_training: 0.024608693669022484
42- New error for Pen_df4_training: 0.024574608504822067
44- New error for Pen_df4_training: 0.024515625040278828
116- New error for Pen_df4_training: 0.024511064669496724
147- New error for Pen_df4_training: 0.02447154513381513
219- New error for Pen_df4_training: 0.02442365160969092
291- New error for Pen_df4_training: 0.02439065204480031
Optimal parameters for Pen_df4_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df5_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df5_training: 0.026202059112418505
13- New error for Pen_df5_training: 0.02607921593713988
38- New error for Pen_df5_training: 0.02591390214501735
110- New error for Pen_df5_training: 0.025901019358518802
254- New error for Pen_df5_training: 0.025867411878498985
Optimal parameters for Pen_df5_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.8, 'n_estimators': 900}
Tuning for Pen_df6_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df6_training: 0.024051481691184762
2- New error for Pen_df6_training: 0.02353312329366061
11- New error for Pen_df6_training: 0.02321210060231837
12- New error for Pen_df6_training: 0.02317639347420885
83- New error for Pen_df6_training: 0.023151284964968974
84- New error for Pen_df6_training: 0.023105865227540587
156- New error for Pen_df6_training: 0.02307311631531033
228- New error for Pen_df6_training: 0.023063966250882795
300- New error for Pen_df6_training: 0.02303864528889565
Optimal parameters for Pen_df6_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df7_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df7_training: 0.02839563126473378
2- New error for Pen_df7_training: 0.02831774462939763
3- New error for Pen_df7_training: 0.027968298329110998
5- New error for Pen_df7_training: 0.0279468768040223
11- New error for Pen_df7_training: 0.02775993421433631
12- New error for Pen_df7_training: 0.027581708125818814
84- New error for Pen_df7_training: 0.027534795822454383
156- New error for Pen_df7_training: 0.027500895227713335
228- New error for Pen_df7_training: 0.02747888256348634
300- New error for Pen_df7_training: 0.02744895433569814
Optimal parameters for Pen_df7_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df8_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df8_training: 0.027615848385364215
3- New error for Pen_df8_training: 0.027568342978613165
5- New error for Pen_df8_training: 0.027535035140357437
6- New error for Pen_df8_training: 0.027114970023721364
11- New error for Pen_df8_training: 0.02698655144110401
24- New error for Pen_df8_training: 0.026977975554146678
41- New error for Pen_df8_training: 0.02661997784719504
113- New error for Pen_df8_training: 0.02661478026800646
185- New error for Pen_df8_training: 0.02661234437706744
257- New error for Pen_df8_training: 0.02660236773868808
Optimal parameters for Pen_df8_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.8, 'n_estimators': 900}
Tuning for Pen_df9_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df9_training: 0.026845644882204093
2- New error for Pen_df9_training: 0.02664299596236862
3- New error for Pen_df9_training: 0.026203347799006866
38- New error for Pen_df9_training: 0.026115261015578203
39- New error for Pen_df9_training: 0.025861548460629588
111- New error for Pen_df9_training: 0.025836871680820682
183- New error for Pen_df9_training: 0.025826292169647054
Optimal parameters for Pen_df9_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 800}
Tuning for Pen_df10_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df10_training: 0.026101948312170817
3- New error for Pen_df10_training: 0.02599916662452421
4- New error for Pen_df10_training: 0.025619753972118537
5- New error for Pen_df10_training: 0.025536509418508272
6- New error for Pen_df10_training: 0.02535894338118167
78- New error for Pen_df10_training: 0.02530926599736151
150- New error for Pen_df10_training: 0.025271646999407378
222- New error for Pen_df10_training: 0.025222022847870878
294- New error for Pen_df10_training: 0.025215075463822317
Optimal parameters for Pen_df10_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df11_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df11_training: 0.03152616182817946
2- New error for Pen_df11_training: 0.03128763067607952
11- New error for Pen_df11_training: 0.031027325591804416
15- New error for Pen_df11_training: 0.03095134101532145
87- New error for Pen_df11_training: 0.03090587016269567
159- New error for Pen_df11_training: 0.030877988933918875
231- New error for Pen_df11_training: 0.030852384106725937
303- New error for Pen_df11_training: 0.03083918941447768
Optimal parameters for Pen_df11_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'subsample': 0.8, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df12_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df12_training: 0.03066256559912538
2- New error for Pen_df12_training: 0.029910202204352192
9- New error for Pen_df12_training: 0.029797616272067003
14- New error for Pen_df12_training: 0.029778472470352178
21- New error for Pen_df12_training: 0.02952957237175097
93- New error for Pen_df12_training: 0.02949996029492608
165- New error for Pen_df12_training: 0.02946646074760308
237- New error for Pen_df12_training: 0.029433094585244176
309- New error for Pen_df12_training: 0.029405832448993478
Optimal parameters for Pen_df12_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 8, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Pen_df13_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df13_training: 0.033012315223958116
2- New error for Pen_df13_training: 0.032768796105989983
3- New error for Pen_df13_training: 0.032699860544874976
41- New error for Pen_df13_training: 0.03246282325123913
113- New error for Pen_df13_training: 0.032431127867228834
185- New error for Pen_df13_training: 0.0324059978827207
257- New error for Pen_df13_training: 0.032392596179527416
329- New error for Pen_df13_training: 0.03239224553870475
Optimal parameters for Pen_df13_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.8, 'n_estimators': 1000}
Tuning for Pen_df14_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df14_training: 0.03010435987556557
2- New error for Pen_df14_training: 0.029750809109534877
3- New error for Pen_df14_training: 0.029720619672182952
20- New error for Pen_df14_training: 0.029562621319921538
37- New error for Pen_df14_training: 0.029057754821397105
109- New error for Pen_df14_training: 0.029020682540441503
181- New error for Pen_df14_training: 0.028991609469799354
253- New error for Pen_df14_training: 0.028971447479712125
325- New error for Pen_df14_training: 0.02895934411164952
Optimal parameters for Pen_df14_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.1, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.7, 'n_estimators': 1000}
Tuning for Pen_df15_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Pen_df15_training: 0.02926477534755477
3- New error for Pen_df15_training: 0.02922132977375556
4- New error for Pen_df15_training: 0.029057400589469422
5- New error for Pen_df15_training: 0.02904029004562882
7- New error for Pen_df15_training: 0.029020977274916773
12- New error for Pen_df15_training: 0.02892495595396945
39- New error for Pen_df15_training: 0.028812216092252308
111- New error for Pen_df15_training: 0.028797174434134064
183- New error for Pen_df15_training: 0.02878354783419099
223- New error for Pen_df15_training: 0.028769464692948374
295- New error for Pen_df15_training: 0.028736693471194417
Optimal parameters for Pen_df15_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.9, 'colsample_bytree': 0.7, 'n_estimators': 1000}


In [40]:
Pen_Train_Results = {'Datasets': datasets, 
                    'OptParams': Pen_params,
                    'ValRMSE': Pen_errs}
Pen_Train_Results = pd.DataFrame(Pen_Train_Results)
Pen_Train_Results.to_csv(r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Submissions\XGBoost\Pen_Tuned.csv", index=False)
Pen_Train_Results


Unnamed: 0,Datasets,OptParams,ValRMSE
0,Pen_df1_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.027586
1,Pen_df2_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.026747
2,Pen_df4_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.024391
3,Pen_df5_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.025867
4,Pen_df6_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.023039
5,Pen_df7_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.027449
6,Pen_df8_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.026602
7,Pen_df9_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.025826
8,Pen_df10_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.025215
9,Pen_df11_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.030839


In [29]:
Kel_training = [Kel_df1_training, Kel_df2_training, Kel_df3_training, Kel_df4_training,
                Kel_df5_training, Kel_df6_training]

n_estimators = [600,700,800,900,1000]
learning_rate = [0.05, 0.1]
max_depth =  [6, 7, 8, 9]
subsample = [0.7, 0.8, 0.9]
colsample_bytree =  [0.7, 0.8, 0.9]

params_list = []

# Get all possible combinations of the values in the five arrays
for n, rate, depth, sub, colsample in itertools.product(n_estimators, learning_rate, max_depth, subsample, colsample_bytree):
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'learning_rate': rate,
        'max_depth': depth,
        'subsample': sub,
        'colsample_bytree': colsample,
        'n_estimators': n
    }
    params_list.append(params)

Kel_datasets = []
Kel_params = []
Kel_errs = []

i = 0    
for Kel_df in tqdm(Kel_training, desc=' Dataset', position=0):
    print(f'Tuning for {get_df_name(Kel_training[i])}')
    df_rmse = 1
    j = 1
    for param in tqdm(params_list, desc=' Parameters', position=1, leave=False):

        X = Kel_df.drop('power', axis=1)
        y = Kel_df['power']
        train_X = X.iloc[:30000]
        train_y = y.iloc[:30000]
        val_X = X.iloc[30000:]
        val_y = y.iloc[30000:]
        model = xgb.XGBRegressor(**param)
                        
        # Fit the model to the training data
        model.fit(train_X, train_y)
        
        # Evaluate the model on the testing data
        y_pred = model.predict(val_X)

        err = np.sqrt(mean_squared_error(val_y, y_pred))
        
        if err < df_rmse:
            df_rmse = err
            current_params = param
            print(f'{j}- New error for {get_df_name(Kel_training[i])}: {df_rmse}')
        j+=1
        #else:
            #print(f'No change')
    
    print(f'Optimal parameters for {get_df_name(Kel_training[i])} are: \n {current_params}')
    print('==========================================================================')
    Kel_params.append(current_params)
    Kel_errs.append(df_rmse)
    Kel_datasets.append(get_df_name(Kel_training[i]))
    i+=1

 Dataset:   0%|          | 0/6 [00:00<?, ?it/s]

Tuning for Kel_df1_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df1_training: 0.025345429478437306
6- New error for Kel_df1_training: 0.02519840328181004
78- New error for Kel_df1_training: 0.02518698153257002
150- New error for Kel_df1_training: 0.025183078806439517
153- New error for Kel_df1_training: 0.025182388613517234
222- New error for Kel_df1_training: 0.02516995969956346
294- New error for Kel_df1_training: 0.025169243669987636
297- New error for Kel_df1_training: 0.025159962427181642
Optimal parameters for Kel_df1_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.9, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Kel_df2_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df2_training: 0.026586169591915832
2- New error for Kel_df2_training: 0.02653910135922604
3- New error for Kel_df2_training: 0.026479604654367214
75- New error for Kel_df2_training: 0.026447760266436308
147- New error for Kel_df2_training: 0.026427044268088402
219- New error for Kel_df2_training: 0.026415496017535622
291- New error for Kel_df2_training: 0.026407410973852175
Optimal parameters for Kel_df2_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.7, 'colsample_bytree': 0.9, 'n_estimators': 1000}
Tuning for Kel_df3_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df3_training: 0.02659244807332835
2- New error for Kel_df3_training: 0.026511109736622837
4- New error for Kel_df3_training: 0.026456740291862603
17- New error for Kel_df3_training: 0.02644811760388354
76- New error for Kel_df3_training: 0.026415651040869
148- New error for Kel_df3_training: 0.026393071173496627
220- New error for Kel_df3_training: 0.026378334075849337
292- New error for Kel_df3_training: 0.02637074247918747
Optimal parameters for Kel_df3_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.7, 'n_estimators': 1000}
Tuning for Kel_df4_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df4_training: 0.025975782617496456
2- New error for Kel_df4_training: 0.02596074194027195
10- New error for Kel_df4_training: 0.02588896342608852
82- New error for Kel_df4_training: 0.025864285235570605
154- New error for Kel_df4_training: 0.02584618309213559
226- New error for Kel_df4_training: 0.02583896618599311
298- New error for Kel_df4_training: 0.02583145694187052
Optimal parameters for Kel_df4_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'subsample': 0.7, 'colsample_bytree': 0.7, 'n_estimators': 1000}
Tuning for Kel_df5_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df5_training: 0.02426036295668491
4- New error for Kel_df5_training: 0.02418999050117191
22- New error for Kel_df5_training: 0.024176475067225308
49- New error for Kel_df5_training: 0.024159161982975026
148- New error for Kel_df5_training: 0.02413734113017049
220- New error for Kel_df5_training: 0.02411175978312897
Optimal parameters for Kel_df5_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 6, 'subsample': 0.8, 'colsample_bytree': 0.7, 'n_estimators': 900}
Tuning for Kel_df6_training


 Parameters:   0%|          | 0/360 [00:00<?, ?it/s]

1- New error for Kel_df6_training: 0.02398812798905717
2- New error for Kel_df6_training: 0.02396225359547525
3- New error for Kel_df6_training: 0.02389223733899572
4- New error for Kel_df6_training: 0.02381747118037667
13- New error for Kel_df6_training: 0.023810235556088563
16- New error for Kel_df6_training: 0.0237763508791837
76- New error for Kel_df6_training: 0.023773879229501065
88- New error for Kel_df6_training: 0.023745776688718127
160- New error for Kel_df6_training: 0.023737284991908143
Optimal parameters for Kel_df6_training are: 
 {'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'learning_rate': 0.05, 'max_depth': 7, 'subsample': 0.9, 'colsample_bytree': 0.7, 'n_estimators': 800}


In [37]:
Kel_Train_Results = {'Datasets': Kel_datasets, 
                    'OptParams': Kel_params,
                    'ValRMSE': Kel_errs}
Kel_Train_Results = pd.DataFrame(Kel_Train_Results)
Kel_Train_Results.to_csv(r"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Submissions\XGBoost\Kel_Tuned.csv", index=False)
Kel_Train_Results


Unnamed: 0,Datasets,OptParams,ValRMSE
0,Kel_df1_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.02516
1,Kel_df2_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.026407
2,Kel_df3_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.026371
3,Kel_df4_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.025831
4,Kel_df5_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.024112
5,Kel_df6_training,"{'objective': 'reg:squarederror', 'eval_metric...",0.023737


In [45]:
Pen_training = [Pen_df1_training, Pen_df2_training, Pen_df4_training,
                Pen_df5_training, Pen_df6_training, Pen_df7_training, 
                Pen_df8_training, Pen_df9_training, Pen_df10_training, 
                Pen_df11_training, Pen_df12_training, Pen_df13_training, 
                Pen_df14_training, Pen_df15_training]

Pen_test =  [Pen_df1_test, Pen_df2_test, Pen_df4_test, Pen_df5_test,
             Pen_df6_test, Pen_df7_test, Pen_df8_test, Pen_df9_test, 
             Pen_df10_test, Pen_df11_test, Pen_df12_test, Pen_df13_test, 
             Pen_df14_test, Pen_df15_test]


# Prediction for Pen

for i in range(len(Pen_training)):
    X = Pen_training[i].drop('power', axis=1)
    y = Pen_training[i]['power']
    # Train a model on the i-th training set
    # Define the XGBoost model
    params = Pen_params[i]
    model = xgb.XGBRegressor(**params)

    model.fit(X, y)
    Pen_test[i] = Pen_test[i].drop('power', axis=1)
    y_pred = model.predict(Pen_test[i])
    # Add an empty 'time' column to the beginning of the DataFrame
    Pen_test[i].insert(0, 'power', y_pred)

    # Add the 'power' column to the beginning of the DataFrame with the array of values
    Pen_test[i].insert(0, 'time', np.nan)
    if i>=2:
        j = i+2
    else:
        j = i+1

    Pen_test[i].to_csv(fr"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Submissions\Tuned XGBoost\Predictions\7_#7_Pen_df{j}_test.csv", index = False)        

In [46]:
Kel_training = [Kel_df1_training, Kel_df2_training, Kel_df3_training, Kel_df4_training, 
                Kel_df5_training, Kel_df6_training]

Kel_test = [Kel_df1_test, Kel_df2_test, Kel_df3_test, Kel_df4_test, Kel_df5_test, 
            Kel_df6_test]

for i in range(len(Kel_training)):
    X = Kel_training[i].drop('power', axis=1)
    y = Kel_training[i]['power']
    # Train a model on the i-th training set
    # Define the XGBoost model
    params = Kel_params[i]
    model = xgb.XGBRegressor(**params)
    
    model.fit(X, y)
    Kel_test[i] = Kel_test[i].drop('power', axis=1)
    y_pred = model.predict(Kel_test[i])
    # Add an empty 'time' column to the beginning of the DataFrame
    Kel_test[i].insert(0, 'power', y_pred)

    # Add the 'power' column to the beginning of the DataFrame with the array of values
    Kel_test[i].insert(0, 'time', np.nan)


    Kel_test[i].to_csv(fr"C:\Users\hebaish\OneDrive - Texas A&M University\A&M\Academics\Spring 23\ISEN 619\Project\Submissions\Tuned XGBoost\Predictions\7_#6_Kel_df{i+1}_test.csv", index = False)
    