# Packages

In [1]:
#basic packages
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

#data pre-processing packages
from datetime import datetime


#results and analysis packages
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# data modelling & results
from yellowbrick.regressor import PredictionError, ResidualsPlot
from sklearn.model_selection import train_test_split
import math as math

#NN
import xgboost as xgb

from tensorflow import keras
from keras.utils.vis_utils import plot_model
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

#feature importance
import shap


# Script

## Error computation

In [3]:
#defining the Root Mean Squared Error

def rmse(y_true, y_predicted):
    """Return the root mean squared error between actual and predicted values."""
    squared_error = mean_squared_error(y_true, y_predicted)
    return np.sqrt(squared_error)

In [4]:
#errors computation

def errors_computation(data):
    """Print the wind and power error metrics and return MAE/MAPE as a table.

    Reads the columns 'Target', 'WS_pred', 'P' and 'P_pred' of ``data``.
    RMSE, MAE and MAPE (as a percentage) are printed, rounded to 3 decimals,
    for the wind pair (Target vs WS_pred) and the power pair (P vs P_pred).

    Returns a DataFrame with rows 'MAE (in avg)' and 'MAPE (%)' and columns
    'Wind' and 'Power'. RMSE is only printed, it is not part of the table.
    """

    def _scores(actual, predicted):
        # each metric is evaluated once and reused for the table and the printout
        return (round(rmse(actual, predicted), 3),
                round(mae(actual, predicted), 3),
                round(mape(actual, predicted)*100, 3))

    wind_rmse, wind_mae, wind_mape = _scores(data['Target'], data['WS_pred'])
    power_rmse, power_mae, power_mape = _scores(data['P'], data['P_pred'])

    df = pd.DataFrame()
    df.at['MAE (in avg)', 'Wind'] = wind_mae
    df.at['MAPE (%)', 'Wind'] = wind_mape
    df.at['MAE (in avg)', 'Power'] = power_mae
    df.at['MAPE (%)', 'Power'] = power_mape

    print('Wind RMSE: ', wind_rmse, 'm/s as root mean')
    print('Wind MAE: ', wind_mae, 'm/s in avg')
    print('Wind MAPE: ', wind_mape, '%')

    print('Power RMSE: ', power_rmse, 'kW as root mean')
    print('Power MAE: ', power_mae, 'kW in avg')
    print('Power MAPE: ', power_mape, '%')

    return df

In [5]:
def error_plot(data, title):
    """Scatter the predicted wind speed against the actual one.

    data:  frame holding the columns 'WS_pred' (x axis) and 'Target' (y axis)
    title: str used as the axes title

    A faint green diagonal from (-1, -1) to (17.5, 17.5) is drawn as the
    reference for a perfect prediction; both axes are limited to 4..17.5.
    Prints an empty line and returns None.
    """
    plt.figure(figsize=(12,8))
    ax = plt.gca()

    # reference diagonal first, observations on top of it
    diagonal = [-1, 17.5]
    ax.plot(diagonal, diagonal, 'green', linewidth=4, alpha=.12)
    ax.plot(data['WS_pred'], data['Target'],
            marker='o', ls='', label='Regression', markersize=5, alpha=.1)

    ax.legend()
    ax.set(xlabel='y predicted', ylabel='y actual')
    ax.set_title(title)
    ax.set_ylim(4, 17.5)
    ax.set_xlim(4, 17.5)

    print('')
    return None

In [6]:
def powercurve_computation(data, power_curve):
    """Add the power read off the power curve for each observation.

    power_curve: frame with the columns 'Wind Speed [m/s]' and
                 'Warranted Power Curve [kW]'; the curve is interpolated
                 between its points with scipy's interp1d (default settings).
    data:        frame with the column 'Target' and, optionally, 'WS_pred'.

    The column 'P' (curve evaluated at 'Target') is written into ``data``
    itself, and 'P_pred' (curve evaluated at 'WS_pred') as well when that
    column exists. The same, modified, ``data`` object is returned.
    """
    from scipy import interpolate

    curve = interpolate.interp1d(power_curve['Wind Speed [m/s]'],
                                 power_curve['Warranted Power Curve [kW]'])

    data['P'] = curve(data['Target'])

    # predictions are optional: only frames that carry them get a 'P_pred'
    has_prediction = 'WS_pred' in data.keys()
    if has_prediction:
        data['P_pred'] = curve(data['WS_pred'])

    print('power curve computation performed')
    return data

In [7]:
def control_power_computation (data_test, data_train, power_curve):
    """Run powercurve_computation on the test frame, then on the training frame.

    Returns the pair (results_test, results_train), in that order.
    """
    with_power = [powercurve_computation(frame, power_curve)
                  for frame in (data_test, data_train)]
    return with_power[0], with_power[1]

In [8]:
def compute_results(data_test, data_train, power_curve, plot_error):
    """Compute the power columns and print the modelling errors of both sets.

    The training-set errors are printed first, then the test-set errors.
    When ``plot_error`` is truthy the test-set error plot is drawn as well.
    Returns None (the closing message is printed, nothing is handed back).
    """
    results_test, results_train = control_power_computation(data_test, data_train, power_curve)

    reports = (('Modelling errors for training set:', results_train),
               ('Modelling errors for test set:', results_test))
    for heading, results in reports:
        print(heading)
        errors_computation(results)
        print('')

    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    print('Showing the results of the modelling: ')
    return None

In [9]:
def compute_results1(data_test, data_train, power_curve, plot_error):
    """Same report as compute_results, plus the AEP evaluation of the test set.

    Prints the training-set and test-set errors, runs AEP_computation on the
    test results and, when ``plot_error`` is truthy, draws the test-set error
    plot. Returns whatever AEP_computation returned.
    """
    results_test, results_train = control_power_computation(data_test, data_train, power_curve)

    reports = (('Modelling errors for training set:', results_train),
               ('Modelling errors for test set:', results_test))
    for heading, results in reports:
        print(heading)
        errors_computation(results)
        print('')

    print('Modelling errors in AEP terms for test set')
    aep_data = AEP_computation(results_test, power_curve)

    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    print('Showing the results of the modelling: ')
    return aep_data

In [10]:
def AEP_computation(data, power_curve):
    """Build the Annual Energy Production (AEP) table and print its summary.

    data:        frame with the columns 'WS_pred' and 'P'; a 'WS_pred_bin'
                 column is added to it in place.
    power_curve: frame with the columns 'Wind Speed [m/s]',
                 'Warranted Power Curve [kW]' and
                 'Wind Speed Distribution [Hr/Year]'.

    The table is indexed by the power-curve wind speed ('WS') and holds
    WPC, MPC_SC (mean of 'P' per predicted-wind-speed bin), WSAD, WAEP and
    MAEP_SC. The table figure, the AEP ratio/difference and the power-curve
    plot are produced as side effects. Returns ``data``.
    """

    #binning the WS_pred: 0.5 m/s wide bins labelled by their centre (0.0, 0.5, ..., 27.5)
    bin_edges = list(np.arange(-0.25, 28.0, 0.5))
    bin_centres = list(np.arange(0.0, 28.0, 0.5))
    data['WS_pred_bin'] = pd.cut(data['WS_pred'], bins=bin_edges, labels=bin_centres)

    #creating the AEP (Annual Energy Production) table
    # The table is built from the power_curve argument (not from a notebook
    # global) and is NOT named AEP_table, so the AEP_table() function below
    # stays callable.
    wind_speed = pd.Index(power_curve['Wind Speed [m/s]'].to_numpy(), name='WS')
    aep = pd.DataFrame(index=wind_speed)

    # power-curve columns are attached by position: the table is indexed by
    # wind speed while power_curve keeps its own row index, so a label-based
    # assignment would pair the wrong rows
    aep['WPC'] = power_curve['Warranted Power Curve [kW]'].to_numpy()

    # mean measured power per predicted-wind-speed bin, matched to the table
    # on the bin centre (rows whose wind speed is not a bin centre get NaN)
    binned_power = data.groupby('WS_pred_bin', observed=False)['P'].mean()
    binned_power.index = binned_power.index.astype(float)
    aep['MPC_SC'] = binned_power

    # The NaN in MPC_SC could be filled by copying what it should be: WPC
    #     aep['MPC_SC'] = aep['MPC_SC'].fillna(aep['WPC'])

    aep['WSAD'] = power_curve['Wind Speed Distribution [Hr/Year]'].to_numpy()

    # ...or perhaps the table should be trimmed here to drop the NaN rows
    #     aep = aep.dropna()
    # NOTE(review): as it stands, bins without observations contribute to
    # WAEP but are skipped by the MAEP_SC sum below - confirm which of the
    # two options above is intended.

    aep['WAEP'] = aep['WPC']*aep['WSAD']
    aep['MAEP_SC'] = aep['MPC_SC']*aep['WSAD']

    #computing the metrics for later printing
    MAEP_SC = aep['MAEP_SC'].sum()
    WAEP = aep['WAEP'].sum()
    AEP_perc = (MAEP_SC/WAEP)*100
    AEP_diff = (MAEP_SC-WAEP)/1000

    #printing the results
    print('')
    print('The computed AEP_table is: ')
    AEP_table(aep)
    print('')
    print('The AEP% of the Site Calibration is: ', round(AEP_perc, 3), '%')
    print('')
    print('The AEP difference of the Site Calibration is: ', round(AEP_diff, 3), 'MWh')
    print('')
    AEP_plot(data, aep)

    return data

In [11]:
def AEP_plot(data, AEP_table):
    """Plot the measured scatter together with the two power curves.

    data:      frame with the columns 'WS_pred' (x) and 'P' (y)
    AEP_table: frame indexed by wind speed with the columns 'WPC' and 'MPC_SC'

    The x axis is limited to 0..20. Prints a confirmation and returns None.
    """
    plt.figure(figsize=(10,6))
    ax = plt.gca()

    ax.plot(data['WS_pred'], data['P'],
            marker='o', ls='', label='measured scatter', markersize=5, alpha=.1)

    # warranted curve in black, site-calibration curve in red
    curves = (('WPC', 'black', 'WPC'), ('MPC_SC', 'red', 'MPC-SC'))
    for column, colour, legend_name in curves:
        ax.plot(AEP_table.index, AEP_table[column],
                marker='o', color=colour, linewidth=0.5, label=legend_name, markersize=5)

    ax.legend()
    ax.set(ylabel='Power interpolated with WPC and Target WS (kW)', xlabel='Predicted Wind Speed (m/s)')
    ax.set_title('Site Calibration Power Curve')
    ax.set_xlim(0, 20)

    print('Error plot performed')
    return None

In [12]:
def AEP_table(AEP_table):
    """Draw the AEP table as a matplotlib table figure.

    AEP_table: frame indexed by wind speed with the columns 'WPC', 'MPC_SC',
               'WSAD', 'WAEP' and 'MAEP_SC' (a column named 'MAEP' is accepted
               as a fallback for the last one).

    WAEP and MAEP are divided by 1,000,000 for display (column headers say
    GWh). Returns the string 'AEP table printed'.
    """
    table_data = AEP_table

    WS = table_data.index
    WPC = table_data['WPC']
    MPC_SC = table_data['MPC_SC'].round(0)
    WSAD = table_data['WSAD']          # was misspelled 'WASD'
    WAEP = (table_data['WAEP']/1000000).round(3)
    # AEP_computation names this column 'MAEP_SC'
    maep_column = 'MAEP_SC' if 'MAEP_SC' in table_data.columns else 'MAEP'
    MAEP = (table_data[maep_column]/1000000).round(3)

    column_headers = ('Wind speed bin (m/s)', 'WPC (kW)', 'MPC_SC (kW)',
                      'Annual Wind Distribution (h/year)', 'WAEP (GWh)', 'MAEP (GWh)')
    ccolors = plt.cm.BuPu(np.full(len(column_headers), 0.1))

    # one list per column, transposed so that each table row is one wind-speed bin
    columns = [list(WS), list(WPC), list(MPC_SC), list(WSAD), list(WAEP), list(MAEP)]
    cell_text = np.array(columns).T

    fig, ax = plt.subplots(figsize=(12,5))
    ax.set_axis_off()

    # no row labels are shown, so no row label/colour arguments are passed;
    # colWidths needs one entry per column
    the_table = ax.table(cellText=cell_text,
                         colColours=ccolors,
                         colLabels=column_headers,
                         cellLoc='center',
                         loc='upper center',
                         colWidths=[0.1] * len(column_headers))

    ax.set_title('Annual Energy Production for Site Calibration PC',
                 fontweight="bold", fontsize=14)

    the_table.auto_set_font_size(False)
    the_table.set_fontsize(14)
    the_table.scale(2, 2)

    return ('AEP table printed')

## Data uploading

In [13]:
def uploading_csv(file_folder,file_name, data_root=r'C:\Users\irgaa\Irma\Data'):
    r"""Read a CSV file stored under the data root into a DataFrame.

    file_folder: folder part of the path, appended to ``data_root`` as is
    file_name:   file part of the path, appended after the folder as is
    data_root:   base directory; defaults to the original hard-coded location
                 so existing calls keep working

    The three pieces are concatenated without inserting any separator, so
    the caller supplies them (e.g. '\Dataset1-Normal_Site', '\X_train14.csv').
    Returns the DataFrame produced by pandas.read_csv.
    """
    # plain concatenation on purpose: existing callers pass pieces that
    # already start with a path separator
    data_path = str(data_root) + str(file_folder) + str(file_name)

    return pd.read_csv(data_path)

In [14]:
#this function saves a data csv

def save (data, file_folder,file_name, data_root=r'C:\Users\irgaa\Irma\Data'):
    r"""Write ``data`` as a CSV file under the data root.

    file_folder: folder part of the path, appended to ``data_root`` as is
    file_name:   file part of the path, appended after the folder as is
    data_root:   base directory; defaults to the original hard-coded location
                 so existing calls keep working

    The three pieces are concatenated without inserting any separator. The
    file keeps the column names but not the index. Prints a confirmation and
    returns None.
    """
    data_folder = str(file_folder)
    data_file = str(file_name)

    # plain concatenation on purpose: callers provide their own separators
    data_path = str(data_root) + data_folder + data_file

    data.to_csv(data_path, index=False, header=True)

    print('file', data_file, 'saved in', data_folder, 'folder')
    return None

## Data selection

In [15]:
def data_selection(X_train, X_test, inputs):
    """Keep only the columns listed in ``inputs`` in both sets.

    Returns the pair (training selection, test selection).
    """
    selected_train, selected_test = X_train[inputs], X_test[inputs]
    return selected_train, selected_test

In [16]:
def data_drop(X_train, X_test, list_2drop):
    """Remove the columns listed in ``list_2drop`` from both sets.

    The inputs are left untouched; the pair (reduced training set, reduced
    test set) is returned.
    """
    reduced_train, reduced_test = (frame.drop(columns=list_2drop)
                                   for frame in (X_train, X_test))
    return reduced_train, reduced_test

## Subsets

In [17]:
def subset_selection (data, number):
    """Return the cumulative sensor subset ``number`` as a new DataFrame.

    data:   the original dataset, with one column per sensor (WS1, tod,
            WSVer, etc.)
    number: which subset to return. Subset k holds the columns of the first
            k sensor groups listed below, in that order. Any value that is
            not equal to one of 1..13 selects the full subset 14.

    Only the columns that belong to the requested subset have to be present
    in ``data``. The result is a copy, so editing it does not touch ``data``.
    """
    # sensor groups in the order in which they are added to the subsets
    sensor_groups = (
        ('WS1', 'tod', 'TI'),                    # group 1
        ('WSHor', 'WSVer', 'WDHor', 'WDVer'),    # group 2
        ('WS4', 'WSH'),                          # group 3
        ('WS3',),                                # group 4
        ('WD4',),                                # group 5
        ('WD1', 'WVeer'),                        # group 6
        ('RH1',),                                # group 7
        ('PR1',),                                # group 8
        ('WD3',),                                # group 9
        ('T1', 'AD1'),                           # group 10
        ('T2',),                                 # group 11
        ('RH2',),                                # group 12
        ('PR2', 'AD2'),                          # group 13
        ('Rain',),                               # group 14
    )
    full = len(sensor_groups)

    # equality test against 1..13, anything else falls through to the full subset
    level = next((k for k in range(1, full) if number == k), full)

    columns = [name for group in sensor_groups[:level] for name in group]
    return data[columns].copy()

## Modelling

### Modelling XGBoost

In [18]:
def modelling_XGBoost (X_train, X_test, y_train, y_test, power_curve,  parameters, plot_error, plot):
    """Fit an XGBoost regressor with fixed hyper-parameters and report its errors.

    parameters: dict with the keys 'max_depth', 'n_estimators',
                'learning_rate', 'subsample', 'colsample_bytree',
                'min_child_weight', 'gamma' and 'reg_lambda'
    y_train, y_test: must provide a 'Target' column
    plot_error: forwarded to compute_results
    plot:       accepted for compatibility, currently unused

    Returns the fitted model.
    """
    #creating the model
    # gamma is now forwarded as well: it used to be read from ``parameters``
    # but never reached the regressor, unlike in GridSearch_XGBoost
    model = xgb.XGBRegressor(max_depth=parameters['max_depth'],
                             n_estimators=parameters['n_estimators'],
                             random_state=42,
                             objective='reg:squarederror',
                             booster='gbtree',
                             learning_rate=parameters['learning_rate'],
                             subsample=parameters['subsample'],
                             colsample_bytree=parameters['colsample_bytree'],
                             min_child_weight=parameters['min_child_weight'],
                             gamma=parameters['gamma'],
                             reg_lambda=parameters['reg_lambda'])

    #model fitting
    model.fit(X_train, y_train)

    #model predicting
    y_pred_test = model.predict(X_test)
    y_pred_train = model.predict(X_train)

    #computing the results
    # targets are attached by position, so a y frame whose index is not
    # 0..n-1 (e.g. straight out of a shuffled split) still lines up with
    # the predictions
    data_test = pd.DataFrame({'WS_pred': y_pred_test,
                              'Target': np.asarray(y_test['Target'])})
    data_train = pd.DataFrame({'WS_pred': y_pred_train,
                               'Target': np.asarray(y_train['Target'])})

    compute_results(data_test, data_train, power_curve, plot_error)
    print('XGBoost modelling performed')

    return model

### Grid Search XGBoost

In [19]:
def GridSearch_XGBoost (X_train, X_test, y_train, y_test, power_curve, parameters, param_grid, plot_error):
    """Grid-search XGBoost hyper-parameters with 4-fold cross-validation.

    parameters: dict with the baseline values for 'max_depth',
                'n_estimators', 'learning_rate', 'subsample',
                'colsample_bytree', 'min_child_weight', 'gamma' and
                'reg_lambda'
    param_grid: grid handed to GridSearchCV (scored with
                'neg_mean_squared_error')
    y_train, y_test: must provide a 'Target' column
    plot_error: forwarded to compute_results

    Prints the best parameters, the train/test errors, the elapsed minutes
    and the square root of the best (negated) CV score. Returns the fitted
    GridSearchCV object.
    """
    #counting the running time
    start_time = time.time()

    #creating the model
    xgbr = xgb.XGBRegressor(max_depth=parameters['max_depth'],
                            n_estimators=parameters['n_estimators'],
                            random_state=42,
                            objective='reg:squarederror',
                            booster='gbtree',
                            learning_rate=parameters['learning_rate'],
                            subsample=parameters['subsample'],
                            colsample_bytree=parameters['colsample_bytree'],
                            min_child_weight=parameters['min_child_weight'],
                            gamma=parameters['gamma'],
                            reg_lambda=parameters['reg_lambda'])

    #Grid Search CV
    clf = GridSearchCV(estimator=xgbr,
                       param_grid=param_grid,
                       cv=4,
                       scoring='neg_mean_squared_error',
                       verbose=1)

    #model fitting
    clf.fit(X_train, y_train)

    #model predicting
    y_pred_test = clf.predict(X_test)
    y_pred_train = clf.predict(X_train)

    print('')
    print('Best parameters :')
    print(clf.best_params_)
    print('')

    #computing the results
    # targets are attached by position, so a y frame whose index is not
    # 0..n-1 still lines up with the predictions
    data_test = pd.DataFrame({'WS_pred': y_pred_test,
                              'Target': np.asarray(y_test['Target'])})
    data_train = pd.DataFrame({'WS_pred': y_pred_train,
                               'Target': np.asarray(y_train['Target'])})

    compute_results(data_test, data_train, power_curve, plot_error)
    print('GridSearch_ XGBoost performed')
    print("--- %s minutes ---" % ((time.time() - start_time)/60))
    print('')
    # best_score_ is a negated MSE, hence the sign flip before the root
    print('Lowest RMSE: ', (-clf.best_score_)**(1/2.0))

    return clf

### Model Testing

In [20]:
def model_testing (X_train, X_test, y_train, y_test, power_curve, model, plot_error):
    """Report the errors of an already fitted model and return its test predictions.

    model:           any object exposing ``predict``
    y_train, y_test: must provide a 'Target' column
    plot_error:      forwarded to compute_results

    Returns the 'WS_pred' column of the test-set results (a Series).
    """
    y_pred_test = model.predict(X_test)
    y_pred_train = model.predict(X_train)

    # targets are attached by position, so a y frame whose index is not
    # 0..n-1 still lines up with the predictions
    data_test = pd.DataFrame({'WS_pred': y_pred_test,
                              'Target': np.asarray(y_test['Target'])})
    data_train = pd.DataFrame({'WS_pred': y_pred_train,
                               'Target': np.asarray(y_train['Target'])})

    compute_results(data_test, data_train, power_curve, plot_error)

    WS_pred = data_test['WS_pred']
    print('XGBoost results performed')

    return WS_pred

# Data analysis

In [21]:
#WTG14

In [22]:
#individual

In [23]:
#upload the dataset with file_folder, file_name
# Raw strings keep the Windows backslashes literal: '\D', '\X' and '\y' are
# not valid escape sequences and trigger a warning in recent Python versions.
# data_up= uploading_csv(r'\Dataset1-Normal_Site', r'\data_comp14.csv')
X_train = uploading_csv(r'\Dataset1-Normal_Site', r'\X_train14.csv')
X_test = uploading_csv(r'\Dataset1-Normal_Site', r'\X_test14.csv')
y_train = uploading_csv(r'\Dataset1-Normal_Site', r'\y_train14.csv')
y_test = uploading_csv(r'\Dataset1-Normal_Site', r'\y_test14.csv')

X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [24]:
# power curve table; raw strings because '\D' and '\P' are not valid escape sequences
PC = uploading_csv(r'\Dataset1-Normal_Site', r'\PC_1.15kgm-3.csv')

## Subset7

In [25]:
# same subset number for the training and the test features
X_train_subset, X_test_subset = (subset_selection(features, 7)
                                 for features in (X_train, X_test))

In [26]:
# display the selected training features (the last expression of a cell is rendered)
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414


### Grid Search

In [28]:
# baseline hyper-parameters for the first grid search
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [29]:
# number of trees: 100 to 1000 in steps of 100
param_grid = dict(n_estimators=list(range(100, 1001, 100)))

In [30]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 400}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.098 m/s as root mean
Wind MAE:  0.075 m/s in avg
Wind MAPE:  0.875 %
Power RMSE:  47.169 kW as root mean
Power MAE:  29.299 kW in avg
Power MAPE:  1.91 %

Modelling errors for test set:
Wind RMSE:  0.35 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.15 %
Power RMSE:  162.512 kW as root mean
Power MAE:  102.3 kW in avg
Power MAPE:  7.274 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.2671067118644714 minutes ---

Lowest RMSE:  0.3446559058815519


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [31]:
# baseline with n_estimators set to 400
parameters = dict(
    max_depth=5,
    n_estimators=400,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [32]:
# tree depth and minimum child weight are searched jointly
param_grid = dict(
    max_depth=[3, 4, 5, 6, 10, 15, 20],
    min_child_weight=[1, 2, 3, 6, 10],
)

In [33]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 2}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.1 m/s as root mean
Wind MAE:  0.077 m/s in avg
Wind MAPE:  0.902 %
Power RMSE:  48.627 kW as root mean
Power MAE:  30.282 kW in avg
Power MAPE:  1.987 %

Modelling errors for test set:
Wind RMSE:  0.348 m/s as root mean
Wind MAE:  0.262 m/s in avg
Wind MAPE:  3.113 %
Power RMSE:  162.219 kW as root mean
Power MAE:  101.876 kW in avg
Power MAPE:  7.171 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 7.637895647684733 minutes ---

Lowest RMSE:  0.3443680728144567


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=400, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [34]:
# baseline with min_child_weight set to 2
parameters = dict(
    max_depth=5,
    n_estimators=400,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [35]:
# candidate values for gamma
param_grid = dict(gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5])

In [36]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.1 m/s as root mean
Wind MAE:  0.077 m/s in avg
Wind MAPE:  0.902 %
Power RMSE:  48.627 kW as root mean
Power MAE:  30.282 kW in avg
Power MAPE:  1.987 %

Modelling errors for test set:
Wind RMSE:  0.348 m/s as root mean
Wind MAE:  0.262 m/s in avg
Wind MAPE:  3.113 %
Power RMSE:  162.219 kW as root mean
Power MAE:  101.876 kW in avg
Power MAPE:  7.171 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.7877987702687581 minutes ---

Lowest RMSE:  0.3443680728144567


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=400, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [37]:
# baseline unchanged from the previous search (gamma stays at 0)
parameters = dict(
    max_depth=5,
    n_estimators=400,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [38]:
# row and column sampling fractions are searched jointly
param_grid = dict(
    subsample=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    colsample_bytree=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
)

In [39]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.102 m/s as root mean
Wind MAE:  0.079 m/s in avg
Wind MAPE:  0.921 %
Power RMSE:  48.976 kW as root mean
Power MAE:  30.66 kW in avg
Power MAPE:  2.022 %

Modelling errors for test set:
Wind RMSE:  0.348 m/s as root mean
Wind MAE:  0.263 m/s in avg
Wind MAPE:  3.142 %
Power RMSE:  162.909 kW as root mean
Power MAE:  102.013 kW in avg
Power MAPE:  7.29 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 6.360441390673319 minutes ---

Lowest RMSE:  0.3426059658436299


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=400, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [40]:
# baseline with subsample set to 0.6
parameters = dict(
    max_depth=5,
    n_estimators=400,
    learning_rate=0.1,
    subsample=0.6,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [41]:
# candidate values for the L2 regularisation term
param_grid = dict(reg_lambda=[1, 10, 50, 100, 150])

In [42]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.132 m/s as root mean
Wind MAE:  0.099 m/s in avg
Wind MAPE:  1.145 %
Power RMSE:  61.891 kW as root mean
Power MAE:  37.695 kW in avg
Power MAPE:  2.502 %

Modelling errors for test set:
Wind RMSE:  0.348 m/s as root mean
Wind MAE:  0.265 m/s in avg
Wind MAPE:  3.142 %
Power RMSE:  162.458 kW as root mean
Power MAE:  101.556 kW in avg
Power MAPE:  7.216 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.6700107256571451 minutes ---

Lowest RMSE:  0.34203327444219167


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=400, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.6,
                                    tree_method=None, validate_parameters=None,


In [43]:
# baseline with reg_lambda set to 10
parameters = dict(
    max_depth=5,
    n_estimators=400,
    learning_rate=0.1,
    subsample=0.6,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=10,
)

In [44]:
# learning rate and number of trees are searched jointly
param_grid = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.5],
    n_estimators=[800, 900, 1000, 1200, 1500, 1800, 2000],
)

In [45]:
# the fitted search object is the cell output
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.195 m/s as root mean
Wind MAE:  0.148 m/s in avg
Wind MAPE:  1.724 %
Power RMSE:  90.978 kW as root mean
Power MAE:  56.099 kW in avg
Power MAPE:  3.835 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.073 %
Power RMSE:  157.095 kW as root mean
Power MAE:  99.603 kW in avg
Power MAPE:  7.108 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 13.324270987510682 minutes ---

Lowest RMSE:  0.3372111243272419


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=400, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.6,
                                    tree_method=None, validate_parameters=None,

### Testing

In [46]:
# hyper-parameters used for the final model of this subset
parameters = dict(
    max_depth=5,
    n_estimators=2000,
    learning_rate=0.01,
    subsample=0.6,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=10,
)

In [47]:
# Modelling: fit the final XGBoost model with the hyperparameters chosen above.

In [48]:
# Fit XGBoost with the final `parameters`; the returned model is passed to
# model_testing in a later cell.
model = modelling_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters,
    plot_error=False, plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.195 m/s as root mean
Wind MAE:  0.148 m/s in avg
Wind MAPE:  1.724 %
Power RMSE:  90.978 kW as root mean
Power MAE:  56.099 kW in avg
Power MAPE:  3.835 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.073 %
Power RMSE:  157.095 kW as root mean
Power MAE:  99.603 kW in avg
Power MAPE:  7.108 %


Showing the results of the modelling: 
XGBoost modelling performed


In [49]:
# Testing: evaluate the fitted model on the train and test splits and keep its predictions.

In [50]:
# Evaluate the fitted model on both splits and keep whatever model_testing
# returns; it is stored as WS_pred and written to CSV in the next cell.
WS_pred = model_testing(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.195 m/s as root mean
Wind MAE:  0.148 m/s in avg
Wind MAPE:  1.724 %
Power RMSE:  90.978 kW as root mean
Power MAE:  56.099 kW in avg
Power MAPE:  3.835 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.073 %
Power RMSE:  157.095 kW as root mean
Power MAE:  99.603 kW in avg
Power MAPE:  7.108 %


Showing the results of the modelling: 
XGBoost results performed


In [51]:
# Persist the predictions returned by model_testing as a CSV file.
# The folder is written as a raw string: '\R' is not a valid escape sequence, so
# the plain literal emits an invalid-escape warning on current Python versions
# (and is slated to become an error). r'\Results_' is the same runtime value.
# NOTE(review): the leading backslash is a Windows-style separator; how `save`
# combines it with the working directory is not visible here - confirm portability.
WS_pred = pd.DataFrame(WS_pred)
save(WS_pred, r'\Results_', 'Subset7_XGB_WTG14.csv')

file Subset7_XGB_WTG14.csv saved in \Results_ folder


## Subset8

In [52]:
# Select feature subset 8 from both splits; the tuning and testing cells of this
# section all operate on these two frames.
X_train_subset = subset_selection(X_train, 8)
X_test_subset = subset_selection(X_test, 8)

In [53]:
# Inspect the feature subset selected for the training split in the previous cell.
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1,PR1
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881,0.249594
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205,0.593645
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159,0.732628
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972,0.889374
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954,0.745891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528,0.769005
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567,0.651807
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596,0.554040
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414,0.697590


### Grid Search

In [54]:
# Starting hyperparameters for the subset-8 tuning sequence; each later
# `parameters` cell updates the values selected by the preceding grid search.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 1,
    'gamma': 0,
    'reg_lambda': 1,
}

In [55]:
# Step 1: search the number of boosting rounds, 100 to 1000 in steps of 100.
param_grid = {
    'n_estimators': list(range(100, 1001, 100)),
}

In [56]:
# Grid-search n_estimators with every other hyperparameter held at `parameters`.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 1000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.023 m/s as root mean
Wind MAE:  0.018 m/s in avg
Wind MAPE:  0.206 %
Power RMSE:  11.514 kW as root mean
Power MAE:  7.083 kW in avg
Power MAPE:  0.455 %

Modelling errors for test set:
Wind RMSE:  0.344 m/s as root mean
Wind MAE:  0.26 m/s in avg
Wind MAPE:  3.106 %
Power RMSE:  159.328 kW as root mean
Power MAE:  99.775 kW in avg
Power MAPE:  7.156 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.372089421749115 minutes ---

Lowest RMSE:  0.3441415086585952


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [57]:
# Unchanged from the starting point: the n_estimators search above selected 1000,
# which is the value already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 1,
    'gamma': 0,
    'reg_lambda': 1,
}

In [58]:
# Step 2: search tree depth jointly with the minimum child weight (7 x 5 candidates).
param_grid = {
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10],
}

In [59]:
# Grid-search max_depth x min_child_weight with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.031 m/s as root mean
Wind MAE:  0.023 m/s in avg
Wind MAPE:  0.268 %
Power RMSE:  15.34 kW as root mean
Power MAE:  9.129 kW in avg
Power MAPE:  0.591 %

Modelling errors for test set:
Wind RMSE:  0.343 m/s as root mean
Wind MAE:  0.259 m/s in avg
Wind MAPE:  3.092 %
Power RMSE:  161.093 kW as root mean
Power MAE:  100.252 kW in avg
Power MAPE:  7.128 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 21.171679858366648 minutes ---

Lowest RMSE:  0.337516665578437


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [60]:
# Carry forward the result of the depth / child-weight search.
parameters = {
    'max_depth': 5,          # best value from the search above (unchanged)
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,   # best value from the search above (was 1)
    'gamma': 0,
    'reg_lambda': 1,
}

In [61]:
# Step 3: search gamma.
param_grid = {
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5],
}

In [62]:
# Grid-search gamma with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.031 m/s as root mean
Wind MAE:  0.023 m/s in avg
Wind MAPE:  0.268 %
Power RMSE:  15.34 kW as root mean
Power MAE:  9.129 kW in avg
Power MAPE:  0.591 %

Modelling errors for test set:
Wind RMSE:  0.343 m/s as root mean
Wind MAE:  0.259 m/s in avg
Wind MAPE:  3.092 %
Power RMSE:  161.093 kW as root mean
Power MAE:  100.252 kW in avg
Power MAPE:  7.128 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.258437772591909 minutes ---

Lowest RMSE:  0.337516665578437


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [63]:
# Unchanged: the gamma search above selected 0, the value already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [64]:
# Step 4: search row and column sampling fractions jointly (7 x 7 candidates).
param_grid = {
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
}

In [65]:
# Grid-search subsample x colsample_bytree with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.031 m/s as root mean
Wind MAE:  0.023 m/s in avg
Wind MAPE:  0.268 %
Power RMSE:  15.34 kW as root mean
Power MAE:  9.129 kW in avg
Power MAPE:  0.591 %

Modelling errors for test set:
Wind RMSE:  0.343 m/s as root mean
Wind MAE:  0.259 m/s in avg
Wind MAPE:  3.092 %
Power RMSE:  161.093 kW as root mean
Power MAE:  100.252 kW in avg
Power MAPE:  7.128 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 16.678395195802054 minutes ---

Lowest RMSE:  0.337516665578437


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [66]:
# Unchanged: the sampling search above selected subsample=0.8 and
# colsample_bytree=0.8, the values already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [67]:
# Step 5: search the L2 regularisation strength.
param_grid = {
    'reg_lambda': [1, 10, 50, 100, 150],
}

In [68]:
# Grid-search reg_lambda with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.045 m/s as root mean
Wind MAE:  0.033 m/s in avg
Wind MAPE:  0.39 %
Power RMSE:  22.369 kW as root mean
Power MAE:  13.269 kW in avg
Power MAPE:  0.863 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.035 %
Power RMSE:  157.613 kW as root mean
Power MAE:  98.641 kW in avg
Power MAPE:  7.04 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.7671287655830383 minutes ---

Lowest RMSE:  0.33495399845576007


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [69]:
# Carry forward the result of the reg_lambda search.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 10,   # best value from the search above (was 1)
}

In [70]:
# Step 6: search the learning rate jointly with a wider range of boosting rounds
# (4 x 7 candidates).
param_grid = {
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000],
}

In [71]:
# Grid-search learning_rate x n_estimators with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.1, 'n_estimators': 800}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.063 m/s as root mean
Wind MAE:  0.047 m/s in avg
Wind MAPE:  0.541 %
Power RMSE:  30.574 kW as root mean
Power MAE:  18.192 kW in avg
Power MAPE:  1.188 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.031 %
Power RMSE:  157.232 kW as root mean
Power MAE:  98.488 kW in avg
Power MAPE:  7.023 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 10.517322452863057 minutes ---

Lowest RMSE:  0.33482763979638985


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

### Testing

In [72]:
# Final hyperparameters for subset 8, assembled from the grid searches above.
parameters = {
    'max_depth': 5,
    'n_estimators': 800,     # best value from the learning_rate / n_estimators search
    'learning_rate': 0.1,    # best value from the learning_rate / n_estimators search
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 10,
}

In [73]:
# Modelling: fit the final XGBoost model with the hyperparameters chosen above.

In [74]:
# Fit XGBoost with the final `parameters`; the returned model is passed to
# model_testing in a later cell.
model = modelling_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters,
    plot_error=False, plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.063 m/s as root mean
Wind MAE:  0.047 m/s in avg
Wind MAPE:  0.541 %
Power RMSE:  30.574 kW as root mean
Power MAE:  18.192 kW in avg
Power MAPE:  1.188 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.031 %
Power RMSE:  157.232 kW as root mean
Power MAE:  98.488 kW in avg
Power MAPE:  7.023 %


Showing the results of the modelling: 
XGBoost modelling performed


In [75]:
# Testing: evaluate the fitted model on the train and test splits and keep its predictions.

In [76]:
# Evaluate the fitted model on both splits and keep whatever model_testing
# returns; it is stored as WS_pred and written to CSV in the next cell.
WS_pred = model_testing(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.063 m/s as root mean
Wind MAE:  0.047 m/s in avg
Wind MAPE:  0.541 %
Power RMSE:  30.574 kW as root mean
Power MAE:  18.192 kW in avg
Power MAPE:  1.188 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.031 %
Power RMSE:  157.232 kW as root mean
Power MAE:  98.488 kW in avg
Power MAPE:  7.023 %


Showing the results of the modelling: 
XGBoost results performed


In [77]:
# Persist the predictions returned by model_testing as a CSV file.
# The folder is written as a raw string: '\R' is not a valid escape sequence, so
# the plain literal emits an invalid-escape warning on current Python versions
# (and is slated to become an error). r'\Results_' is the same runtime value.
# NOTE(review): the leading backslash is a Windows-style separator; how `save`
# combines it with the working directory is not visible here - confirm portability.
WS_pred = pd.DataFrame(WS_pred)
save(WS_pred, r'\Results_', 'Subset8_XGB_WTG14.csv')

file Subset8_XGB_WTG14.csv saved in \Results_ folder


## Subset9

In [78]:
# Select feature subset 9 from both splits; the tuning and testing cells of this
# section all operate on these two frames.
X_train_subset = subset_selection(X_train, 9)
X_test_subset = subset_selection(X_test, 9)

In [79]:
# Inspect the feature subset selected for the training split in the previous cell.
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1,PR1,WD3
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881,0.249594,0.270001
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205,0.593645,0.129344
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159,0.732628,0.785221
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972,0.889374,0.717605
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954,0.745891,0.819320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528,0.769005,0.708987
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567,0.651807,0.419980
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596,0.554040,0.655095
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414,0.697590,0.844445


### Grid Search

In [80]:
# Starting hyperparameters for the subset-9 tuning sequence; each later
# `parameters` cell updates the values selected by the preceding grid search.
parameters = {
    'max_depth': 5,
    'n_estimators': 1000,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 1,
    'gamma': 0,
    'reg_lambda': 1,
}

In [81]:
# Step 1: search the number of boosting rounds, 100 to 1000 in steps of 100.
param_grid = {
    'n_estimators': list(range(100, 1001, 100)),
}

In [82]:
# Grid-search n_estimators with every other hyperparameter held at `parameters`.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 600}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.055 m/s as root mean
Wind MAE:  0.042 m/s in avg
Wind MAPE:  0.486 %
Power RMSE:  26.873 kW as root mean
Power MAE:  16.537 kW in avg
Power MAPE:  1.062 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.259 m/s in avg
Wind MAPE:  3.077 %
Power RMSE:  159.078 kW as root mean
Power MAE:  99.493 kW in avg
Power MAPE:  7.068 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.174764668941498 minutes ---

Lowest RMSE:  0.34152102366290754


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [84]:
# Carry forward the result of the n_estimators search.
parameters = {
    'max_depth': 5,
    'n_estimators': 600,     # best value from the search above (was 1000)
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 1,
    'gamma': 0,
    'reg_lambda': 1,
}

In [85]:
# Step 2: search tree depth jointly with the minimum child weight (7 x 5 candidates).
param_grid = {
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10],
}

In [86]:
# Grid-search max_depth x min_child_weight with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.07 m/s as root mean
Wind MAE:  0.052 m/s in avg
Wind MAPE:  0.61 %
Power RMSE:  34.166 kW as root mean
Power MAE:  20.673 kW in avg
Power MAPE:  1.336 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.074 %
Power RMSE:  158.447 kW as root mean
Power MAE:  98.781 kW in avg
Power MAPE:  7.077 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 10.72613468170166 minutes ---

Lowest RMSE:  0.3385502000295546


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=600, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [87]:
# Carry forward the result of the depth / child-weight search.
parameters = {
    'max_depth': 5,          # best value from the search above (unchanged)
    'n_estimators': 600,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,   # best value from the search above (was 1)
    'gamma': 0,
    'reg_lambda': 1,
}

In [88]:
# Step 3: search gamma.
param_grid = {
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5],
}

In [89]:
# Grid-search gamma with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.07 m/s as root mean
Wind MAE:  0.052 m/s in avg
Wind MAPE:  0.61 %
Power RMSE:  34.166 kW as root mean
Power MAE:  20.673 kW in avg
Power MAPE:  1.336 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.074 %
Power RMSE:  158.447 kW as root mean
Power MAE:  98.781 kW in avg
Power MAPE:  7.077 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.2886651118596395 minutes ---

Lowest RMSE:  0.3385502000295546


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=600, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [90]:
# Unchanged: the gamma search above selected 0, the value already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 600,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [91]:
# Step 4: search row and column sampling fractions jointly (7 x 7 candidates).
param_grid = {
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
}

In [92]:
# Grid-search subsample x colsample_bytree with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.07 m/s as root mean
Wind MAE:  0.052 m/s in avg
Wind MAPE:  0.61 %
Power RMSE:  34.166 kW as root mean
Power MAE:  20.673 kW in avg
Power MAPE:  1.336 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.074 %
Power RMSE:  158.447 kW as root mean
Power MAE:  98.781 kW in avg
Power MAPE:  7.077 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 9.596929089228313 minutes ---

Lowest RMSE:  0.3385502000295546


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=600, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [93]:
# Unchanged: the sampling search above selected subsample=0.8 and
# colsample_bytree=0.8, the values already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 600,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [94]:
# Step 5: search the L2 regularisation strength.
param_grid = {
    'reg_lambda': [1, 10, 50, 100, 150],
}

In [95]:
# Grid-search reg_lambda with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.07 m/s as root mean
Wind MAE:  0.052 m/s in avg
Wind MAPE:  0.61 %
Power RMSE:  34.166 kW as root mean
Power MAE:  20.673 kW in avg
Power MAPE:  1.336 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.258 m/s in avg
Wind MAPE:  3.074 %
Power RMSE:  158.447 kW as root mean
Power MAE:  98.781 kW in avg
Power MAPE:  7.077 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.0269015192985536 minutes ---

Lowest RMSE:  0.3385502000295546


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=600, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [96]:
# Unchanged: the reg_lambda search above selected 1, the value already in use.
parameters = {
    'max_depth': 5,
    'n_estimators': 600,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [97]:
# Step 6: search the learning rate jointly with a wider range of boosting rounds
# (4 x 7 candidates).
param_grid = {
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000],
}

In [98]:
# Grid-search learning_rate x n_estimators with the remaining hyperparameters fixed.
# Kept as a bare last expression so the cell output is unchanged.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.166 m/s as root mean
Wind MAE:  0.127 m/s in avg
Wind MAPE:  1.478 %
Power RMSE:  78.763 kW as root mean
Power MAE:  48.826 kW in avg
Power MAPE:  3.25 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.253 m/s in avg
Wind MAPE:  3.013 %
Power RMSE:  157.16 kW as root mean
Power MAE:  98.7 kW in avg
Power MAPE:  6.97 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 13.171715541680653 minutes ---

Lowest RMSE:  0.3384472594387375


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=600, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


### Testing

In [99]:
# Final hyperparameters for subset 9, assembled from the grid searches above.
parameters = {
    'max_depth': 5,
    'n_estimators': 2000,    # best value from the learning_rate / n_estimators search
    'learning_rate': 0.01,   # best value from the learning_rate / n_estimators search
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 6,
    'gamma': 0,
    'reg_lambda': 1,
}

In [100]:
#modelling

In [101]:
# Fit XGBoost with the final `parameters`; the fitted object is kept in `model`
# so the next step can hand it to model_testing.
model = modelling_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters,
    plot_error=False, plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.166 m/s as root mean
Wind MAE:  0.127 m/s in avg
Wind MAPE:  1.478 %
Power RMSE:  78.763 kW as root mean
Power MAE:  48.826 kW in avg
Power MAPE:  3.25 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.253 m/s in avg
Wind MAPE:  3.013 %
Power RMSE:  157.16 kW as root mean
Power MAE:  98.7 kW in avg
Power MAPE:  6.97 %


Showing the results of the modelling: 
XGBoost modelling performed


In [102]:
#testing

In [103]:
# Evaluate the fitted `model`; the returned predictions are stored in WS_pred
# and written to disk in the following cell.
WS_pred = model_testing(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.166 m/s as root mean
Wind MAE:  0.127 m/s in avg
Wind MAPE:  1.478 %
Power RMSE:  78.763 kW as root mean
Power MAE:  48.826 kW in avg
Power MAPE:  3.25 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.253 m/s in avg
Wind MAPE:  3.013 %
Power RMSE:  157.16 kW as root mean
Power MAE:  98.7 kW in avg
Power MAPE:  6.97 %


Showing the results of the modelling: 
XGBoost results performed


In [104]:
# Wrap the predictions in a DataFrame and write them out via the notebook's
# save() helper.
WS_pred = pd.DataFrame(WS_pred)
# Raw string: '\R' is not a valid escape sequence, so the plain literal
# '\Results_' triggers an invalid-escape warning on modern Python. r'\Results_'
# has exactly the same value (backslash + "Results_").
save(WS_pred, r'\Results_', 'Subset9_XGB_WTG14.csv')

file Subset9_XGB_WTG14.csv saved in \Results_ folder


## Subset10

In [105]:
# Select feature subset 10 from both splits (same subset index for train and test).
X_train_subset, X_test_subset = (
    subset_selection(X_train, 10),
    subset_selection(X_test, 10),
)

In [106]:
# Show the selected training features for subset 10 (the recorded output is the
# truncated DataFrame rendering, with the middle rows elided).
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1,PR1,WD3,T1,AD1
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881,0.249594,0.270001,0.509751,0.374190
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205,0.593645,0.129344,0.285279,0.658617
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159,0.732628,0.785221,0.104197,0.866180
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972,0.889374,0.717605,0.158289,0.847260
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954,0.745891,0.819320,0.558715,0.435372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528,0.769005,0.708987,0.347711,0.637177
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567,0.651807,0.419980,0.301997,0.655292
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596,0.554040,0.655095,0.223007,0.709991
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414,0.697590,0.844445,0.498942,0.480005


### Grid Search

In [107]:
# Starting hyper-parameters for the subset-10 search; later cells update one
# group of values at a time from the grid-search results.
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [108]:
# Step 1 of the search: number of trees, 100 to 1000 in steps of 100.
param_grid = dict(n_estimators=list(range(100, 1001, 100)))

In [109]:
# Grid search over n_estimators; the call is the cell's last expression, so the
# returned search object is displayed.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 1000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.02 m/s as root mean
Wind MAE:  0.015 m/s in avg
Wind MAPE:  0.174 %
Power RMSE:  9.838 kW as root mean
Power MAE:  6.044 kW in avg
Power MAPE:  0.383 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.255 m/s in avg
Wind MAPE:  3.048 %
Power RMSE:  158.891 kW as root mean
Power MAE:  98.782 kW in avg
Power MAPE:  7.083 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.3419595917065936 minutes ---

Lowest RMSE:  0.33770721529657427


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [110]:
# Unchanged from the previous step: the n_estimators search reported 1000 as
# best, which is already the value in use.
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [111]:
# Step 2: tree depth against minimum child weight (7 x 5 = 35 combinations).
param_grid = dict(
    max_depth=[3, 4, 5, 6, 10, 15, 20],
    min_child_weight=[1, 2, 3, 6, 10],
)

In [112]:
# Grid search over max_depth / min_child_weight; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.016 m/s as root mean
Wind MAE:  0.012 m/s in avg
Wind MAPE:  0.134 %
Power RMSE:  7.935 kW as root mean
Power MAE:  4.646 kW in avg
Power MAPE:  0.295 %

Modelling errors for test set:
Wind RMSE:  0.341 m/s as root mean
Wind MAE:  0.256 m/s in avg
Wind MAPE:  3.061 %
Power RMSE:  159.408 kW as root mean
Power MAE:  98.727 kW in avg
Power MAPE:  7.103 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 15.872538503011068 minutes ---

Lowest RMSE:  0.33503710347006027


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [113]:
# Carry forward the best max_depth (6) and min_child_weight (10) reported by
# the previous grid search.
parameters = dict(
    max_depth=6,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [114]:
# Step 3: candidate gamma values.
param_grid = dict(gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5])

In [115]:
# Grid search over gamma; the returned search object is the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.016 m/s as root mean
Wind MAE:  0.012 m/s in avg
Wind MAPE:  0.134 %
Power RMSE:  7.935 kW as root mean
Power MAE:  4.646 kW in avg
Power MAPE:  0.295 %

Modelling errors for test set:
Wind RMSE:  0.341 m/s as root mean
Wind MAE:  0.256 m/s in avg
Wind MAPE:  3.061 %
Power RMSE:  159.408 kW as root mean
Power MAE:  98.727 kW in avg
Power MAPE:  7.103 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.5897698203722634 minutes ---

Lowest RMSE:  0.33503710347006027


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

In [116]:
# Unchanged from the previous step: the gamma search reported 0 as best, which
# is already the value in use.
parameters = dict(
    max_depth=6,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [117]:
# Step 4: row subsampling against per-tree column subsampling (7 x 7 = 49 combinations).
param_grid = dict(
    subsample=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    colsample_bytree=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
)

In [118]:
# Grid search over subsample / colsample_bytree; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.7, 'subsample': 0.7}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.017 m/s as root mean
Wind MAE:  0.012 m/s in avg
Wind MAPE:  0.145 %
Power RMSE:  8.226 kW as root mean
Power MAE:  4.933 kW in avg
Power MAPE:  0.316 %

Modelling errors for test set:
Wind RMSE:  0.34 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.031 %
Power RMSE:  158.443 kW as root mean
Power MAE:  97.332 kW in avg
Power MAPE:  7.015 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 17.668983991940816 minutes ---

Lowest RMSE:  0.33273951270869984


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

In [119]:
# Carry forward the best subsample (0.7) and colsample_bytree (0.7) reported by
# the previous grid search.
parameters = dict(
    max_depth=6,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [120]:
# Step 5: candidate reg_lambda values.
param_grid = dict(reg_lambda=[1, 10, 50, 100, 150])

In [121]:
# Grid search over reg_lambda; the returned search object is the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.017 m/s as root mean
Wind MAE:  0.012 m/s in avg
Wind MAPE:  0.145 %
Power RMSE:  8.226 kW as root mean
Power MAE:  4.933 kW in avg
Power MAPE:  0.316 %

Modelling errors for test set:
Wind RMSE:  0.34 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.031 %
Power RMSE:  158.443 kW as root mean
Power MAE:  97.332 kW in avg
Power MAPE:  7.015 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.9440810322761535 minutes ---

Lowest RMSE:  0.33273951270869984


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.7,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None

In [122]:
# Unchanged from the previous step: the reg_lambda search reported 1 as best,
# which is already the value in use.
parameters = dict(
    max_depth=6,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [123]:
# Step 6: learning rate against number of trees (4 x 7 = 28 combinations).
param_grid = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.5],
    n_estimators=[800, 900, 1000, 1200, 1500, 1800, 2000],
)

In [124]:
# Grid search over learning_rate / n_estimators; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.138 m/s as root mean
Wind MAE:  0.106 m/s in avg
Wind MAPE:  1.23 %
Power RMSE:  66.011 kW as root mean
Power MAE:  40.737 kW in avg
Power MAPE:  2.714 %

Modelling errors for test set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.248 m/s in avg
Wind MAPE:  2.96 %
Power RMSE:  155.039 kW as root mean
Power MAE:  96.36 kW in avg
Power MAPE:  6.88 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 10.73378409544627 minutes ---

Lowest RMSE:  0.3322744994620697


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.7,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None

### Testing

In [125]:
# Final subset-10 settings: learning_rate (0.01) and n_estimators (2000) are
# the best values reported by the last grid search.
parameters = dict(
    max_depth=6,
    n_estimators=2000,
    learning_rate=0.01,
    subsample=0.7,
    colsample_bytree=0.7,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [126]:
#modelling

In [127]:
# Fit XGBoost on subset 10 with the final `parameters`; the fitted object is
# kept in `model` so the next step can hand it to model_testing.
model = modelling_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters,
    plot_error=False, plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.138 m/s as root mean
Wind MAE:  0.106 m/s in avg
Wind MAPE:  1.23 %
Power RMSE:  66.011 kW as root mean
Power MAE:  40.737 kW in avg
Power MAPE:  2.714 %

Modelling errors for test set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.248 m/s in avg
Wind MAPE:  2.96 %
Power RMSE:  155.039 kW as root mean
Power MAE:  96.36 kW in avg
Power MAPE:  6.88 %


Showing the results of the modelling: 
XGBoost modelling performed


In [128]:
#testing

In [129]:
# Evaluate the fitted `model` on subset 10; the returned predictions are stored
# in WS_pred and written to disk in the following cell.
WS_pred = model_testing(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.138 m/s as root mean
Wind MAE:  0.106 m/s in avg
Wind MAPE:  1.23 %
Power RMSE:  66.011 kW as root mean
Power MAE:  40.737 kW in avg
Power MAPE:  2.714 %

Modelling errors for test set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.248 m/s in avg
Wind MAPE:  2.96 %
Power RMSE:  155.039 kW as root mean
Power MAE:  96.36 kW in avg
Power MAPE:  6.88 %


Showing the results of the modelling: 
XGBoost results performed


In [130]:
# Wrap the predictions in a DataFrame and write them out via the notebook's
# save() helper.
WS_pred = pd.DataFrame(WS_pred)
# Raw string: '\R' is not a valid escape sequence, so the plain literal
# '\Results_' triggers an invalid-escape warning on modern Python. r'\Results_'
# has exactly the same value (backslash + "Results_").
save(WS_pred, r'\Results_', 'Subset10_XGB_WTG14.csv')

file Subset10_XGB_WTG14.csv saved in \Results_ folder


## Subset11

In [131]:
# Select feature subset 11 from both splits (same subset index for train and test).
X_train_subset, X_test_subset = (
    subset_selection(X_train, 11),
    subset_selection(X_test, 11),
)

In [132]:
# Show the selected training features for subset 11 (the recorded output lists
# the subset-10 columns plus T2, with the middle rows elided).
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1,PR1,WD3,T1,AD1,T2
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881,0.249594,0.270001,0.509751,0.374190,0.442748
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205,0.593645,0.129344,0.285279,0.658617,0.228729
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159,0.732628,0.785221,0.104197,0.866180,0.082700
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972,0.889374,0.717605,0.158289,0.847260,0.166427
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954,0.745891,0.819320,0.558715,0.435372,0.573405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528,0.769005,0.708987,0.347711,0.637177,0.384908
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567,0.651807,0.419980,0.301997,0.655292,0.273947
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596,0.554040,0.655095,0.223007,0.709991,0.215115
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414,0.697590,0.844445,0.498942,0.480005,0.426905


### Grid Search

In [133]:
# Starting hyper-parameters for the subset-11 search; later cells update one
# group of values at a time from the grid-search results.
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [134]:
# Step 1 of the search: number of trees, 100 to 1000 in steps of 100.
param_grid = dict(n_estimators=list(range(100, 1001, 100)))

In [135]:
# Grid search over n_estimators; the call is the cell's last expression, so the
# returned search object is displayed.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 700}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.041 m/s as root mean
Wind MAE:  0.031 m/s in avg
Wind MAPE:  0.359 %
Power RMSE:  19.591 kW as root mean
Power MAE:  12.054 kW in avg
Power MAPE:  0.779 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.256 m/s in avg
Wind MAPE:  3.05 %
Power RMSE:  158.467 kW as root mean
Power MAE:  98.914 kW in avg
Power MAPE:  7.063 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.289395201206207 minutes ---

Lowest RMSE:  0.3378930930268559


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [136]:
# Carry forward the best n_estimators (700) reported by the previous grid search.
parameters = dict(
    max_depth=5,
    n_estimators=700,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [137]:
# Step 2: tree depth against minimum child weight (7 x 5 = 35 combinations).
param_grid = dict(
    max_depth=[3, 4, 5, 6, 10, 15, 20],
    min_child_weight=[1, 2, 3, 6, 10],
)

In [138]:
# Grid search over max_depth / min_child_weight; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 2}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.041 m/s as root mean
Wind MAE:  0.031 m/s in avg
Wind MAPE:  0.356 %
Power RMSE:  19.914 kW as root mean
Power MAE:  12.126 kW in avg
Power MAPE:  0.775 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.252 m/s in avg
Wind MAPE:  3.02 %
Power RMSE:  157.795 kW as root mean
Power MAE:  97.491 kW in avg
Power MAPE:  7.055 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 11.0552663564682 minutes ---

Lowest RMSE:  0.3350559231933875


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=700, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [139]:
# Carry forward the previous search's best pair: max_depth stays 5,
# min_child_weight becomes 2.
parameters = dict(
    max_depth=5,
    n_estimators=700,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [140]:
# Step 3: candidate gamma values.
param_grid = dict(gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5])

In [141]:
# Grid search over gamma; the returned search object is the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.041 m/s as root mean
Wind MAE:  0.031 m/s in avg
Wind MAPE:  0.356 %
Power RMSE:  19.914 kW as root mean
Power MAE:  12.126 kW in avg
Power MAPE:  0.775 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.252 m/s in avg
Wind MAPE:  3.02 %
Power RMSE:  157.795 kW as root mean
Power MAE:  97.491 kW in avg
Power MAPE:  7.055 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.6858972231547038 minutes ---

Lowest RMSE:  0.3350559231933875


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=700, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [142]:
# Unchanged from the previous step: the gamma search reported 0 as best, which
# is already the value in use.
parameters = dict(
    max_depth=5,
    n_estimators=700,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [143]:
# Step 4: row subsampling against per-tree column subsampling (7 x 7 = 49 combinations).
param_grid = dict(
    subsample=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    colsample_bytree=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
)

In [144]:
# Grid search over subsample / colsample_bytree; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.5}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.046 m/s as root mean
Wind MAE:  0.036 m/s in avg
Wind MAPE:  0.412 %
Power RMSE:  22.649 kW as root mean
Power MAE:  14.076 kW in avg
Power MAPE:  0.897 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.039 %
Power RMSE:  158.656 kW as root mean
Power MAE:  99.341 kW in avg
Power MAPE:  7.071 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 13.300392993291219 minutes ---

Lowest RMSE:  0.3339848167540508


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=700, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [145]:
# Carry forward the previous search's best pair: subsample becomes 0.5,
# colsample_bytree stays 0.8.
parameters = dict(
    max_depth=5,
    n_estimators=700,
    learning_rate=0.1,
    subsample=0.5,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [146]:
# Step 5: candidate reg_lambda values.
param_grid = dict(reg_lambda=[1, 10, 50, 100, 150])

In [147]:
# Grid search over reg_lambda; the returned search object is the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.046 m/s as root mean
Wind MAE:  0.036 m/s in avg
Wind MAPE:  0.412 %
Power RMSE:  22.649 kW as root mean
Power MAE:  14.076 kW in avg
Power MAPE:  0.897 %

Modelling errors for test set:
Wind RMSE:  0.338 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.039 %
Power RMSE:  158.656 kW as root mean
Power MAE:  99.341 kW in avg
Power MAPE:  7.071 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.1436723748842874 minutes ---

Lowest RMSE:  0.3339848167540508


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=700, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.5,
                                    tree_method=None, validate_parameters=None,


In [148]:
# Unchanged from the previous step: the reg_lambda search reported 1 as best,
# which is already the value in use.
parameters = dict(
    max_depth=5,
    n_estimators=700,
    learning_rate=0.1,
    subsample=0.5,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [149]:
# Step 6: learning rate against number of trees (4 x 7 = 28 combinations).
param_grid = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.5],
    n_estimators=[800, 900, 1000, 1200, 1500, 1800, 2000],
)

In [150]:
# Grid search over learning_rate / n_estimators; the returned search object is
# the cell's displayed value.
GridSearch_XGBoost(
    X_train_subset, X_test_subset,
    y_train, y_test,
    PC, parameters, param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.156 m/s as root mean
Wind MAE:  0.122 m/s in avg
Wind MAPE:  1.424 %
Power RMSE:  74.335 kW as root mean
Power MAE:  46.911 kW in avg
Power MAPE:  3.123 %

Modelling errors for test set:
Wind RMSE:  0.33 m/s as root mean
Wind MAE:  0.247 m/s in avg
Wind MAPE:  2.949 %
Power RMSE:  154.999 kW as root mean
Power MAE:  96.188 kW in avg
Power MAPE:  6.866 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 13.537594083944956 minutes ---

Lowest RMSE:  0.32952813312889057


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=2,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=700, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.5,
                                    tree_method=None, validate_parameters=None,


### Testing

In [151]:
# Final subset-11 settings: learning_rate (0.01) and n_estimators (2000) are
# the best values reported by the last grid search.
parameters = dict(
    max_depth=5,
    n_estimators=2000,
    learning_rate=0.01,
    subsample=0.5,
    colsample_bytree=0.8,
    min_child_weight=2,
    gamma=0,
    reg_lambda=1,
)

In [152]:
#modelling

In [153]:
# Fit XGBoost with the tuned `parameters`; the returned model is reused by the
# testing cell below.
model = modelling_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    plot_error=False,
    plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.156 m/s as root mean
Wind MAE:  0.122 m/s in avg
Wind MAPE:  1.424 %
Power RMSE:  74.335 kW as root mean
Power MAE:  46.911 kW in avg
Power MAPE:  3.123 %

Modelling errors for test set:
Wind RMSE:  0.33 m/s as root mean
Wind MAE:  0.247 m/s in avg
Wind MAPE:  2.949 %
Power RMSE:  154.999 kW as root mean
Power MAE:  96.188 kW in avg
Power MAPE:  6.866 %


Showing the results of the modelling: 
XGBoost modelling performed


In [154]:
#testing

In [155]:
# Evaluate `model` on the train/test split and keep the returned predictions.
WS_pred = model_testing(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.156 m/s as root mean
Wind MAE:  0.122 m/s in avg
Wind MAPE:  1.424 %
Power RMSE:  74.335 kW as root mean
Power MAE:  46.911 kW in avg
Power MAPE:  3.123 %

Modelling errors for test set:
Wind RMSE:  0.33 m/s as root mean
Wind MAE:  0.247 m/s in avg
Wind MAPE:  2.949 %
Power RMSE:  154.999 kW as root mean
Power MAE:  96.188 kW in avg
Power MAPE:  6.866 %


Showing the results of the modelling: 
XGBoost results performed


In [156]:
# Wrap the predictions in a DataFrame and write them out through `save`.
WS_pred = pd.DataFrame(WS_pred)
# Raw string: '\R' is not a valid escape sequence, so the plain literal raises an
# invalid-escape warning on recent Python versions. The folder value is unchanged.
save(WS_pred, r'\Results_', 'Subset11_XGB_WTG14.csv')

file Subset11_XGB_WTG14.csv saved in \Results_ folder


## Subset12

In [157]:
# Select subset 12 of the train and test inputs via `subset_selection`.
X_train_subset = subset_selection(X_train, 12)
X_test_subset = subset_selection(X_test, 12)

In [158]:
# Show the training inputs selected for this subset (cell output only).
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer,RH1,PR1,WD3,T1,AD1,T2,RH2
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759,0.611881,0.249594,0.270001,0.509751,0.374190,0.442748,0.675888
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850,0.842205,0.593645,0.129344,0.285279,0.658617,0.228729,0.903947
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993,0.768159,0.732628,0.785221,0.104197,0.866180,0.082700,0.782070
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120,0.655972,0.889374,0.717605,0.158289,0.847260,0.166427,0.618053
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545,0.446954,0.745891,0.819320,0.558715,0.435372,0.573405,0.407095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316,0.722528,0.769005,0.708987,0.347711,0.637177,0.384908,0.634749
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778,0.823567,0.651807,0.419980,0.301997,0.655292,0.273947,0.825479
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315,0.911596,0.554040,0.655095,0.223007,0.709991,0.215115,0.922228
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288,0.447414,0.697590,0.844445,0.498942,0.480005,0.426905,0.608633


### Grid Search

In [159]:
# Starting XGBoost settings for the Subset12 grid search.
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [160]:
# Candidate tree counts: 100 to 1000 in steps of 100.
param_grid = dict(
    n_estimators=list(range(100, 1001, 100)),
)

In [161]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 900}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.024 m/s as root mean
Wind MAE:  0.018 m/s in avg
Wind MAPE:  0.208 %
Power RMSE:  11.635 kW as root mean
Power MAE:  7.074 kW in avg
Power MAPE:  0.449 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  3.004 %
Power RMSE:  157.992 kW as root mean
Power MAE:  97.242 kW in avg
Power MAPE:  6.988 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.486509164174398 minutes ---

Lowest RMSE:  0.33833341110904797


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [162]:
# n_estimators set to the best value from the previous search (900).
parameters = dict(
    max_depth=5,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [163]:
# Candidate tree depths and minimum child weights, searched jointly.
param_grid = dict(
    max_depth=[3, 4, 5, 6, 10, 15, 20],
    min_child_weight=[1, 2, 3, 6, 10],
)

In [164]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.012 m/s as root mean
Wind MAE:  0.008 m/s in avg
Wind MAPE:  0.098 %
Power RMSE:  5.738 kW as root mean
Power MAE:  3.376 kW in avg
Power MAPE:  0.213 %

Modelling errors for test set:
Wind RMSE:  0.334 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  2.993 %
Power RMSE:  157.444 kW as root mean
Power MAE:  97.278 kW in avg
Power MAPE:  6.883 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 14.169322232405344 minutes ---

Lowest RMSE:  0.33411819701740836


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [165]:
# max_depth / min_child_weight set to the best pair from the previous search (6 / 6).
parameters = dict(
    max_depth=6,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=6,
    gamma=0,
    reg_lambda=1,
)

In [166]:
# Candidate gamma values.
param_grid = dict(
    gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5],
)

In [167]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.012 m/s as root mean
Wind MAE:  0.008 m/s in avg
Wind MAPE:  0.098 %
Power RMSE:  5.738 kW as root mean
Power MAE:  3.376 kW in avg
Power MAPE:  0.213 %

Modelling errors for test set:
Wind RMSE:  0.334 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  2.993 %
Power RMSE:  157.444 kW as root mean
Power MAE:  97.278 kW in avg
Power MAPE:  6.883 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 4.863227915763855 minutes ---

Lowest RMSE:  0.33411819701740836


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [168]:
# The gamma search kept gamma=0, so the settings are the same as before it.
parameters = dict(
    max_depth=6,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=6,
    gamma=0,
    reg_lambda=1,
)

In [169]:
# Candidate row and column sampling fractions, searched jointly.
param_grid = dict(
    subsample=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    colsample_bytree=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
)

In [170]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.9, 'subsample': 0.6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.011 m/s as root mean
Wind MAE:  0.009 m/s in avg
Wind MAPE:  0.1 %
Power RMSE:  5.528 kW as root mean
Power MAE:  3.355 kW in avg
Power MAPE:  0.218 %

Modelling errors for test set:
Wind RMSE:  0.336 m/s as root mean
Wind MAE:  0.254 m/s in avg
Wind MAPE:  3.043 %
Power RMSE:  157.891 kW as root mean
Power MAE:  98.535 kW in avg
Power MAPE:  7.054 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 18.8306915918986 minutes ---

Lowest RMSE:  0.33060636199383436


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [171]:
# subsample / colsample_bytree set to the best pair from the previous search (0.6 / 0.9).
parameters = dict(
    max_depth=6,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.6,
    colsample_bytree=0.9,
    min_child_weight=6,
    gamma=0,
    reg_lambda=1,
)

In [172]:
# Candidate reg_lambda values.
param_grid = dict(
    reg_lambda=[1, 10, 50, 100, 150],
)

In [173]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.021 m/s as root mean
Wind MAE:  0.015 m/s in avg
Wind MAPE:  0.177 %
Power RMSE:  10.151 kW as root mean
Power MAE:  5.944 kW in avg
Power MAPE:  0.384 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.255 m/s in avg
Wind MAPE:  3.042 %
Power RMSE:  158.611 kW as root mean
Power MAE:  99.012 kW in avg
Power MAPE:  7.042 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.277798052628835 minutes ---

Lowest RMSE:  0.33021384662274567


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.9,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.6,
                                    tree_method=None, validate_parameters=None,


In [174]:
# reg_lambda set to the best value from the previous search (10).
parameters = dict(
    max_depth=6,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.6,
    colsample_bytree=0.9,
    min_child_weight=6,
    gamma=0,
    reg_lambda=10,
)

In [175]:
# Candidate learning rates and tree counts, searched jointly.
param_grid = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.5],
    n_estimators=[800, 900, 1000, 1200, 1500, 1800, 2000],
)

In [176]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.151 m/s as root mean
Wind MAE:  0.114 m/s in avg
Wind MAPE:  1.327 %
Power RMSE:  70.663 kW as root mean
Power MAE:  43.141 kW in avg
Power MAPE:  2.919 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.246 m/s in avg
Wind MAPE:  2.932 %
Power RMSE:  152.589 kW as root mean
Power MAE:  95.36 kW in avg
Power MAPE:  6.792 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 12.574488727251689 minutes ---

Lowest RMSE:  0.3263153064297175


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.9,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.6,
                                    tree_method=None, validate_parameters=None,

### Testing

In [177]:
# Final XGBoost settings for Subset12: learning_rate / n_estimators are the
# best values reported by the last grid search (0.01 / 2000).
parameters = dict(
    max_depth=6,
    n_estimators=2000,
    learning_rate=0.01,
    subsample=0.6,
    colsample_bytree=0.9,
    min_child_weight=6,
    gamma=0,
    reg_lambda=10,
)

In [178]:
#modelling

In [179]:
# Fit XGBoost with the tuned `parameters`; the returned model is reused by the
# testing cell below.
model = modelling_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    plot_error=False,
    plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.151 m/s as root mean
Wind MAE:  0.114 m/s in avg
Wind MAPE:  1.327 %
Power RMSE:  70.663 kW as root mean
Power MAE:  43.141 kW in avg
Power MAPE:  2.919 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.246 m/s in avg
Wind MAPE:  2.932 %
Power RMSE:  152.589 kW as root mean
Power MAE:  95.36 kW in avg
Power MAPE:  6.792 %


Showing the results of the modelling: 
XGBoost modelling performed


In [180]:
#testing

In [181]:
# Evaluate `model` on the train/test split and keep the returned predictions.
WS_pred = model_testing(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.151 m/s as root mean
Wind MAE:  0.114 m/s in avg
Wind MAPE:  1.327 %
Power RMSE:  70.663 kW as root mean
Power MAE:  43.141 kW in avg
Power MAPE:  2.919 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.246 m/s in avg
Wind MAPE:  2.932 %
Power RMSE:  152.589 kW as root mean
Power MAE:  95.36 kW in avg
Power MAPE:  6.792 %


Showing the results of the modelling: 
XGBoost results performed


In [182]:
# Wrap the predictions in a DataFrame and write them out through `save`.
WS_pred = pd.DataFrame(WS_pred)
# Raw string: '\R' is not a valid escape sequence, so the plain literal raises an
# invalid-escape warning on recent Python versions. The folder value is unchanged.
save(WS_pred, r'\Results_', 'Subset12_XGB_WTG14.csv')

file Subset12_XGB_WTG14.csv saved in \Results_ folder


## Subset13

In [183]:
# Select subset 13 of the train and test inputs via `subset_selection`.
X_train_subset = subset_selection(X_train, 13)
X_test_subset = subset_selection(X_test, 13)

In [184]:
# Show the training inputs selected for this subset (cell output only).
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,...,WVeer,RH1,PR1,WD3,T1,AD1,T2,RH2,PR2,AD2
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,...,0.094759,0.611881,0.249594,0.270001,0.509751,0.374190,0.442748,0.675888,0.230855,0.420409
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,...,0.607850,0.842205,0.593645,0.129344,0.285279,0.658617,0.228729,0.903947,0.595287,0.704532
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,...,0.678993,0.768159,0.732628,0.785221,0.104197,0.866180,0.082700,0.782070,0.751327,0.884967
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,...,0.468120,0.655972,0.889374,0.717605,0.158289,0.847260,0.166427,0.618053,0.938485,0.840702
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,...,0.466545,0.446954,0.745891,0.819320,0.558715,0.435372,0.573405,0.407095,0.746792,0.406103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,...,0.507316,0.722528,0.769005,0.708987,0.347711,0.637177,0.384908,0.634749,0.784920,0.592542
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,...,0.521778,0.823567,0.651807,0.419980,0.301997,0.655292,0.273947,0.825479,0.664979,0.674933
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,...,0.537315,0.911596,0.554040,0.655095,0.223007,0.709991,0.215115,0.922228,0.572454,0.713095
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,...,0.669288,0.447414,0.697590,0.844445,0.498942,0.480005,0.426905,0.608633,0.714323,0.537327


### Grid Search

In [185]:
# Starting XGBoost settings for the Subset13 grid search.
parameters = dict(
    max_depth=5,
    n_estimators=1000,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [186]:
# Candidate tree counts: 100 to 1000 in steps of 100.
param_grid = dict(
    n_estimators=list(range(100, 1001, 100)),
)

In [187]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 900}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.023 m/s as root mean
Wind MAE:  0.018 m/s in avg
Wind MAPE:  0.206 %
Power RMSE:  11.595 kW as root mean
Power MAE:  7.061 kW in avg
Power MAPE:  0.448 %

Modelling errors for test set:
Wind RMSE:  0.336 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  2.981 %
Power RMSE:  158.35 kW as root mean
Power MAE:  97.779 kW in avg
Power MAPE:  6.856 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.236876861254374 minutes ---

Lowest RMSE:  0.33592389785549415


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [188]:
# n_estimators set to the best value from the previous search (900).
parameters = dict(
    max_depth=5,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=1,
    gamma=0,
    reg_lambda=1,
)

In [189]:
# Candidate tree depths and minimum child weights, searched jointly.
param_grid = dict(
    max_depth=[3, 4, 5, 6, 10, 15, 20],
    min_child_weight=[1, 2, 3, 6, 10],
)

In [190]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 10, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.001 m/s as root mean
Wind MAE:  0.0 m/s in avg
Wind MAPE:  0.005 %
Power RMSE:  0.316 kW as root mean
Power MAE:  0.173 kW in avg
Power MAPE:  0.011 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  3.001 %
Power RMSE:  158.827 kW as root mean
Power MAE:  97.887 kW in avg
Power MAPE:  6.952 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 15.237314681212107 minutes ---

Lowest RMSE:  0.3351204582249217


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [191]:
# max_depth / min_child_weight set to the best pair from the previous search (10 / 10).
parameters = dict(
    max_depth=10,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [192]:
# Candidate gamma values.
param_grid = dict(
    gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5],
)

In [193]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.001 m/s as root mean
Wind MAE:  0.0 m/s in avg
Wind MAPE:  0.005 %
Power RMSE:  0.316 kW as root mean
Power MAE:  0.173 kW in avg
Power MAPE:  0.011 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.251 m/s in avg
Wind MAPE:  3.001 %
Power RMSE:  158.827 kW as root mean
Power MAE:  97.887 kW in avg
Power MAPE:  6.952 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 5.051323513189952 minutes ---

Lowest RMSE:  0.3351204582249217


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=10, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

In [194]:
# The gamma search kept gamma=0, so the settings are the same as before it.
parameters = dict(
    max_depth=10,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [195]:
# Candidate row and column sampling fractions, searched jointly.
param_grid = dict(
    subsample=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    colsample_bytree=[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
)

In [196]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.9, 'subsample': 0.7}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.001 m/s as root mean
Wind MAE:  0.0 m/s in avg
Wind MAPE:  0.005 %
Power RMSE:  0.289 kW as root mean
Power MAE:  0.154 kW in avg
Power MAPE:  0.01 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.252 m/s in avg
Wind MAPE:  3.006 %
Power RMSE:  157.305 kW as root mean
Power MAE:  97.632 kW in avg
Power MAPE:  6.986 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 25.98923443555832 minutes ---

Lowest RMSE:  0.33247106339996285


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=10, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

In [197]:
# subsample / colsample_bytree set to the best pair from the previous search (0.7 / 0.9).
parameters = dict(
    max_depth=10,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.7,
    colsample_bytree=0.9,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [198]:
# Candidate reg_lambda values.
param_grid = dict(
    reg_lambda=[1, 10, 50, 100, 150],
)

In [199]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.001 m/s as root mean
Wind MAE:  0.0 m/s in avg
Wind MAPE:  0.005 %
Power RMSE:  0.289 kW as root mean
Power MAE:  0.154 kW in avg
Power MAPE:  0.01 %

Modelling errors for test set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.252 m/s in avg
Wind MAPE:  3.006 %
Power RMSE:  157.305 kW as root mean
Power MAE:  97.632 kW in avg
Power MAPE:  6.986 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.4988684097925824 minutes ---

Lowest RMSE:  0.33247106339996285


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.9,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=10, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None

In [200]:
# The reg_lambda search kept reg_lambda=1, so the settings are the same as before it.
parameters = dict(
    max_depth=10,
    n_estimators=900,
    learning_rate=0.1,
    subsample=0.7,
    colsample_bytree=0.9,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [201]:
# Candidate learning rates and tree counts, searched jointly.
param_grid = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.5],
    n_estimators=[800, 900, 1000, 1200, 1500, 1800, 2000],
)

In [202]:
# Grid search over `param_grid`; `parameters` supplies the fixed XGBoost settings.
GridSearch_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    param_grid,
    plot_error=False,
)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.056 m/s as root mean
Wind MAE:  0.042 m/s in avg
Wind MAPE:  0.482 %
Power RMSE:  26.894 kW as root mean
Power MAE:  16.074 kW in avg
Power MAPE:  1.054 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.245 m/s in avg
Wind MAPE:  2.927 %
Power RMSE:  154.495 kW as root mean
Power MAE:  95.629 kW in avg
Power MAPE:  6.813 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 16.17221982081731 minutes ---

Lowest RMSE:  0.3261163683780708


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.9,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=10, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=900, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None

### Testing

In [203]:
# Final XGBoost settings for Subset13: learning_rate / n_estimators are the
# best values reported by the last grid search (0.01 / 2000).
parameters = dict(
    max_depth=10,
    n_estimators=2000,
    learning_rate=0.01,
    subsample=0.7,
    colsample_bytree=0.9,
    min_child_weight=10,
    gamma=0,
    reg_lambda=1,
)

In [204]:
#modelling

In [205]:
# Fit XGBoost with the tuned `parameters`; the returned model is reused by the
# testing cell below.
model = modelling_XGBoost(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    parameters,
    plot_error=False,
    plot=True,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.056 m/s as root mean
Wind MAE:  0.042 m/s in avg
Wind MAPE:  0.482 %
Power RMSE:  26.894 kW as root mean
Power MAE:  16.074 kW in avg
Power MAPE:  1.054 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.245 m/s in avg
Wind MAPE:  2.927 %
Power RMSE:  154.495 kW as root mean
Power MAE:  95.629 kW in avg
Power MAPE:  6.813 %


Showing the results of the modelling: 
XGBoost modelling performed


In [206]:
#testing

In [207]:
# Evaluate `model` on the train/test split and keep the returned predictions.
WS_pred = model_testing(
    X_train_subset,
    X_test_subset,
    y_train,
    y_test,
    PC,
    model,
    plot_error=False,
)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.056 m/s as root mean
Wind MAE:  0.042 m/s in avg
Wind MAPE:  0.482 %
Power RMSE:  26.894 kW as root mean
Power MAE:  16.074 kW in avg
Power MAPE:  1.054 %

Modelling errors for test set:
Wind RMSE:  0.327 m/s as root mean
Wind MAE:  0.245 m/s in avg
Wind MAPE:  2.927 %
Power RMSE:  154.495 kW as root mean
Power MAE:  95.629 kW in avg
Power MAPE:  6.813 %


Showing the results of the modelling: 
XGBoost results performed


In [208]:
# Wrap the predictions in a DataFrame and write them out through `save`.
WS_pred = pd.DataFrame(WS_pred)
# Raw string: '\R' is not a valid escape sequence, so the plain literal raises an
# invalid-escape warning on recent Python versions. The folder value is unchanged.
save(WS_pred, r'\Results_', 'Subset13_XGB_WTG14.csv')

file Subset13_XGB_WTG14.csv saved in \Results_ folder
