# Packages

In [1]:
#basic packages
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

#data pre-processing packages
from datetime import datetime


#results and analysis packages
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# data modelling & results
from yellowbrick.regressor import PredictionError, ResidualsPlot
from sklearn.model_selection import train_test_split
import math as math

#NN
import xgboost as xgb

from tensorflow import keras
from keras.utils.vis_utils import plot_model
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

#feature importance
import shap


# Script

## Error computation

In [3]:
#defining the Root Mean Squared Error

def rmse(y_true, y_predicted):
    
    return np.sqrt(mean_squared_error(y_true, y_predicted))

In [4]:
#errors computation

def errors_computation(data):
    
    df=pd.DataFrame()
    #df.at['RMSE (as root mean)', 'Wind']= round(rmse(data['Target'], data['WS_pred']), 3)
    df.at['MAE (in avg)', 'Wind']= round(mae(data['Target'], data['WS_pred']), 3)
    df.at['MAPE (%)', 'Wind']= round(mape(data['Target'], data['WS_pred'])*100, 3)
    
    #df.at['RMSE (as root mean)', 'Power']= round(rmse(data['P'], data['P_pred']), 3)
    df.at['MAE (in avg)', 'Power']= round(mae(data['P'], data['P_pred']), 3)
    df.at['MAPE (%)', 'Power']= round(mape(data['P'], data['P_pred'])*100, 3)
    
    
    print('Wind RMSE: ', round(rmse(data['Target'], data['WS_pred']), 3), 'm/s as root mean')
    print('Wind MAE: ', round(mae(data['Target'], data['WS_pred']), 3), 'm/s in avg')
    print('Wind MAPE: ', round(mape(data['Target'], data['WS_pred'])*100, 3), '%')
    
    print('Power RMSE: ', round(rmse(data['P'], data['P_pred']), 3), 'kW as root mean')
    print('Power MAE: ', round(mae(data['P'], data['P_pred']), 3), 'kW in avg')
    print('Power MAPE: ', round(mape(data['P'], data['P_pred'])*100, 3), '%')
    
    return df

In [5]:
def error_plot(data, title):
    
    #title is expected to be an str
    #WS_pred and Target should be the variables names

    #plotting the reference
    plt.figure(figsize=(12,8))
    plt.plot([-1,17.5],[-1,17.5], 'green', linewidth=4, alpha=.12)
    plt.plot(data['WS_pred'], data['Target'], marker='o', ls='', label='Regression', markersize=5, alpha=.1)


    plt.legend()

    ax=plt.gca()
    ax.set(xlabel='y predicted', ylabel='y actual');
    ax.set_title(title)
    ax.set_ylim(ymin=4, ymax=17.5)
    ax.set_xlim(xmin=4, xmax=17.5)
    
    return print('')

In [6]:
def powercurve_computation(data, power_curve):
    
    from scipy import interpolate
    
    #this function computes the power at a observation given the information at a observation:
    # the WS (in m/s) at the wind turbine location and at the hub height (Target)
    # the power curve of the wind turbine in an xslx
    
    
    x=power_curve['Wind Speed [m/s]']
    y=power_curve['Warranted Power Curve [kW]']
    x_new=data['Target']
    
    f = interpolate.interp1d(x, y)
    #, kind='linear'
    data['P']=f(x_new)
    
    if 'WS_pred' in data.keys():
        x_new2=data['WS_pred']
        data['P_pred']=f(x_new2)
    
    print('power curve computation performed')
    
    return data

In [7]:
def control_power_computation (data_test, data_train, power_curve):
    
    results_test=pd.DataFrame()
    results_train=pd.DataFrame()
    
    
    results_test=powercurve_computation(data_test, power_curve)
    results_train=powercurve_computation(data_train, power_curve)

    return results_test, results_train

In [8]:
def compute_results(data_test, data_train, power_curve, plot_error):
    
    #this function computes and plots the results of a modelling:

    results_test, results_train=control_power_computation (data_test, data_train, power_curve)
    
    
    print('Modelling errors for training set:')
    errors_computation(results_train)
    print('')
    print('Modelling errors for test set:')
    errors_computation(results_test)
    print('')
    
    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    return print('Showing the results of the modelling: ')

In [9]:
def compute_results1(data_test, data_train, power_curve, plot_error):
    
    #this function computes and plots the results of a modelling:

    results_test, results_train=control_power_computation (data_test, data_train, power_curve)
    
    
    print('Modelling errors for training set:')
    errors_computation(results_train)
    print('')
    print('Modelling errors for test set:')
    errors_computation(results_test)
    print('')
    print('Modelling errors in AEP terms for test set')
    data=AEP_computation(results_test, power_curve)
    
    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    print('Showing the results of the modelling: ')
    return data

In [10]:
def AEP_computation(data, power_curve):
    
    #binning the WS_Pred
    data['WS_pred_bin']=pd.cut(data['WS_pred'],
                                       bins=list(np.arange(-0.25,28.0,0.5)),
                                                 labels=list(np.arange(0.0,28.0,0.5)))

        
    #creating the AEP (Annual Energy Production table)
        
    AEP_table=pd.DataFrame()
    
    AEP_table['WS']=PC['Wind Speed [m/s]']
    AEP_table['WPC']=PC['Warranted Power Curve [kW]']
    AEP_table.set_index('WS', inplace=True)
    AEP_table['MPC_SC']=data.groupby('WS_pred_bin', as_index=True)['P'].mean()

    #puedo rellenar los NaN de MPC_SC copiando lo que tendría que ser: WPC
    
#     for i in AEP_table.index:
#         if math.isnan(AEP_table.at[i, 'MPC_SC']):
#             AEP_table.at[i, 'MPC_SC']=AEP_table.at[i, 'WPC']
    
    AEP_table['WSAD']=power_curve['Wind Speed Distribution [Hr/Year]']
    
    #o igual aquí tendría que cortar la tabla para NaN values
    
#     AEP_table=AEP_table.dropna()
    
    AEP_table['WAEP']=AEP_table['WPC']*AEP_table['WSAD']
    AEP_table['MAEP_SC']=AEP_table['MPC_SC']*AEP_table['WSAD']
    
    #computing the metrics for later printing
    
    MAEP_SC=AEP_table['MAEP_SC'].sum()
    WAEP=AEP_table['WAEP'].sum()
    AEP_perc=(MAEP_SC/WAEP)*100
    AEP_diff=(MAEP_SC-WAEP)/1000
    
    #printing the results

    print('')
    print('The computed AEP_table is: ')
    AEP_table(AEP_table)
    print('')
    print('The AEP% of the Site Calibration is: ', round(AEP_perc, 3), '%')
    print('')
    print('The AEP difference of the Site Calibration is: ', round(AEP_diff, 3), 'MWh')
    print('')
    AEP_plot(data, AEP_table)                                
    
    
    return data

In [11]:
def AEP_plot(data, AEP_table):
    
    #data is expected to contain: Target, WS_pred, P, P_pred
    #AEP_table is expected to contain: WS, WPC, MPC-SC

    #plotting the Power curves
    plt.figure(figsize=(10,6))
    plt.plot(data['WS_pred'], data['P'], marker='o', ls='', label='measured scatter', markersize=5, alpha=.1)
    plt.plot(AEP_table.index, AEP_table['WPC'], marker='o', color='black', linewidth=0.5, label='WPC', markersize=5)
    plt.plot(AEP_table.index, AEP_table['MPC_SC'], marker='o', color='red', linewidth=0.5, label='MPC-SC', markersize=5)


    plt.legend()

    ax=plt.gca()
    ax.set(ylabel='Power interpolated with WPC and Target WS (kW)', xlabel='Predicted Wind Speed (m/s)');
    ax.set_title('Site Calibration Power Curve')
    ax.set_xlim(xmin=0, xmax=20)
#     ax.set_xlim(xmin=0, xmax=27.5)
    
    return print('Error plot performed')

In [12]:
def AEP_table(AEP_table):
    
    #AEP_table is expected to contain: WPC, MPC_SC, WPC, WSAD, WAEP, MAEP
    
    WS= AEP_table.index
    WPC=AEP_table['WPC']
    MPC_SC=round(AEP_table['MPC_SC'],0)
    WSAD=AEP_table['WASD']
    WAEP=round(AEP_table['WAEP']/1000000, 3)
    MAEP=round(AEP_table['MAEP']/1000000, 3)
    

    row_headers=None
    column_headers=('Wind speed bin (m/s)', 'WPC (kW)', 'MPC_SC (kW)',
                    'Annual Wind Distribution (h/year)', 'WAEP (GWh)', 'MAEP (GWh)')

    rcolors = plt.cm.BuPu(np.full(len(row_headers), 0.1))
    ccolors = plt.cm.BuPu(np.full(len(column_headers), 0.1))

    l_2d=[list(WS), list(WPC), list(MPC), list(WSAD), list(WAEP), list(MAEP)]

    cell_text=np.array(l_2d).T

    fig, ax = plt.subplots(figsize=(12,5)) 
    ax.set_axis_off()

    the_table=ax.table(cellText=cell_text,
                      rowLabels=row_headers,
                      rowColours=rcolors,
                      rowLoc='right',
                      colColours=ccolors,
                      colLabels=column_headers,
                      cellLoc ='center',
                      loc='upper center',
                      colWidths = [0.1] * 5)

    ax.set_title('Annual Energy Production for Site Calibration PC', 
                 fontweight ="bold", fontsize=14) 

    the_table.auto_set_font_size(False)
    the_table.set_fontsize(14)
    the_table.scale(2, 2)
    
    return ('AEP table printed')

## Data uploading

In [13]:
def uploading_csv(file_folder,file_name):
    
    #file folder required
    #file name required
    #file is expected to be in the data root: r'C:\Users\irgaa\Irma\Data'
    #this function uploads and formats csv/txt/xlsx datasets into DataFrame
    
    
    data_root=r'C:\Users\irgaa\Irma\Data'
    data_folder=str(file_folder)
    data_file=str(file_name)
    
    data_path=data_root+data_folder+data_file
    
    data1 = pd.read_csv(data_path)

    
    # We will save the WD_bin as the index
    
    return data1

In [14]:
#this function saves a data csv

def save (data, file_folder,file_name):
    
    #file folder required
    #file name required
    #file is expected to be saved in the data root: r'C:\Users\irgaa\Irma\Data'
    #this function saves a csv/txt/xlsx into Irma's folder
    #the saved file will keep the columns names but not the index
    
    data_root=r'C:\Users\irgaa\Irma\Data'
    data_folder=str(file_folder)
    data_file=str(file_name)
    
    data_path=data_root+data_folder+data_file
    
    data.to_csv (data_path, index = False, header=True)
    
    
    return print('file', data_file, 'saved in', data_folder, 'folder')

## Data selection

In [15]:
def data_selection(X_train, X_test, inputs):
    
    #this function returns the columns of the training and test sets in the inputs list
    
    X_train1 = pd.DataFrame()
    X_test1 = pd.DataFrame()
    
    
    X_train1 = X_train[inputs]
    X_test1 = X_test[inputs]

    
    return X_train1,X_test1

In [16]:
def data_drop(X_train, X_test, list_2drop):
    
    #this function returns the columns of the training and test sets in the inputs list

    X_train1 = X_train.drop(columns=list_2drop)
    X_test1 = X_test.drop(columns=list_2drop)

    
    
    return X_train1,X_test1

## Subsets

In [17]:
def subset_selection (data, number):
    
    
    #this function returns a subset of sensors as dataframe
    #data is the original dataset with all sensors named: WS1, tod, WSVer, etc.
    #number is a value that indicates the desired subset of sensors
    #final is the dataframe returned by this function with the desired subset of sensors
    
    
    
    #first we identify the sensors one by one:
    WS1=pd.DataFrame(data['WS1'].copy())
    WS3=pd.DataFrame(data['WS3'].copy())
    WS4=pd.DataFrame(data['WS4'].copy())
    WD1=pd.DataFrame(data['WD1'].copy())
    WD3=pd.DataFrame(data['WD3'].copy())
    WD4=pd.DataFrame(data['WD4'].copy())
    tod=pd.DataFrame(data['tod'].copy())
    TI=pd.DataFrame(data['TI'].copy())
    WSH=pd.DataFrame(data['WSH'].copy())
    WVeer=pd.DataFrame(data['WVeer'].copy())
    RH1=pd.DataFrame(data['RH1'].copy())
    RH2=pd.DataFrame(data['RH2'].copy())
    PR1=pd.DataFrame(data['PR1'].copy())
    PR2=pd.DataFrame(data['PR2'].copy())
    T1=pd.DataFrame(data['T1'].copy())
    T2=pd.DataFrame(data['T2'].copy())
    AD1=pd.DataFrame(data['AD1'].copy())
    AD2=pd.DataFrame(data['AD2'].copy())
    WSHor=pd.DataFrame(data['WSHor'].copy())
    WSVer=pd.DataFrame(data['WSVer'].copy())
    WDHor=pd.DataFrame(data['WDHor'].copy())
    WDVer=pd.DataFrame(data['WDVer'].copy())
    Rain=pd.DataFrame(data['Rain'].copy())
    
    
    #second the sensors are identified:
    sensors1=pd.concat([WS1, tod, TI], axis=1)
    sensors2=pd.concat([WSHor, WSVer, WDHor, WDVer], axis=1)
    sensors3=pd.concat([WS4, WSH], axis=1)
    sensors4=WS3
    sensors5=WD4
    sensors6=pd.concat([WD1, WVeer], axis=1)
    sensors7=RH1
    sensors8=PR1
    sensors9=WD3
    sensors10=pd.concat([T1, AD1], axis=1)
    sensors11=T2
    sensors12=RH2
    sensors13=pd.concat([PR2, AD2], axis=1)
    sensors14=Rain
    
    
    #third, we identify the subsets
    subset1=sensors1
    subset2=pd.concat([subset1, sensors2], axis=1)
    subset3=pd.concat([subset2, sensors3], axis=1)
    subset4=pd.concat([subset3, sensors4], axis=1)
    subset5=pd.concat([subset4, sensors5], axis=1)
    subset6=pd.concat([subset5, sensors6], axis=1)
    subset7=pd.concat([subset6, sensors7], axis=1)
    subset8=pd.concat([subset7, sensors8], axis=1)
    subset9=pd.concat([subset8, sensors9], axis=1)
    subset10=pd.concat([subset9, sensors10], axis=1)
    subset11=pd.concat([subset10, sensors11], axis=1)
    subset12=pd.concat([subset11, sensors12], axis=1)
    subset13=pd.concat([subset12, sensors13], axis=1)
    subset14=pd.concat([subset13, sensors14], axis=1)
    
    #finally, the desired subset is returned
    if number==1:
        final=subset1
    elif number==2:
        final=subset2
    elif number==3:
        final=subset3
    elif number==4:
        final=subset4
    elif number==5:
        final=subset5
    elif number==6:
        final=subset6
    elif number==7:
        final=subset7
    elif number==8:
        final=subset8
    elif number==9:
        final=subset9
    elif number==10:
        final=subset10
    elif number==11:
        final=subset11
    elif number==12:
        final=subset12
    elif number==13:
        final=subset13
    else:
        final=subset14
        

    
    
    return final

## Modelling

### Modelling XGBoost

In [55]:
def modelling_XGBoost (X_train, X_test, y_train, y_test, power_curve,  parameters, plot_error, plot):
    
    #creating the model
    max_depth=parameters['max_depth']
    n_estimators=parameters['n_estimators']
    learning_rate=parameters['learning_rate']
    subsample=parameters['subsample']
    colsample_bytree=parameters['colsample_bytree']
    min_child_weight=parameters['min_child_weight']
    gamma=parameters['gamma']
    reg_lambda=parameters['reg_lambda']
    
 
    model = xgb.XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=42,
                               objective='reg:squarederror', booster='gbtree', learning_rate=learning_rate,
                               subsample=subsample, colsample_bytree=colsample_bytree,
                             min_child_weight=min_child_weight, reg_lambda=reg_lambda)
                               
    #model fitting
    model.fit(X_train, y_train)
        
    
    #model predicting
    y_pred_test=model.predict(X_test)
    y_pred_train=model.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])
    
    

    #computing the results
    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y_train['Target']
    
    
    data=compute_results(data_test, data_train, power_curve, plot_error)
    print('XGBoost modelling performed')
    
    return model

### Grid Search XGBoost

In [35]:
def GridSearch_XGBoost (X_train, X_test, y_train, y_test, power_curve, parameters, param_grid, plot_error):
    
    
    #counting the runing time
    start_time = time.time()
    
    #creating the model
    max_depth=parameters['max_depth']
    n_estimators=parameters['n_estimators']
    learning_rate=parameters['learning_rate']
    subsample=parameters['subsample']
    colsample_bytree=parameters['colsample_bytree']
    min_child_weight=parameters['min_child_weight']
    gamma=parameters['gamma']
    reg_lambda=parameters['reg_lambda']
    
    #creating the model
    xgbr = xgb.XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=42,
                           objective='reg:squarederror', booster='gbtree', learning_rate=learning_rate,
                           subsample=subsample, colsample_bytree=colsample_bytree,
                         min_child_weight=min_child_weight, gamma=gamma, reg_lambda=reg_lambda)
    
    
    
    #Grid Search CV
    clf = GridSearchCV(estimator=xgbr, 
                             param_grid=param_grid,
                             cv=4,
                             scoring='neg_mean_squared_error',
                             verbose=1) 
    
        
    #model fitting
    clf.fit(X_train, y_train)
    
    
    #model predicting
    
    y_pred_test=clf.predict(X_test)
    y_pred_train=clf.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])
    
    print('')
    print('Best parameters :')
    print(clf.best_params_)
    print('')

    #computing the results
    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y_train['Target']
    
    compute_results(data_test, data_train, power_curve, plot_error)
    print('GridSearch_ XGBoost performed')
    print("--- %s minutes ---" % ((time.time() - start_time)/60))
    print('')
    print('Lowest RMSE: ', (-clf.best_score_)**(1/2.0))
    
    
    
    return clf

### Model Testing

In [20]:
def model_testing (X_train, X_test, y_train, y_test, power_curve, model, plot_error):

    

    y_pred_test=model.predict(X_test)
    y_pred_train=model.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])


    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y_train['Target']
        
    
    compute_results(data_test, data_train, power_curve, plot_error)
    
    WS_pred=data_test['WS_pred']
    print('XGBoost results performed')
    
    return WS_pred

# Data analysis

In [21]:
#WTG14

In [22]:
#individual

In [23]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\Dataset1-Normal_Site','\X_train14.csv')
X_test= uploading_csv('\Dataset1-Normal_Site','\X_test14.csv')
y_train= uploading_csv('\Dataset1-Normal_Site','\y_train14.csv')
y_test= uploading_csv('\Dataset1-Normal_Site','\y_test14.csv')

X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [24]:
PC= uploading_csv('\Dataset1-Normal_Site','\PC_1.15kgm-3.csv')

## Subset1

In [25]:
X_train_subset=subset_selection(X_train, 1)
X_test_subset=subset_selection(X_test, 1)

In [26]:
X_train_subset

Unnamed: 0,WS1,tod,TI
0,0.020975,1.000000,0.207488
1,0.065031,0.041958,0.124319
2,0.212434,0.006993,0.377173
3,0.871467,0.293706,0.202202
4,0.324850,0.762238,0.344330
...,...,...,...
3041,0.534532,0.419580,0.371217
3042,0.357270,0.874126,0.231236
3043,0.431474,0.867133,0.232592
3044,0.644727,0.993007,0.095348


### Grid Search

In [27]:
#doing

In [32]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [33]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [36]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 100}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.335 m/s as root mean
Wind MAE:  0.252 m/s in avg
Wind MAPE:  2.942 %
Power RMSE:  154.059 kW as root mean
Power MAE:  94.495 kW in avg
Power MAPE:  6.563 %

Modelling errors for test set:
Wind RMSE:  0.448 m/s as root mean
Wind MAE:  0.336 m/s in avg
Wind MAPE:  3.988 %
Power RMSE:  208.807 kW as root mean
Power MAE:  129.997 kW in avg
Power MAPE:  9.315 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.7957025567690531 minutes ---

Lowest RMSE:  0.4592630938943637


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [37]:
parameters={
    'max_depth':5,
    'n_estimators': 100,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [38]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [39]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 3, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.392 m/s as root mean
Wind MAE:  0.291 m/s in avg
Wind MAPE:  3.394 %
Power RMSE:  176.698 kW as root mean
Power MAE:  107.652 kW in avg
Power MAPE:  7.649 %

Modelling errors for test set:
Wind RMSE:  0.443 m/s as root mean
Wind MAE:  0.33 m/s in avg
Wind MAPE:  3.895 %
Power RMSE:  204.803 kW as root mean
Power MAE:  127.529 kW in avg
Power MAPE:  9.005 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.139086918036143 minutes ---

Lowest RMSE:  0.4454775083012192


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [40]:
parameters={
    'max_depth':3,
    'n_estimators': 100,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [41]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [42]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.2}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.392 m/s as root mean
Wind MAE:  0.292 m/s in avg
Wind MAPE:  3.404 %
Power RMSE:  176.621 kW as root mean
Power MAE:  107.699 kW in avg
Power MAPE:  7.667 %

Modelling errors for test set:
Wind RMSE:  0.442 m/s as root mean
Wind MAE:  0.33 m/s in avg
Wind MAPE:  3.896 %
Power RMSE:  203.96 kW as root mean
Power MAE:  127.06 kW in avg
Power MAPE:  9.005 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.07896660168965658 minutes ---

Lowest RMSE:  0.44535360509147437


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=3, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [43]:
parameters={
    'max_depth':3,
    'n_estimators': 100,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.2,
    'reg_lambda':1,
}

In [44]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [45]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.386 m/s as root mean
Wind MAE:  0.288 m/s in avg
Wind MAPE:  3.368 %
Power RMSE:  175.613 kW as root mean
Power MAE:  107.102 kW in avg
Power MAPE:  7.582 %

Modelling errors for test set:
Wind RMSE:  0.437 m/s as root mean
Wind MAE:  0.327 m/s in avg
Wind MAPE:  3.844 %
Power RMSE:  203.626 kW as root mean
Power MAE:  126.681 kW in avg
Power MAPE:  8.829 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.47332452138264974 minutes ---

Lowest RMSE:  0.4326231726698581


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.2, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=3, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [46]:
parameters={
    'max_depth':3,
    'n_estimators': 100,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0.2,
    'reg_lambda':1,
}

In [47]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [48]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.386 m/s as root mean
Wind MAE:  0.288 m/s in avg
Wind MAPE:  3.368 %
Power RMSE:  175.613 kW as root mean
Power MAE:  107.102 kW in avg
Power MAPE:  7.582 %

Modelling errors for test set:
Wind RMSE:  0.437 m/s as root mean
Wind MAE:  0.327 m/s in avg
Wind MAPE:  3.844 %
Power RMSE:  203.626 kW as root mean
Power MAE:  126.681 kW in avg
Power MAPE:  8.829 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.07428447405497234 minutes ---

Lowest RMSE:  0.4326231726698581


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.2, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=3, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [49]:
parameters={
    'max_depth':3,
    'n_estimators': 100,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0.2,
    'reg_lambda':1,
}

In [50]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [51]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 900}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.388 m/s as root mean
Wind MAE:  0.289 m/s in avg
Wind MAPE:  3.376 %
Power RMSE:  175.944 kW as root mean
Power MAE:  107.24 kW in avg
Power MAPE:  7.6 %

Modelling errors for test set:
Wind RMSE:  0.436 m/s as root mean
Wind MAE:  0.325 m/s in avg
Wind MAPE:  3.832 %
Power RMSE:  202.906 kW as root mean
Power MAE:  126.303 kW in avg
Power MAPE:  8.832 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.486817304293315 minutes ---

Lowest RMSE:  0.4328982597364167


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.2, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=3, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=100, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [None]:
#pending

### Testing

In [52]:
parameters={
    'max_depth':3,
    'n_estimators': 900,
    'learning_rate':0.01,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0.2,
    'reg_lambda':1,
}

In [53]:
#modelling

In [56]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.388 m/s as root mean
Wind MAE:  0.289 m/s in avg
Wind MAPE:  3.376 %
Power RMSE:  175.902 kW as root mean
Power MAE:  107.245 kW in avg
Power MAPE:  7.602 %

Modelling errors for test set:
Wind RMSE:  0.436 m/s as root mean
Wind MAE:  0.325 m/s in avg
Wind MAPE:  3.834 %
Power RMSE:  202.9 kW as root mean
Power MAE:  126.323 kW in avg
Power MAPE:  8.839 %


Showing the results of the modelling: 
XGBoost modelling performed


In [57]:
#testing

In [58]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.388 m/s as root mean
Wind MAE:  0.289 m/s in avg
Wind MAPE:  3.376 %
Power RMSE:  175.902 kW as root mean
Power MAE:  107.245 kW in avg
Power MAPE:  7.602 %

Modelling errors for test set:
Wind RMSE:  0.436 m/s as root mean
Wind MAE:  0.325 m/s in avg
Wind MAPE:  3.834 %
Power RMSE:  202.9 kW as root mean
Power MAE:  126.323 kW in avg
Power MAPE:  8.839 %


Showing the results of the modelling: 
XGBoost results performed


In [59]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset1_XGB_WTG14.csv')

file Subset1_XGB_WTG14.csv saved in \Results_ folder


## Subset2

In [60]:
X_train_subset=subset_selection(X_train, 2)
X_test_subset=subset_selection(X_test, 2)

In [61]:
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200
...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524


### Grid Search

In [62]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [63]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [64]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 200}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.202 m/s as root mean
Wind MAE:  0.156 m/s in avg
Wind MAPE:  1.822 %
Power RMSE:  95.332 kW as root mean
Power MAE:  59.681 kW in avg
Power MAPE:  4.046 %

Modelling errors for test set:
Wind RMSE:  0.363 m/s as root mean
Wind MAE:  0.275 m/s in avg
Wind MAPE:  3.275 %
Power RMSE:  172.116 kW as root mean
Power MAE:  108.18 kW in avg
Power MAPE:  7.617 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.196300486723582 minutes ---

Lowest RMSE:  0.36201385215579185


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [65]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [66]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [67]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.178 m/s as root mean
Wind MAE:  0.135 m/s in avg
Wind MAPE:  1.582 %
Power RMSE:  83.797 kW as root mean
Power MAE:  51.678 kW in avg
Power MAPE:  3.508 %

Modelling errors for test set:
Wind RMSE:  0.365 m/s as root mean
Wind MAE:  0.279 m/s in avg
Wind MAPE:  3.348 %
Power RMSE:  174.2 kW as root mean
Power MAE:  110.535 kW in avg
Power MAPE:  7.894 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.674429643154144 minutes ---

Lowest RMSE:  0.35927799143104017


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [68]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [69]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [70]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.178 m/s as root mean
Wind MAE:  0.135 m/s in avg
Wind MAPE:  1.582 %
Power RMSE:  83.797 kW as root mean
Power MAE:  51.678 kW in avg
Power MAPE:  3.508 %

Modelling errors for test set:
Wind RMSE:  0.365 m/s as root mean
Wind MAE:  0.279 m/s in avg
Wind MAPE:  3.348 %
Power RMSE:  174.2 kW as root mean
Power MAE:  110.535 kW in avg
Power MAPE:  7.894 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.274212646484375 minutes ---

Lowest RMSE:  0.35927799143104017


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [72]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [73]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [74]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.172 m/s as root mean
Wind MAE:  0.132 m/s in avg
Wind MAPE:  1.548 %
Power RMSE:  82.674 kW as root mean
Power MAE:  51.721 kW in avg
Power MAPE:  3.459 %

Modelling errors for test set:
Wind RMSE:  0.363 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.282 %
Power RMSE:  173.789 kW as root mean
Power MAE:  108.927 kW in avg
Power MAPE:  7.761 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.0495995124181112 minutes ---

Lowest RMSE:  0.3572764464135117


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [75]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [76]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [77]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.172 m/s as root mean
Wind MAE:  0.132 m/s in avg
Wind MAPE:  1.548 %
Power RMSE:  82.674 kW as root mean
Power MAE:  51.721 kW in avg
Power MAPE:  3.459 %

Modelling errors for test set:
Wind RMSE:  0.363 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.282 %
Power RMSE:  173.789 kW as root mean
Power MAE:  108.927 kW in avg
Power MAPE:  7.761 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.3472219189008077 minutes ---

Lowest RMSE:  0.3572764464135117


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,
  

In [78]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [79]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [80]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 1800}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.175 m/s as root mean
Wind MAE:  0.134 m/s in avg
Wind MAPE:  1.566 %
Power RMSE:  83.527 kW as root mean
Power MAE:  51.886 kW in avg
Power MAPE:  3.502 %

Modelling errors for test set:
Wind RMSE:  0.36 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.273 %
Power RMSE:  173.229 kW as root mean
Power MAE:  108.63 kW in avg
Power MAPE:  7.729 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 15.248723193009694 minutes ---

Lowest RMSE:  0.35275409737429886


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,
  

### Testing

In [81]:
parameters={
    'max_depth':6,
    'n_estimators': 1800,
    'learning_rate':0.01,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [82]:
#modelling

In [83]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.175 m/s as root mean
Wind MAE:  0.134 m/s in avg
Wind MAPE:  1.566 %
Power RMSE:  83.527 kW as root mean
Power MAE:  51.886 kW in avg
Power MAPE:  3.502 %

Modelling errors for test set:
Wind RMSE:  0.36 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.273 %
Power RMSE:  173.229 kW as root mean
Power MAE:  108.63 kW in avg
Power MAPE:  7.729 %


Showing the results of the modelling: 
XGBoost modelling performed


In [84]:
#testing

In [85]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.175 m/s as root mean
Wind MAE:  0.134 m/s in avg
Wind MAPE:  1.566 %
Power RMSE:  83.527 kW as root mean
Power MAE:  51.886 kW in avg
Power MAPE:  3.502 %

Modelling errors for test set:
Wind RMSE:  0.36 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.273 %
Power RMSE:  173.229 kW as root mean
Power MAE:  108.63 kW in avg
Power MAPE:  7.729 %


Showing the results of the modelling: 
XGBoost results performed


In [86]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset2_XGB_WTG14.csv')

file Subset2_XGB_WTG14.csv saved in \Results_ folder


## Subset3

In [87]:
X_train_subset=subset_selection(X_train, 3)
X_test_subset=subset_selection(X_test, 3)

In [88]:
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611
...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026


### Grid Search

In [90]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [91]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [92]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 200}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.186 m/s as root mean
Wind MAE:  0.144 m/s in avg
Wind MAPE:  1.68 %
Power RMSE:  87.499 kW as root mean
Power MAE:  55.221 kW in avg
Power MAPE:  3.687 %

Modelling errors for test set:
Wind RMSE:  0.352 m/s as root mean
Wind MAE:  0.268 m/s in avg
Wind MAPE:  3.213 %
Power RMSE:  164.598 kW as root mean
Power MAE:  104.213 kW in avg
Power MAPE:  7.502 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.8370547731717428 minutes ---

Lowest RMSE:  0.36081730682083385


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [93]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [94]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [95]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 3}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.141 m/s as root mean
Wind MAE:  0.109 m/s in avg
Wind MAPE:  1.273 %
Power RMSE:  67.537 kW as root mean
Power MAE:  42.124 kW in avg
Power MAPE:  2.83 %

Modelling errors for test set:
Wind RMSE:  0.354 m/s as root mean
Wind MAE:  0.271 m/s in avg
Wind MAPE:  3.242 %
Power RMSE:  168.806 kW as root mean
Power MAE:  106.724 kW in avg
Power MAPE:  7.54 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.604090666770935 minutes ---

Lowest RMSE:  0.35591373353572137


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [96]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':3,
    'gamma':0,
    'reg_lambda':1,
}

In [97]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [98]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.3}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.198 m/s as root mean
Wind MAE:  0.156 m/s in avg
Wind MAPE:  1.825 %
Power RMSE:  93.572 kW as root mean
Power MAE:  59.438 kW in avg
Power MAPE:  4.085 %

Modelling errors for test set:
Wind RMSE:  0.354 m/s as root mean
Wind MAE:  0.271 m/s in avg
Wind MAPE:  3.221 %
Power RMSE:  166.928 kW as root mean
Power MAE:  105.538 kW in avg
Power MAPE:  7.441 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.44061471621195475 minutes ---

Lowest RMSE:  0.35457068924550594


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=3,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [99]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':3,
    'gamma':0.3,
    'reg_lambda':1,
}

In [100]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [101]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.199 m/s as root mean
Wind MAE:  0.156 m/s in avg
Wind MAPE:  1.83 %
Power RMSE:  93.508 kW as root mean
Power MAE:  59.67 kW in avg
Power MAPE:  4.095 %

Modelling errors for test set:
Wind RMSE:  0.353 m/s as root mean
Wind MAE:  0.269 m/s in avg
Wind MAPE:  3.227 %
Power RMSE:  167.722 kW as root mean
Power MAE:  106.167 kW in avg
Power MAPE:  7.571 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.7542741020520527 minutes ---

Lowest RMSE:  0.3525659268433924


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.3, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=3,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None

In [102]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':3,
    'gamma':0.3,
    'reg_lambda':1,
}

In [103]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [104]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.199 m/s as root mean
Wind MAE:  0.156 m/s in avg
Wind MAPE:  1.83 %
Power RMSE:  93.508 kW as root mean
Power MAE:  59.67 kW in avg
Power MAPE:  4.095 %

Modelling errors for test set:
Wind RMSE:  0.353 m/s as root mean
Wind MAE:  0.269 m/s in avg
Wind MAPE:  3.227 %
Power RMSE:  167.722 kW as root mean
Power MAE:  106.167 kW in avg
Power MAPE:  7.571 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.4252168854077657 minutes ---

Lowest RMSE:  0.3525659268433924


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.3, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=3,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [105]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':3,
    'gamma':0.3,
    'reg_lambda':1,
}

In [106]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [107]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.198 m/s as root mean
Wind MAE:  0.155 m/s in avg
Wind MAPE:  1.823 %
Power RMSE:  93.675 kW as root mean
Power MAE:  59.618 kW in avg
Power MAPE:  4.105 %

Modelling errors for test set:
Wind RMSE:  0.349 m/s as root mean
Wind MAE:  0.266 m/s in avg
Wind MAPE:  3.18 %
Power RMSE:  165.062 kW as root mean
Power MAE:  104.17 kW in avg
Power MAPE:  7.39 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 14.323642186323802 minutes ---

Lowest RMSE:  0.3498289290791046


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.3, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=3,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


### Testing

In [108]:
parameters={
    'max_depth':6,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':3,
    'gamma':0.3,
    'reg_lambda':1,
}

In [109]:
#modelling

In [110]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.134 m/s as root mean
Wind MAE:  0.103 m/s in avg
Wind MAPE:  1.209 %
Power RMSE:  64.999 kW as root mean
Power MAE:  40.591 kW in avg
Power MAPE:  2.684 %

Modelling errors for test set:
Wind RMSE:  0.347 m/s as root mean
Wind MAE:  0.265 m/s in avg
Wind MAPE:  3.156 %
Power RMSE:  164.34 kW as root mean
Power MAE:  103.394 kW in avg
Power MAPE:  7.319 %


Showing the results of the modelling: 
XGBoost modelling performed


In [111]:
#testing

In [112]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.134 m/s as root mean
Wind MAE:  0.103 m/s in avg
Wind MAPE:  1.209 %
Power RMSE:  64.999 kW as root mean
Power MAE:  40.591 kW in avg
Power MAPE:  2.684 %

Modelling errors for test set:
Wind RMSE:  0.347 m/s as root mean
Wind MAE:  0.265 m/s in avg
Wind MAPE:  3.156 %
Power RMSE:  164.34 kW as root mean
Power MAE:  103.394 kW in avg
Power MAPE:  7.319 %


Showing the results of the modelling: 
XGBoost results performed


In [113]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset3_XGB_WTG14.csv')

file Subset3_XGB_WTG14.csv saved in \Results_ folder


## Subset4

In [114]:
X_train_subset=subset_selection(X_train, 4)
X_test_subset=subset_selection(X_test, 4)

In [115]:
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760
...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783


### Grid Search

In [116]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [117]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [118]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 300}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.142 m/s as root mean
Wind MAE:  0.109 m/s in avg
Wind MAPE:  1.28 %
Power RMSE:  69.271 kW as root mean
Power MAE:  43.17 kW in avg
Power MAPE:  2.843 %

Modelling errors for test set:
Wind RMSE:  0.357 m/s as root mean
Wind MAE:  0.275 m/s in avg
Wind MAPE:  3.279 %
Power RMSE:  168.726 kW as root mean
Power MAE:  107.252 kW in avg
Power MAPE:  7.569 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.5926148732503256 minutes ---

Lowest RMSE:  0.3619056053312274


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [119]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [120]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [121]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 6}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.115 m/s as root mean
Wind MAE:  0.088 m/s in avg
Wind MAPE:  1.025 %
Power RMSE:  55.78 kW as root mean
Power MAE:  34.294 kW in avg
Power MAPE:  2.275 %

Modelling errors for test set:
Wind RMSE:  0.355 m/s as root mean
Wind MAE:  0.274 m/s in avg
Wind MAPE:  3.268 %
Power RMSE:  168.214 kW as root mean
Power MAE:  106.863 kW in avg
Power MAPE:  7.562 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 5.213793996969859 minutes ---

Lowest RMSE:  0.35836722195897064


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [122]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [123]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [124]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.115 m/s as root mean
Wind MAE:  0.088 m/s in avg
Wind MAPE:  1.025 %
Power RMSE:  55.78 kW as root mean
Power MAE:  34.294 kW in avg
Power MAPE:  2.275 %

Modelling errors for test set:
Wind RMSE:  0.355 m/s as root mean
Wind MAE:  0.274 m/s in avg
Wind MAPE:  3.268 %
Power RMSE:  168.214 kW as root mean
Power MAE:  106.863 kW in avg
Power MAPE:  7.562 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.6364447752634684 minutes ---

Lowest RMSE:  0.35836722195897064


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [125]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [126]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [127]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.7}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.116 m/s as root mean
Wind MAE:  0.088 m/s in avg
Wind MAPE:  1.033 %
Power RMSE:  56.307 kW as root mean
Power MAE:  34.592 kW in avg
Power MAPE:  2.293 %

Modelling errors for test set:
Wind RMSE:  0.356 m/s as root mean
Wind MAE:  0.272 m/s in avg
Wind MAPE:  3.25 %
Power RMSE:  165.673 kW as root mean
Power MAE:  105.112 kW in avg
Power MAPE:  7.537 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 4.505081057548523 minutes ---

Lowest RMSE:  0.3523409316903785


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [128]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':1,
}

In [129]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [130]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.146 m/s as root mean
Wind MAE:  0.11 m/s in avg
Wind MAPE:  1.284 %
Power RMSE:  69.115 kW as root mean
Power MAE:  42.304 kW in avg
Power MAPE:  2.868 %

Modelling errors for test set:
Wind RMSE:  0.352 m/s as root mean
Wind MAE:  0.271 m/s in avg
Wind MAPE:  3.205 %
Power RMSE:  165.639 kW as root mean
Power MAE:  104.712 kW in avg
Power MAPE:  7.342 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.6080136458079021 minutes ---

Lowest RMSE:  0.3492455640009112


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None,
  

In [131]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':10,
}

In [132]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [133]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.183 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.628 %
Power RMSE:  86.659 kW as root mean
Power MAE:  53.3 kW in avg
Power MAPE:  3.664 %

Modelling errors for test set:
Wind RMSE:  0.345 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.141 %
Power RMSE:  161.931 kW as root mean
Power MAE:  102.551 kW in avg
Power MAPE:  7.245 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 11.505776337782542 minutes ---

Lowest RMSE:  0.3437649573903065


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=6,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None,
 

### Testing

In [134]:
parameters={
    'max_depth':6,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':6,
    'gamma':0,
    'reg_lambda':10,
}

In [135]:
#modelling

In [136]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.183 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.628 %
Power RMSE:  86.659 kW as root mean
Power MAE:  53.3 kW in avg
Power MAPE:  3.664 %

Modelling errors for test set:
Wind RMSE:  0.345 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.141 %
Power RMSE:  161.931 kW as root mean
Power MAE:  102.551 kW in avg
Power MAPE:  7.245 %


Showing the results of the modelling: 
XGBoost modelling performed


In [137]:
#testing

In [138]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.183 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.628 %
Power RMSE:  86.659 kW as root mean
Power MAE:  53.3 kW in avg
Power MAPE:  3.664 %

Modelling errors for test set:
Wind RMSE:  0.345 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.141 %
Power RMSE:  161.931 kW as root mean
Power MAE:  102.551 kW in avg
Power MAPE:  7.245 %


Showing the results of the modelling: 
XGBoost results performed


In [139]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset4_XGB_WTG14.csv')

file Subset4_XGB_WTG14.csv saved in \Results_ folder


## Subset5

In [140]:
X_train_subset=subset_selection(X_train, 5)
X_test_subset=subset_selection(X_test, 5)

In [141]:
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985
...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569


### Grid Search

In [142]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [143]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [144]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 300}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.136 m/s as root mean
Wind MAE:  0.105 m/s in avg
Wind MAPE:  1.221 %
Power RMSE:  65.933 kW as root mean
Power MAE:  41.189 kW in avg
Power MAPE:  2.679 %

Modelling errors for test set:
Wind RMSE:  0.354 m/s as root mean
Wind MAE:  0.271 m/s in avg
Wind MAPE:  3.23 %
Power RMSE:  165.388 kW as root mean
Power MAE:  104.713 kW in avg
Power MAPE:  7.45 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.9844670017560324 minutes ---

Lowest RMSE:  0.3571250249849731


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [145]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [146]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [147]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.128 m/s as root mean
Wind MAE:  0.097 m/s in avg
Wind MAPE:  1.144 %
Power RMSE:  61.638 kW as root mean
Power MAE:  38.04 kW in avg
Power MAPE:  2.561 %

Modelling errors for test set:
Wind RMSE:  0.357 m/s as root mean
Wind MAE:  0.274 m/s in avg
Wind MAPE:  3.258 %
Power RMSE:  168.082 kW as root mean
Power MAE:  106.164 kW in avg
Power MAPE:  7.503 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 6.704281353950501 minutes ---

Lowest RMSE:  0.3551753986554067


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [148]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [149]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [150]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.128 m/s as root mean
Wind MAE:  0.097 m/s in avg
Wind MAPE:  1.144 %
Power RMSE:  61.638 kW as root mean
Power MAE:  38.04 kW in avg
Power MAPE:  2.561 %

Modelling errors for test set:
Wind RMSE:  0.357 m/s as root mean
Wind MAE:  0.274 m/s in avg
Wind MAPE:  3.258 %
Power RMSE:  168.082 kW as root mean
Power MAE:  106.164 kW in avg
Power MAPE:  7.503 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.6011811335881551 minutes ---

Lowest RMSE:  0.3551753986554067


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [151]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [152]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [153]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.7}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.129 m/s as root mean
Wind MAE:  0.099 m/s in avg
Wind MAPE:  1.152 %
Power RMSE:  62.448 kW as root mean
Power MAE:  38.426 kW in avg
Power MAPE:  2.546 %

Modelling errors for test set:
Wind RMSE:  0.352 m/s as root mean
Wind MAE:  0.267 m/s in avg
Wind MAPE:  3.193 %
Power RMSE:  166.226 kW as root mean
Power MAE:  104.752 kW in avg
Power MAPE:  7.415 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 4.292648100852967 minutes ---

Lowest RMSE:  0.34839338967216055


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [154]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [155]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [156]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.15 m/s as root mean
Wind MAE:  0.113 m/s in avg
Wind MAPE:  1.32 %
Power RMSE:  71.253 kW as root mean
Power MAE:  43.367 kW in avg
Power MAPE:  2.945 %

Modelling errors for test set:
Wind RMSE:  0.353 m/s as root mean
Wind MAE:  0.271 m/s in avg
Wind MAPE:  3.213 %
Power RMSE:  165.432 kW as root mean
Power MAE:  104.388 kW in avg
Power MAPE:  7.349 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.5717172225316366 minutes ---

Lowest RMSE:  0.34814951482159107


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None,
 

In [157]:
parameters={
    'max_depth':6,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':10,
}

In [158]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [159]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.187 m/s as root mean
Wind MAE:  0.142 m/s in avg
Wind MAPE:  1.659 %
Power RMSE:  88.067 kW as root mean
Power MAE:  54.081 kW in avg
Power MAPE:  3.724 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.26 m/s in avg
Wind MAPE:  3.099 %
Power RMSE:  160.2 kW as root mean
Power MAE:  100.927 kW in avg
Power MAPE:  7.157 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 10.911642591158548 minutes ---

Lowest RMSE:  0.3411606157514566


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.7,
                                    tree_method=None, validate_parameters=None,


### Testing

In [160]:
parameters={
    'max_depth':6,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.7,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':10,
}

In [161]:
#modelling

In [162]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.187 m/s as root mean
Wind MAE:  0.142 m/s in avg
Wind MAPE:  1.659 %
Power RMSE:  88.067 kW as root mean
Power MAE:  54.081 kW in avg
Power MAPE:  3.724 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.26 m/s in avg
Wind MAPE:  3.099 %
Power RMSE:  160.2 kW as root mean
Power MAE:  100.927 kW in avg
Power MAPE:  7.157 %


Showing the results of the modelling: 
XGBoost modelling performed


In [163]:
#testing

In [164]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.187 m/s as root mean
Wind MAE:  0.142 m/s in avg
Wind MAPE:  1.659 %
Power RMSE:  88.067 kW as root mean
Power MAE:  54.081 kW in avg
Power MAPE:  3.724 %

Modelling errors for test set:
Wind RMSE:  0.342 m/s as root mean
Wind MAE:  0.26 m/s in avg
Wind MAPE:  3.099 %
Power RMSE:  160.2 kW as root mean
Power MAE:  100.927 kW in avg
Power MAPE:  7.157 %


Showing the results of the modelling: 
XGBoost results performed


In [165]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset5_XGB_WTG14.csv')

file Subset5_XGB_WTG14.csv saved in \Results_ folder


## Subset6

In [166]:
X_train_subset=subset_selection(X_train, 6)
X_test_subset=subset_selection(X_test, 6)

In [167]:
X_train_subset

Unnamed: 0,WS1,tod,TI,WSHor,WSVer,WDHor,WDVer,WS4,WSH,WS3,WD4,WD1,WVeer
0,0.020975,1.000000,0.207488,0.029529,0.459364,0.053589,0.429742,0.100394,0.439725,0.066975,0.586977,0.057697,0.094759
1,0.065031,0.041958,0.124319,0.068877,0.321555,0.059900,0.275543,0.163620,0.345484,0.124902,0.158576,0.063196,0.607850
2,0.212434,0.006993,0.377173,0.216979,0.344857,0.909151,0.346802,0.292307,0.340846,0.281079,0.631750,0.910616,0.678993
3,0.871467,0.293706,0.202202,0.874755,0.500812,0.706084,0.448942,0.871817,0.329727,0.871425,0.683743,0.711110,0.468120
4,0.324850,0.762238,0.344330,0.332251,0.495464,0.804627,0.465200,0.470751,0.195611,0.397760,0.750985,0.815900,0.466545
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3041,0.534532,0.419580,0.371217,0.544557,0.352688,0.702762,0.403211,0.695452,0.173290,0.616674,0.651365,0.712085,0.507316
3042,0.357270,0.874126,0.231236,0.368119,0.350110,0.402231,0.362418,0.396343,0.379675,0.366648,0.448505,0.408939,0.521778
3043,0.431474,0.867133,0.232592,0.442114,0.389075,0.667303,0.398147,0.454500,0.385883,0.450032,0.603778,0.676574,0.537315
3044,0.644727,0.993007,0.095348,0.641298,0.680546,0.961800,0.525524,0.561681,0.483026,0.641783,0.673569,0.964112,0.669288


### Grid Search

In [168]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [169]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [170]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 200}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.171 m/s as root mean
Wind MAE:  0.131 m/s in avg
Wind MAPE:  1.538 %
Power RMSE:  82.243 kW as root mean
Power MAE:  51.217 kW in avg
Power MAPE:  3.414 %

Modelling errors for test set:
Wind RMSE:  0.357 m/s as root mean
Wind MAE:  0.273 m/s in avg
Wind MAPE:  3.248 %
Power RMSE:  167.86 kW as root mean
Power MAE:  106.546 kW in avg
Power MAPE:  7.517 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.9120225667953492 minutes ---

Lowest RMSE:  0.35555652902532325


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [171]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [172]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [173]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 6, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.165 m/s as root mean
Wind MAE:  0.126 m/s in avg
Wind MAPE:  1.475 %
Power RMSE:  79.177 kW as root mean
Power MAE:  48.808 kW in avg
Power MAPE:  3.293 %

Modelling errors for test set:
Wind RMSE:  0.353 m/s as root mean
Wind MAE:  0.269 m/s in avg
Wind MAPE:  3.204 %
Power RMSE:  164.773 kW as root mean
Power MAE:  104.848 kW in avg
Power MAPE:  7.385 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 4.2480987191200255 minutes ---

Lowest RMSE:  0.3531827737341212


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [174]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [175]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [176]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.167 m/s as root mean
Wind MAE:  0.129 m/s in avg
Wind MAPE:  1.511 %
Power RMSE:  80.47 kW as root mean
Power MAE:  49.934 kW in avg
Power MAPE:  3.394 %

Modelling errors for test set:
Wind RMSE:  0.351 m/s as root mean
Wind MAE:  0.268 m/s in avg
Wind MAPE:  3.214 %
Power RMSE:  164.121 kW as root mean
Power MAE:  104.547 kW in avg
Power MAPE:  7.483 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.44519558747609456 minutes ---

Lowest RMSE:  0.35316384399285505


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [178]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [179]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [180]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.167 m/s as root mean
Wind MAE:  0.129 m/s in avg
Wind MAPE:  1.511 %
Power RMSE:  80.47 kW as root mean
Power MAE:  49.934 kW in avg
Power MAPE:  3.394 %

Modelling errors for test set:
Wind RMSE:  0.351 m/s as root mean
Wind MAE:  0.268 m/s in avg
Wind MAPE:  3.214 %
Power RMSE:  164.121 kW as root mean
Power MAE:  104.547 kW in avg
Power MAPE:  7.483 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 3.020140246550242 minutes ---

Lowest RMSE:  0.35316384399285505


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [181]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [182]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [183]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.167 m/s as root mean
Wind MAE:  0.129 m/s in avg
Wind MAPE:  1.511 %
Power RMSE:  80.47 kW as root mean
Power MAE:  49.934 kW in avg
Power MAPE:  3.394 %

Modelling errors for test set:
Wind RMSE:  0.351 m/s as root mean
Wind MAE:  0.268 m/s in avg
Wind MAPE:  3.214 %
Power RMSE:  164.121 kW as root mean
Power MAE:  104.547 kW in avg
Power MAPE:  7.483 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.34452070395151774 minutes ---

Lowest RMSE:  0.35316384399285505


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [184]:
parameters={
    'max_depth':6,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [185]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [186]:
GridSearch_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.164 m/s as root mean
Wind MAE:  0.125 m/s in avg
Wind MAPE:  1.465 %
Power RMSE:  79.057 kW as root mean
Power MAE:  48.738 kW in avg
Power MAPE:  3.286 %

Modelling errors for test set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.263 m/s in avg
Wind MAPE:  3.14 %
Power RMSE:  162.677 kW as root mean
Power MAE:  102.312 kW in avg
Power MAPE:  7.296 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 12.168057203292847 minutes ---

Lowest RMSE:  0.3488178010390744


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=6, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

### Testing

In [187]:
parameters={
    'max_depth':6,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [188]:
#modelling

In [189]:
model = modelling_XGBoost (X_train_subset, X_test_subset, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.157 m/s as root mean
Wind MAE:  0.12 m/s in avg
Wind MAPE:  1.399 %
Power RMSE:  76.214 kW as root mean
Power MAE:  46.688 kW in avg
Power MAPE:  3.129 %

Modelling errors for test set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.263 m/s in avg
Wind MAPE:  3.14 %
Power RMSE:  162.157 kW as root mean
Power MAE:  102.381 kW in avg
Power MAPE:  7.3 %


Showing the results of the modelling: 
XGBoost modelling performed


In [190]:
#testing

In [191]:
WS_pred=model_testing (X_train_subset, X_test_subset, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.157 m/s as root mean
Wind MAE:  0.12 m/s in avg
Wind MAPE:  1.399 %
Power RMSE:  76.214 kW as root mean
Power MAE:  46.688 kW in avg
Power MAPE:  3.129 %

Modelling errors for test set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.263 m/s in avg
Wind MAPE:  3.14 %
Power RMSE:  162.157 kW as root mean
Power MAE:  102.381 kW in avg
Power MAPE:  7.3 %


Showing the results of the modelling: 
XGBoost results performed


In [192]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','Subset6_XGB_WTG14.csv')

file Subset6_XGB_WTG14.csv saved in \Results_ folder
