# Packages

In [1]:
#basic packages
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

#data pre-processing packages
from datetime import datetime


#results and analysis packages
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# data modelling & results
from yellowbrick.regressor import PredictionError, ResidualsPlot
from sklearn.model_selection import train_test_split
import math as math

#NN
import xgboost as xgb

from tensorflow import keras
from keras.utils.vis_utils import plot_model
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

#feature importance
import shap


# Script

## Error computation

In [3]:
#defining the Root Mean Squared Error

def rmse(y_true, y_predicted):
    
    return np.sqrt(mean_squared_error(y_true, y_predicted))

In [4]:
#errors computation

def errors_computation(data):
    
    df=pd.DataFrame()
    #df.at['RMSE (as root mean)', 'Wind']= round(rmse(data['Target'], data['WS_pred']), 3)
    df.at['MAE (in avg)', 'Wind']= round(mae(data['Target'], data['WS_pred']), 3)
    df.at['MAPE (%)', 'Wind']= round(mape(data['Target'], data['WS_pred'])*100, 3)
    
    #df.at['RMSE (as root mean)', 'Power']= round(rmse(data['P'], data['P_pred']), 3)
    df.at['MAE (in avg)', 'Power']= round(mae(data['P'], data['P_pred']), 3)
    df.at['MAPE (%)', 'Power']= round(mape(data['P'], data['P_pred'])*100, 3)
    
    
    print('Wind RMSE: ', round(rmse(data['Target'], data['WS_pred']), 3), 'm/s as root mean')
    print('Wind MAE: ', round(mae(data['Target'], data['WS_pred']), 3), 'm/s in avg')
    print('Wind MAPE: ', round(mape(data['Target'], data['WS_pred'])*100, 3), '%')
    
    print('Power RMSE: ', round(rmse(data['P'], data['P_pred']), 3), 'kW as root mean')
    print('Power MAE: ', round(mae(data['P'], data['P_pred']), 3), 'kW in avg')
    print('Power MAPE: ', round(mape(data['P'], data['P_pred'])*100, 3), '%')
    
    return df

In [5]:
def error_plot(data, title):
    
    #title is expected to be an str
    #WS_pred and Target should be the variables names

    #plotting the reference
    plt.figure(figsize=(12,8))
    plt.plot([-1,17.5],[-1,17.5], 'green', linewidth=4, alpha=.12)
    plt.plot(data['WS_pred'], data['Target'], marker='o', ls='', label='Regression', markersize=5, alpha=.1)


    plt.legend()

    ax=plt.gca()
    ax.set(xlabel='y predicted', ylabel='y actual');
    ax.set_title(title)
    ax.set_ylim(ymin=4, ymax=17.5)
    ax.set_xlim(xmin=4, xmax=17.5)
    
    return print('')

In [6]:
def powercurve_computation(data, power_curve):
    
    from scipy import interpolate
    
    #this function computes the power at a observation given the information at a observation:
    # the WS (in m/s) at the wind turbine location and at the hub height (Target)
    # the power curve of the wind turbine in an xslx
    
    
    x=power_curve['Wind Speed [m/s]']
    y=power_curve['Warranted Power Curve [kW]']
    x_new=data['Target']
    
    f = interpolate.interp1d(x, y)
    #, kind='linear'
    data['P']=f(x_new)
    
    if 'WS_pred' in data.keys():
        x_new2=data['WS_pred']
        data['P_pred']=f(x_new2)
    
    print('power curve computation performed')
    
    return data

In [7]:
def control_power_computation (data_test, data_train, power_curve):
    
    results_test=pd.DataFrame()
    results_train=pd.DataFrame()
    
    
    results_test=powercurve_computation(data_test, power_curve)
    results_train=powercurve_computation(data_train, power_curve)

    return results_test, results_train

In [8]:
def compute_results(data_test, data_train, power_curve, plot_error):
    
    #this function computes and plots the results of a modelling:

    results_test, results_train=control_power_computation (data_test, data_train, power_curve)
    
    
    print('Modelling errors for training set:')
    errors_computation(results_train)
    print('')
    print('Modelling errors for test set:')
    errors_computation(results_test)
    print('')
    
    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    return print('Showing the results of the modelling: ')

In [9]:
def compute_results1(data_test, data_train, power_curve, plot_error):
    
    #this function computes and plots the results of a modelling:

    results_test, results_train=control_power_computation (data_test, data_train, power_curve)
    
    
    print('Modelling errors for training set:')
    errors_computation(results_train)
    print('')
    print('Modelling errors for test set:')
    errors_computation(results_test)
    print('')
    print('Modelling errors in AEP terms for test set')
    data=AEP_computation(results_test, power_curve)
    
    if plot_error:
        print('')
        error_plot(results_test, 'Error plot for test set wind speed')

    print('')
    print('Showing the results of the modelling: ')
    return data

In [10]:
def AEP_computation(data, power_curve):
    
    #binning the WS_Pred
    data['WS_pred_bin']=pd.cut(data['WS_pred'],
                                       bins=list(np.arange(-0.25,28.0,0.5)),
                                                 labels=list(np.arange(0.0,28.0,0.5)))

        
    #creating the AEP (Annual Energy Production table)
        
    AEP_table=pd.DataFrame()
    
    AEP_table['WS']=PC['Wind Speed [m/s]']
    AEP_table['WPC']=PC['Warranted Power Curve [kW]']
    AEP_table.set_index('WS', inplace=True)
    AEP_table['MPC_SC']=data.groupby('WS_pred_bin', as_index=True)['P'].mean()

    #puedo rellenar los NaN de MPC_SC copiando lo que tendría que ser: WPC
    
#     for i in AEP_table.index:
#         if math.isnan(AEP_table.at[i, 'MPC_SC']):
#             AEP_table.at[i, 'MPC_SC']=AEP_table.at[i, 'WPC']
    
    AEP_table['WSAD']=power_curve['Wind Speed Distribution [Hr/Year]']
    
    #o igual aquí tendría que cortar la tabla para NaN values
    
#     AEP_table=AEP_table.dropna()
    
    AEP_table['WAEP']=AEP_table['WPC']*AEP_table['WSAD']
    AEP_table['MAEP_SC']=AEP_table['MPC_SC']*AEP_table['WSAD']
    
    #computing the metrics for later printing
    
    MAEP_SC=AEP_table['MAEP_SC'].sum()
    WAEP=AEP_table['WAEP'].sum()
    AEP_perc=(MAEP_SC/WAEP)*100
    AEP_diff=(MAEP_SC-WAEP)/1000
    
    #printing the results

    print('')
    print('The computed AEP_table is: ')
    AEP_table(AEP_table)
    print('')
    print('The AEP% of the Site Calibration is: ', round(AEP_perc, 3), '%')
    print('')
    print('The AEP difference of the Site Calibration is: ', round(AEP_diff, 3), 'MWh')
    print('')
    AEP_plot(data, AEP_table)                                
    
    
    return data

In [11]:
def AEP_plot(data, AEP_table):
    
    #data is expected to contain: Target, WS_pred, P, P_pred
    #AEP_table is expected to contain: WS, WPC, MPC-SC

    #plotting the Power curves
    plt.figure(figsize=(10,6))
    plt.plot(data['WS_pred'], data['P'], marker='o', ls='', label='measured scatter', markersize=5, alpha=.1)
    plt.plot(AEP_table.index, AEP_table['WPC'], marker='o', color='black', linewidth=0.5, label='WPC', markersize=5)
    plt.plot(AEP_table.index, AEP_table['MPC_SC'], marker='o', color='red', linewidth=0.5, label='MPC-SC', markersize=5)


    plt.legend()

    ax=plt.gca()
    ax.set(ylabel='Power interpolated with WPC and Target WS (kW)', xlabel='Predicted Wind Speed (m/s)');
    ax.set_title('Site Calibration Power Curve')
    ax.set_xlim(xmin=0, xmax=20)
#     ax.set_xlim(xmin=0, xmax=27.5)
    
    return print('Error plot performed')

In [12]:
def AEP_table(AEP_table):
    
    #AEP_table is expected to contain: WPC, MPC_SC, WPC, WSAD, WAEP, MAEP
    
    WS= AEP_table.index
    WPC=AEP_table['WPC']
    MPC_SC=round(AEP_table['MPC_SC'],0)
    WSAD=AEP_table['WASD']
    WAEP=round(AEP_table['WAEP']/1000000, 3)
    MAEP=round(AEP_table['MAEP']/1000000, 3)
    

    row_headers=None
    column_headers=('Wind speed bin (m/s)', 'WPC (kW)', 'MPC_SC (kW)',
                    'Annual Wind Distribution (h/year)', 'WAEP (GWh)', 'MAEP (GWh)')

    rcolors = plt.cm.BuPu(np.full(len(row_headers), 0.1))
    ccolors = plt.cm.BuPu(np.full(len(column_headers), 0.1))

    l_2d=[list(WS), list(WPC), list(MPC), list(WSAD), list(WAEP), list(MAEP)]

    cell_text=np.array(l_2d).T

    fig, ax = plt.subplots(figsize=(12,5)) 
    ax.set_axis_off()

    the_table=ax.table(cellText=cell_text,
                      rowLabels=row_headers,
                      rowColours=rcolors,
                      rowLoc='right',
                      colColours=ccolors,
                      colLabels=column_headers,
                      cellLoc ='center',
                      loc='upper center',
                      colWidths = [0.1] * 5)

    ax.set_title('Annual Energy Production for Site Calibration PC', 
                 fontweight ="bold", fontsize=14) 

    the_table.auto_set_font_size(False)
    the_table.set_fontsize(14)
    the_table.scale(2, 2)
    
    return ('AEP table printed')

## Data uploading

In [13]:
def uploading_csv(file_folder,file_name):
    
    #file folder required
    #file name required
    #file is expected to be in the data root: r'C:\Users\irgaa\Irma\Data'
    #this function uploads and formats csv/txt/xlsx datasets into DataFrame
    
    
    data_root=r'C:\Users\irgaa\Irma\Data'
    data_folder=str(file_folder)
    data_file=str(file_name)
    
    data_path=data_root+data_folder+data_file
    
    data1 = pd.read_csv(data_path)

    
    # We will save the WD_bin as the index
    
    return data1

In [14]:
#this function saves a data csv

def save (data, file_folder,file_name):
    
    #file folder required
    #file name required
    #file is expected to be saved in the data root: r'C:\Users\irgaa\Irma\Data'
    #this function saves a csv/txt/xlsx into Irma's folder
    #the saved file will keep the columns names but not the index
    
    data_root=r'C:\Users\irgaa\Irma\Data'
    data_folder=str(file_folder)
    data_file=str(file_name)
    
    data_path=data_root+data_folder+data_file
    
    data.to_csv (data_path, index = False, header=True)
    
    
    return print('file', data_file, 'saved in', data_folder, 'folder')

## Data selection

In [15]:
def data_selection(X_train, X_test, inputs):
    
    #this function returns the columns of the training and test sets in the inputs list
    
    X_train1 = pd.DataFrame()
    X_test1 = pd.DataFrame()
    
    
    X_train1 = X_train[inputs]
    X_test1 = X_test[inputs]

    
    return X_train1,X_test1

In [16]:
def data_drop(X_train, X_test, list_2drop):
    
    #this function returns the columns of the training and test sets in the inputs list

    X_train1 = X_train.drop(columns=list_2drop)
    X_test1 = X_test.drop(columns=list_2drop)

    
    
    return X_train1,X_test1

## Modelling

### Modelling XGBoost

In [17]:
def modelling_XGBoost (X, X_test, y, y_test, power_curve,  parameters, plot_error, plot):
    
    #creating the model
    max_depth=parameters['max_depth']
    n_estimators=parameters['n_estimators']
    learning_rate=parameters['learning_rate']
    subsample=parameters['subsample']
    colsample_bytree=parameters['colsample_bytree']
    min_child_weight=parameters['min_child_weight']
    gamma=parameters['gamma']
    reg_lambda=parameters['reg_lambda']
    
 
    model = xgb.XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=42,
                               objective='reg:squarederror', booster='gbtree', learning_rate=learning_rate,
                               subsample=subsample, colsample_bytree=colsample_bytree,
                             min_child_weight=min_child_weight, reg_lambda=reg_lambda)
                               
    #model fitting
    model.fit(X_train, y_train)
        
    
    #model predicting
    y_pred_test=model.predict(X_test)
    y_pred_train=model.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])
    
    

    #computing the results
    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y['Target']
    
    
    data=compute_results(data_test, data_train, power_curve, plot_error)
    print('XGBoost modelling performed')
    
    return model

### Grid Search XGBoost

In [18]:
def GridSearch_XGBoost (X, X_test, y, y_test, power_curve, parameters, param_grid, plot_error):
    
    
    #counting the runing time
    start_time = time.time()
    
    #creating the model
    max_depth=parameters['max_depth']
    n_estimators=parameters['n_estimators']
    learning_rate=parameters['learning_rate']
    subsample=parameters['subsample']
    colsample_bytree=parameters['colsample_bytree']
    min_child_weight=parameters['min_child_weight']
    gamma=parameters['gamma']
    reg_lambda=parameters['reg_lambda']
    
    #creating the model
    xgbr = xgb.XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=42,
                           objective='reg:squarederror', booster='gbtree', learning_rate=learning_rate,
                           subsample=subsample, colsample_bytree=colsample_bytree,
                         min_child_weight=min_child_weight, gamma=gamma, reg_lambda=reg_lambda)
    
    
    
    #Grid Search CV
    clf = GridSearchCV(estimator=xgbr, 
                             param_grid=param_grid,
                             cv=4,
                             scoring='neg_mean_squared_error',
                             verbose=1) 
    
        
    #model fitting
    clf.fit(X_train, y_train)
    
    
    #model predicting
    
    y_pred_test=clf.predict(X_test)
    y_pred_train=clf.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])
    
    print('')
    print('Best parameters :')
    print(clf.best_params_)
    print('')

    #computing the results
    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y['Target']
    
    compute_results(data_test, data_train, power_curve, plot_error)
    print('GridSearch_ XGBoost performed')
    print("--- %s minutes ---" % ((time.time() - start_time)/60))
    print('')
    print('Lowest RMSE: ', (-clf.best_score_)**(1/2.0))
    
    
    
    return clf

### Random Search XGBoost

In [19]:
def RandomSearch_XGBoost(X, X_test, y, y_test, power_curve, param_distribs, plot_error):
    
    #counting the runing time
    start_time = time.time()
    
    
    #creating the model
    xgbr = xgb.XGBRegressor(random_state=42)
    
    #Random Search CV
    clf = RandomizedSearchCV(estimator=xgbr, 
                             param_distributions=param_distribs,
                             cv=4,
                             scoring='neg_mean_squared_error',
                             n_iter=25,
                             verbose=1) 
    
    
    #model fitting
    clf.fit(X_train, y_train)
    
    
    #model predicting
    
    y_pred_test=clf.predict(X_test)
    y_pred_train=clf.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])
    
    print('')
    print('Best parameters :')
    print(clf.best_params_)
    print('')

    #computing the results
    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y['Target']
    
    compute_results(data_test, data_train, power_curve, plot_error)
    print('RandomSearch_ XGBoost performed')
    print("--- %s minutes ---" % ((time.time() - start_time)/60))
    print('')
    print('Lowest RMSE: ', (-clf.best_score_)**(1/2.0))
    
    return clf
    

### Model Testing

In [20]:
def model_testing (X_train, X_test, y_train, y_test, power_curve, model, plot_error):

    

    y_pred_test=model.predict(X_test)
    y_pred_train=model.predict(X_train)
    
    test=pd.DataFrame(y_pred_test, columns = ['test'])
    train=pd.DataFrame(y_pred_train, columns = ['train'])


    data_test = pd.DataFrame()
    data_train = pd.DataFrame()
    
    data_test['WS_pred']=test['test']
    data_test['Target']=y_test['Target']
    data_train['WS_pred']=train['train']
    data_train['Target']=y_train['Target']
        
    
    compute_results(data_test, data_train, power_curve, plot_error)
    
    WS_pred=data_test['WS_pred']
    print('XGBoost results performed')
    
    return WS_pred

### Feature importance

In [21]:
def feature_importance (X_train, X_test, model):
    
    X_t, X_f, y_t, y_f = train_test_split(X_train,y_train, test_size=0.02, random_state=12)
    
    background = X_f.copy()
    
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)
    shap_obj=explainer(X_test)
    shap.summary_plot(shap_values, X_train, plot_type="bar")
    shap.plots.beeswarm(shap_obj)
    
    
    shap_v=pd.DataFrame(shap_values)
    feature_list=X_test.columns
    shap_v.columns=feature_list
    shap_v=shap_v.abs()
    k=pd.DataFrame(shap_v.mean()).reset_index()
    k.columns=['variables','SHAP_abs']
    k.sort_values(by='variables')
    
    
    
    print('Feature importance through SHAP values performed')
    
    return k

# Data analysis

## Dataset1

In [22]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train1.csv')
X_test= uploading_csv('\General','\X_test14.csv')
y_train= uploading_csv('\General','\y_train1.csv')
y_test= uploading_csv('\General','\y_test14.csv')


X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [23]:
y_test

Unnamed: 0,Target
0,8.476700
1,10.447586
2,8.664361
3,9.912774
4,9.596844
...,...
1289,7.825251
1290,8.458561
1291,5.542412
1292,10.032777


In [24]:
y_test.keys()

Index(['Target'], dtype='object')

In [25]:
PC= uploading_csv('\Dataset1-Normal_Site','\PC_1.15kgm-3.csv')

### Grid Search

In [49]:
#doing

In [50]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [51]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [52]:
 GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 200}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.333 m/s as root mean
Wind MAE:  0.255 m/s in avg
Wind MAPE:  3.062 %
Power RMSE:  157.216 kW as root mean
Power MAE:  99.476 kW in avg
Power MAPE:  7.15 %

Modelling errors for test set:
Wind RMSE:  0.397 m/s as root mean
Wind MAE:  0.308 m/s in avg
Wind MAPE:  3.562 %
Power RMSE:  176.958 kW as root mean
Power MAE:  110.319 kW in avg
Power MAPE:  7.719 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 5.194095158576966 minutes ---

Lowest RMSE:  0.48163921936001536


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [53]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [54]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [55]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.344 m/s as root mean
Wind MAE:  0.262 m/s in avg
Wind MAPE:  3.161 %
Power RMSE:  161.924 kW as root mean
Power MAE:  102.263 kW in avg
Power MAPE:  7.417 %

Modelling errors for test set:
Wind RMSE:  0.389 m/s as root mean
Wind MAE:  0.304 m/s in avg
Wind MAPE:  3.498 %
Power RMSE:  173.515 kW as root mean
Power MAE:  109.111 kW in avg
Power MAPE:  7.491 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 18.011303718884786 minutes ---

Lowest RMSE:  0.47759606601000154


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [56]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [57]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [58]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.4}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.179 %
Power RMSE:  162.822 kW as root mean
Power MAE:  102.751 kW in avg
Power MAPE:  7.454 %

Modelling errors for test set:
Wind RMSE:  0.39 m/s as root mean
Wind MAE:  0.304 m/s in avg
Wind MAPE:  3.526 %
Power RMSE:  175.374 kW as root mean
Power MAE:  110.031 kW in avg
Power MAPE:  7.661 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.0888718247413636 minutes ---

Lowest RMSE:  0.47666355647084435


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [59]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.4,
    'reg_lambda':1,
}

In [60]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [61]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.179 %
Power RMSE:  162.822 kW as root mean
Power MAE:  102.751 kW in avg
Power MAPE:  7.454 %

Modelling errors for test set:
Wind RMSE:  0.39 m/s as root mean
Wind MAE:  0.304 m/s in avg
Wind MAPE:  3.526 %
Power RMSE:  175.374 kW as root mean
Power MAE:  110.031 kW in avg
Power MAPE:  7.661 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 9.23844188451767 minutes ---

Lowest RMSE:  0.47666355647084435


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.4, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [62]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.4,
    'reg_lambda':1,
}

In [63]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [64]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.346 m/s as root mean
Wind MAE:  0.264 m/s in avg
Wind MAPE:  3.179 %
Power RMSE:  162.822 kW as root mean
Power MAE:  102.751 kW in avg
Power MAPE:  7.454 %

Modelling errors for test set:
Wind RMSE:  0.39 m/s as root mean
Wind MAE:  0.304 m/s in avg
Wind MAPE:  3.526 %
Power RMSE:  175.374 kW as root mean
Power MAE:  110.031 kW in avg
Power MAPE:  7.661 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 1.2055770993232726 minutes ---

Lowest RMSE:  0.47666355647084435


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.4, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [65]:
parameters={
    'max_depth':5,
    'n_estimators': 200,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.4,
    'reg_lambda':1,
}

In [66]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [67]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.344 m/s as root mean
Wind MAE:  0.262 m/s in avg
Wind MAPE:  3.157 %
Power RMSE:  161.634 kW as root mean
Power MAE:  102.004 kW in avg
Power MAPE:  7.427 %

Modelling errors for test set:
Wind RMSE:  0.38 m/s as root mean
Wind MAE:  0.299 m/s in avg
Wind MAPE:  3.451 %
Power RMSE:  170.828 kW as root mean
Power MAE:  107.979 kW in avg
Power MAPE:  7.462 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 39.48564465443293 minutes ---

Lowest RMSE:  0.4744927778444109


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.4, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=200, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

### Manual modelling universal hyperparameters

In [None]:
#universal hyperparameters

In [44]:
parameters={
    'max_depth':10,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.6,
    'colsample_bytree':0.7,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [45]:
# WTG14

In [None]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train1.csv')
X_test= uploading_csv('\General','\X_test14.csv')
y_train= uploading_csv('\General','\y_train1.csv')
y_test= uploading_csv('\General','\y_test14.csv')

X_test.keys()

In [None]:
y_test.keys()

In [None]:
PC= uploading_csv('\Dataset1-Normal_Site','\PC_1.15kgm-3.csv')

In [46]:
model = modelling_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.229 m/s as root mean
Wind MAE:  0.162 m/s in avg
Wind MAPE:  1.943 %
Power RMSE:  107.324 kW as root mean
Power MAE:  63.637 kW in avg
Power MAPE:  4.499 %

Modelling errors for test set:
Wind RMSE:  0.465 m/s as root mean
Wind MAE:  0.349 m/s in avg
Wind MAPE:  4.033 %
Power RMSE:  208.451 kW as root mean
Power MAE:  127.814 kW in avg
Power MAPE:  8.69 %


Showing the results of the modelling: 
XGBoost modelling performed


In [47]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.229 m/s as root mean
Wind MAE:  0.162 m/s in avg
Wind MAPE:  1.943 %
Power RMSE:  107.324 kW as root mean
Power MAE:  63.637 kW in avg
Power MAPE:  4.499 %

Modelling errors for test set:
Wind RMSE:  0.465 m/s as root mean
Wind MAE:  0.349 m/s in avg
Wind MAPE:  4.033 %
Power RMSE:  208.451 kW as root mean
Power MAE:  127.814 kW in avg
Power MAPE:  8.69 %


Showing the results of the modelling: 
XGBoost results performed


In [48]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG14.csv')

file General_XGB_WTG14.csv saved in \Results_ folder


In [None]:
#########################################################################

In [32]:
# WTG15

In [33]:
#upload the dataset with file_folder, file_name

X_train= uploading_csv('\General','\X_train1.csv')
X_test= uploading_csv('\General','\X_test15.csv')
y_train= uploading_csv('\General','\y_train1.csv')
y_test= uploading_csv('\General','\y_test15.csv')

X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [35]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.229 m/s as root mean
Wind MAE:  0.162 m/s in avg
Wind MAPE:  1.943 %
Power RMSE:  107.324 kW as root mean
Power MAE:  63.637 kW in avg
Power MAPE:  4.499 %

Modelling errors for test set:
Wind RMSE:  0.524 m/s as root mean
Wind MAE:  0.397 m/s in avg
Wind MAPE:  4.887 %
Power RMSE:  231.999 kW as root mean
Power MAE:  151.659 kW in avg
Power MAPE:  12.091 %


Showing the results of the modelling: 
XGBoost results performed


In [36]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG15.csv')

file General_XGB_WTG15.csv saved in \Results_ folder


### Manual modelling tuning

In [43]:
#tuning hyperparameters

In [44]:
parameters={
    'max_depth':5,
    'n_estimators': 2000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.4,
    'reg_lambda':1,
}

In [45]:
#WTG14

In [58]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train1.csv')
X_test= uploading_csv('\General','\X_test14.csv')
y_train= uploading_csv('\General','\y_train1.csv')
y_test= uploading_csv('\General','\y_test14.csv')

X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [59]:
y_test.keys()

Index(['Target'], dtype='object')

In [60]:
PC= uploading_csv('\Dataset1-Normal_Site','\PC_1.15kgm-3.csv')

In [61]:
#modelling

In [62]:
model = modelling_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.212 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.651 %
Power RMSE:  98.455 kW as root mean
Power MAE:  54.266 kW in avg
Power MAPE:  3.774 %

Modelling errors for test set:
Wind RMSE:  0.506 m/s as root mean
Wind MAE:  0.383 m/s in avg
Wind MAPE:  4.421 %
Power RMSE:  224.229 kW as root mean
Power MAE:  138.895 kW in avg
Power MAPE:  9.503 %


Showing the results of the modelling: 
XGBoost modelling performed


In [63]:
#testing

In [64]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.212 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.651 %
Power RMSE:  98.455 kW as root mean
Power MAE:  54.266 kW in avg
Power MAPE:  3.774 %

Modelling errors for test set:
Wind RMSE:  0.506 m/s as root mean
Wind MAE:  0.383 m/s in avg
Wind MAPE:  4.421 %
Power RMSE:  224.229 kW as root mean
Power MAE:  138.895 kW in avg
Power MAPE:  9.503 %


Showing the results of the modelling: 
XGBoost results performed


In [65]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG14.csv')

file General_tun_XGB_WTG14.csv saved in \Results_ folder


In [66]:
############################################################################################

In [67]:
#WTG15

In [68]:
#upload the dataset with file_folder, file_name

X_train= uploading_csv('\General','\X_train1.csv')
X_test= uploading_csv('\General','\X_test15.csv')
y_train= uploading_csv('\General','\y_train1.csv')
y_test= uploading_csv('\General','\y_test15.csv')

X_test.keys()

Index(['T2', 'RH2', 'T1', 'RH1', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WS1',
       'WS3', 'WS4', 'WD1', 'WD3', 'WD4', 'WSHor', 'WDHor', 'WSVer', 'WDVer',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [69]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.212 m/s as root mean
Wind MAE:  0.139 m/s in avg
Wind MAPE:  1.651 %
Power RMSE:  98.455 kW as root mean
Power MAE:  54.266 kW in avg
Power MAPE:  3.774 %

Modelling errors for test set:
Wind RMSE:  0.547 m/s as root mean
Wind MAE:  0.415 m/s in avg
Wind MAPE:  5.128 %
Power RMSE:  241.42 kW as root mean
Power MAE:  159.094 kW in avg
Power MAPE:  12.707 %


Showing the results of the modelling: 
XGBoost results performed


In [70]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG15.csv')

file General_tun_XGB_WTG15.csv saved in \Results_ folder


## Dataset2

In [22]:
#doing

In [23]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train2.csv')
X_test= uploading_csv('\General','\X_test11.csv')
y_train= uploading_csv('\General','\y_train2.csv')
y_test= uploading_csv('\General','\y_test11.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WD1', 'WD4', 'WSHor', 'WSVer', 'WDHor', 'RH1',
       'Rain', 'WSH', 'WVeer', 'TI', 'WDVer', 'WD_bin', 'tod'],
      dtype='object')

In [24]:
y_test.keys()

Index(['Target'], dtype='object')

In [25]:
PC= uploading_csv('\Dataset2-Complex_Site','\PC_V112.csv')

In [26]:
X_train=X_train.drop(columns=['RH1'])
X_test=X_test.drop(columns=['RH1'])

### Grid Search

In [27]:
#pending

In [51]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [52]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [53]:
 GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 300}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.274 m/s as root mean
Wind MAE:  0.21 m/s in avg
Wind MAPE:  2.322 %
Power RMSE:  124.162 kW as root mean
Power MAE:  78.482 kW in avg
Power MAPE:  5.479 %

Modelling errors for test set:
Wind RMSE:  0.571 m/s as root mean
Wind MAE:  0.444 m/s in avg
Wind MAPE:  4.795 %
Power RMSE:  228.146 kW as root mean
Power MAE:  150.49 kW in avg
Power MAPE:  10.834 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 2.0538490374883014 minutes ---

Lowest RMSE:  0.5882572614848081


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [54]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [55]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [56]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.308 m/s as root mean
Wind MAE:  0.234 m/s in avg
Wind MAPE:  2.591 %
Power RMSE:  139.128 kW as root mean
Power MAE:  87.141 kW in avg
Power MAPE:  6.1 %

Modelling errors for test set:
Wind RMSE:  0.576 m/s as root mean
Wind MAE:  0.448 m/s in avg
Wind MAPE:  4.838 %
Power RMSE:  230.553 kW as root mean
Power MAE:  152.859 kW in avg
Power MAPE:  10.932 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 6.669050025939941 minutes ---

Lowest RMSE:  0.5824688218535748


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [57]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [58]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [59]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.305 m/s as root mean
Wind MAE:  0.232 m/s in avg
Wind MAPE:  2.558 %
Power RMSE:  137.71 kW as root mean
Power MAE:  86.198 kW in avg
Power MAPE:  6.002 %

Modelling errors for test set:
Wind RMSE:  0.578 m/s as root mean
Wind MAE:  0.45 m/s in avg
Wind MAPE:  4.861 %
Power RMSE:  231.781 kW as root mean
Power MAE:  153.612 kW in avg
Power MAPE:  11.008 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.6265353202819824 minutes ---

Lowest RMSE:  0.58234352670207


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [60]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [61]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [62]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 1, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.296 m/s as root mean
Wind MAE:  0.225 m/s in avg
Wind MAPE:  2.486 %
Power RMSE:  132.841 kW as root mean
Power MAE:  83.338 kW in avg
Power MAPE:  5.829 %

Modelling errors for test set:
Wind RMSE:  0.57 m/s as root mean
Wind MAE:  0.441 m/s in avg
Wind MAPE:  4.748 %
Power RMSE:  226.396 kW as root mean
Power MAE:  149.379 kW in avg
Power MAPE:  10.623 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 7.9996373097101845 minutes ---

Lowest RMSE:  0.579821074153776


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [63]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [64]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [65]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 1}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.296 m/s as root mean
Wind MAE:  0.225 m/s in avg
Wind MAPE:  2.486 %
Power RMSE:  132.841 kW as root mean
Power MAE:  83.338 kW in avg
Power MAPE:  5.829 %

Modelling errors for test set:
Wind RMSE:  0.57 m/s as root mean
Wind MAE:  0.441 m/s in avg
Wind MAPE:  4.748 %
Power RMSE:  226.396 kW as root mean
Power MAE:  149.379 kW in avg
Power MAPE:  10.623 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 0.657681401570638 minutes ---

Lowest RMSE:  0.579821074153776


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [66]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':1,
    'min_child_weight':10,
    'gamma':0.1,
    'reg_lambda':1,
}

In [67]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [68]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.01, 'n_estimators': 2000}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.348 m/s as root mean
Wind MAE:  0.267 m/s in avg
Wind MAPE:  2.933 %
Power RMSE:  155.407 kW as root mean
Power MAE:  98.184 kW in avg
Power MAPE:  6.839 %

Modelling errors for test set:
Wind RMSE:  0.565 m/s as root mean
Wind MAE:  0.44 m/s in avg
Wind MAPE:  4.743 %
Power RMSE:  225.776 kW as root mean
Power MAE:  149.54 kW in avg
Power MAPE:  10.683 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 16.78581728140513 minutes ---

Lowest RMSE:  0.5736045603820564


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=1,
                                    gamma=0.1, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

### Manual modelling

In [28]:
parameters={
    'max_depth':10,
    'n_estimators': 2000,
    'learning_rate':0.01,
    'subsample':0.6,
    'colsample_bytree':0.7,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [30]:
#T11

In [29]:
model = modelling_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.241 m/s in avg
Wind MAPE:  3.401 %
Power RMSE:  138.048 kW as root mean
Power MAE:  85.227 kW in avg
Power MAPE:  6.921173737366487e+16 %

Modelling errors for test set:
Wind RMSE:  0.809 m/s as root mean
Wind MAE:  0.564 m/s in avg
Wind MAPE:  8.784 %
Power RMSE:  303.141 kW as root mean
Power MAE:  168.545 kW in avg
Power MAPE:  1.0583801145387937e+18 %


Showing the results of the modelling: 
XGBoost modelling performed


In [31]:
#T17

In [32]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train2.csv')
X_test= uploading_csv('\General','\X_test17.csv')
y_train= uploading_csv('\General','\y_train2.csv')
y_test= uploading_csv('\General','\y_test17.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WD1', 'WD4', 'WSHor', 'WSVer', 'WDHor', 'RH1',
       'Rain', 'WSH', 'WVeer', 'TI', 'WDVer', 'WD_bin', 'tod'],
      dtype='object')

In [33]:
X_train=X_train.drop(columns=['RH1'])
X_test=X_test.drop(columns=['RH1'])

In [34]:
PC= uploading_csv('\Dataset2-Complex_Site','\PC_V112.csv')

In [35]:
model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.241 m/s in avg
Wind MAPE:  3.401 %
Power RMSE:  138.048 kW as root mean
Power MAE:  85.227 kW in avg
Power MAPE:  6.921173737366487e+16 %

Modelling errors for test set:
Wind RMSE:  0.61 m/s as root mean
Wind MAE:  0.469 m/s in avg
Wind MAPE:  5.113 %
Power RMSE:  259.386 kW as root mean
Power MAE:  165.787 kW in avg
Power MAPE:  11.819 %


Showing the results of the modelling: 
XGBoost results performed


In [36]:
#T22

In [37]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train2.csv')
X_test= uploading_csv('\General','\X_test22.csv')
y_train= uploading_csv('\General','\y_train2.csv')
y_test= uploading_csv('\General','\y_test22.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WD1', 'WD4', 'WSHor', 'WSVer', 'WDHor', 'RH1',
       'Rain', 'WSH', 'WVeer', 'TI', 'WDVer', 'WD_bin', 'tod'],
      dtype='object')

In [38]:
X_train=X_train.drop(columns=['RH1'])
X_test=X_test.drop(columns=['RH1'])

In [39]:
PC= uploading_csv('\Dataset2-Complex_Site','\PC_V117.csv')

In [40]:
model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.331 m/s as root mean
Wind MAE:  0.241 m/s in avg
Wind MAPE:  3.401 %
Power RMSE:  140.696 kW as root mean
Power MAE:  84.216 kW in avg
Power MAPE:  7.72010394800332e+16 %

Modelling errors for test set:
Wind RMSE:  0.774 m/s as root mean
Wind MAE:  0.59 m/s in avg
Wind MAPE:  7.682 %
Power RMSE:  347.184 kW as root mean
Power MAE:  233.666 kW in avg
Power MAPE:  24.364 %


Showing the results of the modelling: 
XGBoost results performed


## Dataset3

In [97]:
#WTG18

In [98]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test18.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test18.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [99]:
y_test.keys()

Index(['Target'], dtype='object')

In [100]:
PC= uploading_csv('\Dataset3-New_Site','\PC_V150.csv')

### Grid Search

In [101]:
#doing

In [102]:
parameters={
    'max_depth':5,
    'n_estimators': 1000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [103]:
param_grid={
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
}

In [104]:
 GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 10 candidates, totalling 40 fits

Best parameters :
{'n_estimators': 300}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.372 m/s as root mean
Wind MAE:  0.28 m/s in avg
Wind MAPE:  3.585 %
Power RMSE:  219.894 kW as root mean
Power MAE:  144.014 kW in avg
Power MAPE:  8.873 %

Modelling errors for test set:
Wind RMSE:  0.474 m/s as root mean
Wind MAE:  0.333 m/s in avg
Wind MAPE:  4.316 %
Power RMSE:  257.93 kW as root mean
Power MAE:  167.439 kW in avg
Power MAPE:  19.295 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 21.248409374554953 minutes ---

Lowest RMSE:  0.4854855389876249


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=1000, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [105]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':1,
    'gamma':0,
    'reg_lambda':1,
}

In [106]:
param_grid={
    'max_depth': [3, 4, 5, 6, 10, 15, 20],
    'min_child_weight': [1, 2, 3, 6, 10]
}

In [107]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 35 candidates, totalling 140 fits

Best parameters :
{'max_depth': 5, 'min_child_weight': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.378 m/s as root mean
Wind MAE:  0.284 m/s in avg
Wind MAPE:  3.637 %
Power RMSE:  222.672 kW as root mean
Power MAE:  145.841 kW in avg
Power MAPE:  8.999 %

Modelling errors for test set:
Wind RMSE:  0.47 m/s as root mean
Wind MAE:  0.331 m/s in avg
Wind MAPE:  4.289 %
Power RMSE:  257.066 kW as root mean
Power MAE:  166.287 kW in avg
Power MAPE:  18.721 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 58.45617016951243 minutes ---

Lowest RMSE:  0.4814161950829289


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=1,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,


In [108]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0,
    'reg_lambda':1,
}

In [109]:
param_grid={
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5]
}

In [110]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 6 candidates, totalling 24 fits

Best parameters :
{'gamma': 0.5}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.382 m/s as root mean
Wind MAE:  0.286 m/s in avg
Wind MAPE:  3.671 %
Power RMSE:  224.334 kW as root mean
Power MAE:  146.909 kW in avg
Power MAPE:  9.083 %

Modelling errors for test set:
Wind RMSE:  0.471 m/s as root mean
Wind MAE:  0.331 m/s in avg
Wind MAPE:  4.285 %
Power RMSE:  257.289 kW as root mean
Power MAE:  166.464 kW in avg
Power MAPE:  18.805 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 7.851333196957906 minutes ---

Lowest RMSE:  0.4797302981235918


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=None,

In [111]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.5,
    'reg_lambda':1,
}

In [112]:
param_grid={
    'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

In [113]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 49 candidates, totalling 196 fits

Best parameters :
{'colsample_bytree': 0.8, 'subsample': 0.8}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.382 m/s as root mean
Wind MAE:  0.286 m/s in avg
Wind MAPE:  3.671 %
Power RMSE:  224.334 kW as root mean
Power MAE:  146.909 kW in avg
Power MAPE:  9.083 %

Modelling errors for test set:
Wind RMSE:  0.471 m/s as root mean
Wind MAE:  0.331 m/s in avg
Wind MAPE:  4.285 %
Power RMSE:  257.289 kW as root mean
Power MAE:  166.464 kW in avg
Power MAPE:  18.805 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 37.00842117468516 minutes ---

Lowest RMSE:  0.4797302981235918


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.5, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [117]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.5,
    'reg_lambda':1,
}

In [118]:
param_grid={
    'reg_lambda': [1, 10, 50, 100, 150]
}

In [119]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 5 candidates, totalling 20 fits

Best parameters :
{'reg_lambda': 10}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.388 m/s as root mean
Wind MAE:  0.29 m/s in avg
Wind MAPE:  3.711 %
Power RMSE:  227.046 kW as root mean
Power MAE:  148.525 kW in avg
Power MAPE:  9.182 %

Modelling errors for test set:
Wind RMSE:  0.469 m/s as root mean
Wind MAE:  0.333 m/s in avg
Wind MAPE:  4.306 %
Power RMSE:  255.949 kW as root mean
Power MAE:  167.262 kW in avg
Power MAPE:  19.358 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 4.018531171480815 minutes ---

Lowest RMSE:  0.47850929882396204


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.5, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=1,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=Non

In [120]:
parameters={
    'max_depth':5,
    'n_estimators': 300,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.5,
    'reg_lambda':10,
}

In [121]:
param_grid={
    'learning_rate': [0.001, 0.01, 0.1, 0.5],
    'n_estimators': [800, 900, 1000, 1200, 1500, 1800, 2000]
}

In [122]:
GridSearch_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, param_grid, plot_error=False)

Fitting 4 folds for each of 28 candidates, totalling 112 fits

Best parameters :
{'learning_rate': 0.1, 'n_estimators': 800}

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.336 m/s as root mean
Wind MAE:  0.25 m/s in avg
Wind MAPE:  3.21 %
Power RMSE:  197.254 kW as root mean
Power MAE:  128.505 kW in avg
Power MAPE:  7.902 %

Modelling errors for test set:
Wind RMSE:  0.468 m/s as root mean
Wind MAE:  0.329 m/s in avg
Wind MAPE:  4.263 %
Power RMSE:  253.13 kW as root mean
Power MAE:  164.478 kW in avg
Power MAPE:  19.039 %


Showing the results of the modelling: 
GridSearch_ XGBoost performed
--- 98.95010008811951 minutes ---

Lowest RMSE:  0.479929974691109


GridSearchCV(cv=4,
             estimator=XGBRegressor(base_score=None, booster='gbtree',
                                    colsample_bylevel=None,
                                    colsample_bynode=None, colsample_bytree=0.8,
                                    gamma=0.5, gpu_id=None,
                                    importance_type='gain',
                                    interaction_constraints=None,
                                    learning_rate=0.1, max_delta_step=None,
                                    max_depth=5, min_child_weight=10,
                                    missing=nan, monotone_constraints=None,
                                    n_estimators=300, n_jobs=None,
                                    num_parallel_tree=None, random_state=42,
                                    reg_alpha=None, reg_lambda=10,
                                    scale_pos_weight=None, subsample=0.8,
                                    tree_method=None, validate_parameters=No

### Manual modelling universal hyperparameters

In [143]:
parameters={
    'max_depth':5,
    'n_estimators': 2000,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.4,
    'reg_lambda':1,
}

In [144]:
#WTG18

In [145]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test18.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test18.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [146]:
PC= uploading_csv('\Dataset3-New_Site','\PC_V150.csv')

In [147]:
#modelling

In [148]:
model = modelling_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.271 m/s as root mean
Wind MAE:  0.192 m/s in avg
Wind MAPE:  2.453 %
Power RMSE:  158.669 kW as root mean
Power MAE:  99.299 kW in avg
Power MAPE:  5.966 %

Modelling errors for test set:
Wind RMSE:  0.493 m/s as root mean
Wind MAE:  0.338 m/s in avg
Wind MAPE:  4.388 %
Power RMSE:  263.596 kW as root mean
Power MAE:  168.53 kW in avg
Power MAPE:  19.361 %


Showing the results of the modelling: 
XGBoost modelling performed


In [149]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.271 m/s as root mean
Wind MAE:  0.192 m/s in avg
Wind MAPE:  2.453 %
Power RMSE:  158.669 kW as root mean
Power MAE:  99.299 kW in avg
Power MAPE:  5.966 %

Modelling errors for test set:
Wind RMSE:  0.493 m/s as root mean
Wind MAE:  0.338 m/s in avg
Wind MAPE:  4.388 %
Power RMSE:  263.596 kW as root mean
Power MAE:  168.53 kW in avg
Power MAPE:  19.361 %


Showing the results of the modelling: 
XGBoost results performed


In [150]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG18.csv')

file General_XGB_WTG18.csv saved in \Results_ folder


In [151]:
#WTG20

In [152]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test20.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test20.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [153]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.271 m/s as root mean
Wind MAE:  0.192 m/s in avg
Wind MAPE:  2.453 %
Power RMSE:  158.669 kW as root mean
Power MAE:  99.299 kW in avg
Power MAPE:  5.966 %

Modelling errors for test set:
Wind RMSE:  0.507 m/s as root mean
Wind MAE:  0.352 m/s in avg
Wind MAPE:  4.463 %
Power RMSE:  278.96 kW as root mean
Power MAE:  179.647 kW in avg
Power MAPE:  10.402 %


Showing the results of the modelling: 
XGBoost results performed


In [154]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG20.csv')

file General_XGB_WTG20.csv saved in \Results_ folder


In [155]:
#WTG43

In [156]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test43.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test43.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [157]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.271 m/s as root mean
Wind MAE:  0.192 m/s in avg
Wind MAPE:  2.453 %
Power RMSE:  158.669 kW as root mean
Power MAE:  99.299 kW in avg
Power MAPE:  5.966 %

Modelling errors for test set:
Wind RMSE:  0.546 m/s as root mean
Wind MAE:  0.407 m/s in avg
Wind MAPE:  4.769 %
Power RMSE:  291.18 kW as root mean
Power MAE:  178.78 kW in avg
Power MAPE:  10.11 %


Showing the results of the modelling: 
XGBoost results performed


In [158]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG43.csv')

file General_XGB_WTG43.csv saved in \Results_ folder


In [159]:
#WTG46

In [160]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test46.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test46.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [161]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.271 m/s as root mean
Wind MAE:  0.192 m/s in avg
Wind MAPE:  2.453 %
Power RMSE:  158.669 kW as root mean
Power MAE:  99.299 kW in avg
Power MAPE:  5.966 %

Modelling errors for test set:
Wind RMSE:  0.505 m/s as root mean
Wind MAE:  0.368 m/s in avg
Wind MAPE:  4.925 %
Power RMSE:  305.023 kW as root mean
Power MAE:  192.413 kW in avg
Power MAPE:  12.893 %


Showing the results of the modelling: 
XGBoost results performed


In [162]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_XGB_WTG46.csv')

file General_XGB_WTG46.csv saved in \Results_ folder


### Manual modelling tuning

In [163]:
#tuning hyperparameters

In [164]:
parameters={
    'max_depth':5,
    'n_estimators': 800,
    'learning_rate':0.1,
    'subsample':0.8,
    'colsample_bytree':0.8,
    'min_child_weight':10,
    'gamma':0.5,
    'reg_lambda':10,
}

In [165]:
#WTG18

In [166]:
#upload the dataset with file_folder, file_name
# data_up= uploading_csv('\Dataset1-Normal_Site','\data_comp14.csv')
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test18.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test18.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [167]:
PC= uploading_csv('\Dataset3-New_Site','\PC_V150.csv')

In [168]:
#modelling

In [169]:
model = modelling_XGBoost (X_train, X_test, y_train, y_test, PC, parameters, plot_error=False, plot=True)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.323 m/s as root mean
Wind MAE:  0.238 m/s in avg
Wind MAPE:  3.051 %
Power RMSE:  189.982 kW as root mean
Power MAE:  122.561 kW in avg
Power MAPE:  7.494 %

Modelling errors for test set:
Wind RMSE:  0.473 m/s as root mean
Wind MAE:  0.331 m/s in avg
Wind MAPE:  4.293 %
Power RMSE:  253.771 kW as root mean
Power MAE:  164.76 kW in avg
Power MAPE:  18.712 %


Showing the results of the modelling: 
XGBoost modelling performed


In [170]:
#testing

In [171]:
#WTG18

In [172]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.323 m/s as root mean
Wind MAE:  0.238 m/s in avg
Wind MAPE:  3.051 %
Power RMSE:  189.982 kW as root mean
Power MAE:  122.561 kW in avg
Power MAPE:  7.494 %

Modelling errors for test set:
Wind RMSE:  0.473 m/s as root mean
Wind MAE:  0.331 m/s in avg
Wind MAPE:  4.293 %
Power RMSE:  253.771 kW as root mean
Power MAE:  164.76 kW in avg
Power MAPE:  18.712 %


Showing the results of the modelling: 
XGBoost results performed


In [173]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG18.csv')

file General_tun_XGB_WTG18.csv saved in \Results_ folder


In [174]:
#WTG20

In [175]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test20.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test20.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [176]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.323 m/s as root mean
Wind MAE:  0.238 m/s in avg
Wind MAPE:  3.051 %
Power RMSE:  189.982 kW as root mean
Power MAE:  122.561 kW in avg
Power MAPE:  7.494 %

Modelling errors for test set:
Wind RMSE:  0.463 m/s as root mean
Wind MAE:  0.326 m/s in avg
Wind MAPE:  4.157 %
Power RMSE:  262.909 kW as root mean
Power MAE:  170.345 kW in avg
Power MAPE:  9.852 %


Showing the results of the modelling: 
XGBoost results performed


In [177]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG20.csv')

file General_tun_XGB_WTG20.csv saved in \Results_ folder


In [178]:
#WTG43

In [179]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test43.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test43.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [180]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.323 m/s as root mean
Wind MAE:  0.238 m/s in avg
Wind MAPE:  3.051 %
Power RMSE:  189.982 kW as root mean
Power MAE:  122.561 kW in avg
Power MAPE:  7.494 %

Modelling errors for test set:
Wind RMSE:  0.518 m/s as root mean
Wind MAE:  0.391 m/s in avg
Wind MAPE:  4.593 %
Power RMSE:  275.975 kW as root mean
Power MAE:  171.84 kW in avg
Power MAPE:  9.731 %


Showing the results of the modelling: 
XGBoost results performed


In [181]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG43.csv')

file General_tun_XGB_WTG43.csv saved in \Results_ folder


In [182]:
#WTG46

In [183]:
#upload the dataset with file_folder, file_name
X_train= uploading_csv('\General','\X_train3.csv')
X_test= uploading_csv('\General','\X_test46.csv')
y_train= uploading_csv('\General','\y_train3.csv')
y_test= uploading_csv('\General','\y_test46.csv')

X_test.keys()

Index(['WS1', 'WS3', 'WS4', 'WSHor', 'WDHor', 'WSVer', 'WDVer', 'T1', 'RH1',
       'T2', 'RH2', 'PR1', 'AD1', 'PR2', 'AD2', 'Rain', 'WD1', 'WD3', 'WD4',
       'TI', 'WSH', 'WD_bin', 'tod', 'WVeer'],
      dtype='object')

In [184]:
WS_pred=model_testing (X_train, X_test, y_train, y_test, PC, model, plot_error=False)

power curve computation performed
power curve computation performed
Modelling errors for training set:
Wind RMSE:  0.323 m/s as root mean
Wind MAE:  0.238 m/s in avg
Wind MAPE:  3.051 %
Power RMSE:  189.982 kW as root mean
Power MAE:  122.561 kW in avg
Power MAPE:  7.494 %

Modelling errors for test set:
Wind RMSE:  0.457 m/s as root mean
Wind MAE:  0.337 m/s in avg
Wind MAPE:  4.541 %
Power RMSE:  281.613 kW as root mean
Power MAE:  179.069 kW in avg
Power MAPE:  12.02 %


Showing the results of the modelling: 
XGBoost results performed


In [185]:
WS_pred=pd.DataFrame(WS_pred)
save(WS_pred,'\Results_','General_tun_XGB_WTG46.csv')

file General_tun_XGB_WTG46.csv saved in \Results_ folder
