In [46]:
import os
import sys
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly_express as px
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from xgboost import XGBRegressor

# Make the parent directory importable so the local `src` package can be found.
module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.paths import DATA_DIR, RAW_DATA_DIR, TRANSFORMED_DATA_DIR

In [None]:
# Read the transformed training data (target column plus the selected features).
features_path = os.path.join(TRANSFORMED_DATA_DIR, 'wind_farm_topn_features.parquet')
wind_farms_data_features_target = pd.read_parquet(features_path)

# Later cells train on and overwrite the 'CF' column, so fail early if it is missing.
assert 'CF' in wind_farms_data_features_target.columns, "expected target column 'CF' in the transformed features"



The datetime features are created here, based on the final set of features used by the model.  
This step can be streamlined later by converting it into a function.

In [None]:
# NOTE(review): no cell visible above creates `wind_farms_predict`; confirm it is
# loaded before this point, otherwise this fails on a fresh kernel.
# Move the DATETIME column into the index, then make the index a DatetimeIndex so
# the .hour / .dayofweek / .month accessors used below are available.
predict_indexed = wind_farms_predict.set_index("DATETIME")
predict_indexed.index = pd.DatetimeIndex(predict_indexed.index)
wind_farms_predict = predict_indexed

In [None]:

# Cyclical encodings of the timestamp index: each calendar component is scaled
# to a fraction of its period and mapped onto a cosine/sine wave.
timestamps = wind_farms_predict.index

wind_farms_predict['cosine_time_of_day'] = np.cos((timestamps.hour / 24) * 2 * np.pi)
wind_farms_predict['cosine_day_of_week'] = np.cos((timestamps.dayofweek / 7) * 2 * np.pi)
wind_farms_predict['sine_month'] = np.sin((timestamps.month / 12) * 2 * np.pi)

In [None]:
# Give the prediction frame exactly the same column layout as the training frame.
# Only the feature columns are selected from the prediction data, so this also
# works when the prediction data has no 'CF' column yet; the placeholder target
# is then added via `assign` (which returns a new frame, avoiding a
# SettingWithCopyWarning) and the training column order is restored.
training_columns = list(wind_farms_data_features_target.columns)
feature_columns = [col for col in training_columns if col != 'CF']

wind_farms_predict = wind_farms_predict[feature_columns].assign(CF=0)[training_columns]

wind_farms_predict

# Fit the model on the training data using the tuned hyperparameters

In [None]:
# Tuned hyperparameters for the regressor.
params = {'subsample': 0.7, 'n_estimators': 200, 'min_child_weight': 3, 'max_depth': 3, 'learning_rate': 0.1, 'gamma': 0, 'colsample_bytree': 0.7}
xgb_reg = XGBRegressor(**params, random_state=42)

# The model is fed plain arrays, so the feature order must be identical in both frames.
assert list(wind_farms_predict.columns) == list(wind_farms_data_features_target.columns), \
    "training and prediction frames must share the same column layout"

# Select the target by name rather than by position, so the split does not
# silently break if 'CF' is ever not the first column.
X_train = wind_farms_data_features_target.drop(columns='CF').to_numpy()
y_train = wind_farms_data_features_target['CF'].to_numpy()
X_test = wind_farms_predict.drop(columns='CF').to_numpy()
xgb_reg.fit(X_train, y_train)

In [52]:
# Run the fitted model on the prediction features and store the output in 'CF'.
# `assign` returns a new frame, so `wind_farms_predict` itself is left untouched.
predicted_cf = xgb_reg.predict(X_test)
wind_farms_predict_result = wind_farms_predict.assign(CF=predicted_cf)
wind_farms_predict_result

Unnamed: 0,DATETIME,CF,WS_1133733,WS_1156770,WS_1156968,WS_1182195,WS_1193865,WS_1321569,WS_1324653,WS_1358699,...,WS_78208,WS_811655,WS_839753,WS_875373,WS_883683,WS_883699,WS_914573,WS_918665,WS_920775,WS_921049
0,2020-01-09 00:00:00,0.023735,1.69,1.88,3.15,0.92,2.27,1.14,1.24,2.98,...,1.40,1.27,1.27,1.43,1.28,1.91,1.12,1.15,1.35,1.46
1,2020-01-09 01:00:00,0.023752,1.60,2.02,3.91,0.44,2.36,1.15,1.63,2.83,...,1.02,1.11,1.43,2.04,1.60,1.57,1.03,1.28,1.49,1.29
2,2020-01-09 02:00:00,0.023961,1.79,2.14,4.20,0.65,2.06,1.28,2.36,3.07,...,1.39,1.42,1.42,2.91,1.81,1.85,1.10,1.56,1.54,1.56
3,2020-01-09 03:00:00,0.021891,2.01,2.32,5.17,0.51,2.31,0.83,3.39,3.38,...,1.55,2.01,1.26,2.49,2.09,2.09,1.06,2.12,1.66,2.18
4,2020-01-09 04:00:00,0.037744,2.12,2.54,4.19,0.77,4.52,0.97,2.89,3.79,...,1.64,2.40,1.69,1.36,2.08,2.06,1.08,2.33,2.17,2.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,2020-01-12 16:00:00,0.034263,3.12,2.62,4.04,6.08,1.07,2.42,2.00,6.50,...,3.93,5.69,5.19,1.80,3.23,3.48,5.16,3.36,5.30,4.55
89,2020-01-12 17:00:00,0.024499,2.79,2.31,4.25,5.95,1.59,3.96,1.73,7.42,...,3.58,4.90,4.59,2.05,2.68,3.04,4.59,2.72,4.63,3.81
90,2020-01-12 18:00:00,0.024457,2.86,2.44,3.95,6.13,2.58,2.58,1.56,7.36,...,3.62,4.63,4.14,2.21,2.41,3.34,4.77,2.54,4.27,3.42
91,2020-01-12 19:00:00,0.025231,2.81,2.20,4.32,6.07,1.93,2.02,1.61,6.72,...,3.95,6.02,4.20,1.37,2.01,3.58,5.06,2.44,4.46,4.33


# Export the predicted values to an Excel file, as described in the project

In [51]:
# Write the predicted 'CF' values (with their index) to an Excel file.

# Create the output folder first: to_excel fails if the directory does not exist.
output_dir = f'{DATA_DIR}/output'
os.makedirs(output_dir, exist_ok=True)

wind_farms_predict_result['CF'].to_excel(f'{output_dir}/Wind_data_predict.xlsx')

In [55]:
# Stack the training rows on top of the predicted rows and display the result.
frames_to_stack = [wind_farms_data_features_target, wind_farms_predict_result]
pd.concat(frames_to_stack)

Unnamed: 0,CF,WS_2503597,WS_75936,WS_2508550,WS_73494,WS_75955,WS_78205,WS_78207,WS_75933,WS_75935,...,WS_78208,WS_811655,WS_839753,WS_875373,WS_883683,WS_883699,WS_914573,WS_918665,WS_920775,WS_921049
2019-08-14 23:00:00,0.169708,8.44,8.71,8.67,7.88,9.19,6.69,6.98,7.79,8.94,...,,,,,,,,,,
2019-08-15 00:00:00,0.170869,7.44,7.72,7.75,6.96,9.17,6.83,6.71,6.88,7.90,...,,,,,,,,,,
2019-08-15 01:00:00,0.151850,6.30,6.62,6.68,6.05,9.24,6.42,6.33,5.89,6.74,...,,,,,,,,,,
2019-08-15 02:00:00,0.136971,5.08,6.00,5.97,5.47,9.21,5.67,5.76,5.34,6.11,...,,,,,,,,,,
2019-08-15 03:00:00,0.120561,4.73,5.93,5.90,5.47,8.86,4.94,4.67,5.31,6.02,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,0.034263,2.45,3.11,3.07,3.35,1.05,3.17,3.10,3.45,3.09,...,3.93,5.69,5.19,1.80,3.23,3.48,5.16,3.36,5.30,4.55
89,0.024499,1.99,2.54,2.54,2.78,1.65,2.77,2.86,2.75,2.54,...,3.58,4.90,4.59,2.05,2.68,3.04,4.59,2.72,4.63,3.81
90,0.024457,2.00,2.23,2.18,2.37,2.53,2.97,2.56,2.43,2.26,...,3.62,4.63,4.14,2.21,2.41,3.34,4.77,2.54,4.27,3.42
91,0.025231,1.37,1.79,1.83,2.42,1.80,2.98,2.71,2.42,1.80,...,3.95,6.02,4.20,1.37,2.01,3.58,5.06,2.44,4.46,4.33
