In [70]:
import os
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly_express as px
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from xgboost import XGBRegressor

# Put the parent directory of the working directory on sys.path so that the
# local `src` package imported below can be resolved.
module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

# TRANSFORMED_DATA_DIR is needed by the data-loading cell further down; without
# it that cell raises NameError on a fresh kernel.
# NOTE(review): assumes src.paths defines TRANSFORMED_DATA_DIR alongside
# RAW_DATA_DIR and DATA_DIR -- confirm.
from src.paths import DATA_DIR, RAW_DATA_DIR, TRANSFORMED_DATA_DIR

In [61]:
# Load the transformed feature/target table from parquet.
# Later cells use its columns as the schema for the prediction frame and its
# rows as the training set.
features_path = f'{TRANSFORMED_DATA_DIR}/wind_farm_topn_features.parquet'
wind_farms_data_features_target = pd.read_parquet(features_path)

Create the datetime features for the prediction data, matching the final set of features used by the model.  
This can be streamlined by converting it into a function at a later stage.

In [62]:
# Index the prediction frame by its timestamp column and make sure the index
# is a DatetimeIndex (the feature cell below reads .hour/.dayofweek/.month).
# NOTE(review): `wind_farms_predict` is not created in any cell visible in this
# notebook -- TODO add the cell that loads it before this one.
# The guard keeps the cell re-runnable: once DATETIME has become the index it
# is no longer a column, and an unconditional set_index would raise KeyError.
if "DATETIME" in wind_farms_predict.columns:
    wind_farms_predict = wind_farms_predict.set_index("DATETIME")
wind_farms_predict.index = pd.DatetimeIndex(wind_farms_predict.index)

In [63]:

# Cyclical encodings derived from the DatetimeIndex: hour of day and day of
# week are mapped through cosine, month through sine.
timestamps = wind_farms_predict.index
wind_farms_predict = wind_farms_predict.assign(
    cosine_time_of_day=np.cos((timestamps.hour / 24) * 2 * np.pi),
    cosine_day_of_week=np.cos((timestamps.dayofweek / 7) * 2 * np.pi),
    sine_month=np.sin((timestamps.month / 12) * 2 * np.pi),
)

In [64]:
# Align the prediction frame with the training schema (same columns, same
# order) and fill the target column with a 0 placeholder until the model has
# produced predictions.
train_columns = wind_farms_data_features_target.columns

# Fail loudly if a model feature is absent; only the target 'CF' is allowed to
# be missing from the prediction input.
missing_features = [
    col for col in train_columns
    if col != 'CF' and col not in wind_farms_predict.columns
]
if missing_features:
    raise KeyError(f"Prediction data is missing feature columns: {missing_features}")

# reindex + assign returns a fresh frame, so there is no assignment onto a
# column-subset (SettingWithCopyWarning) and no KeyError when 'CF' is not yet
# present in the prediction data.
wind_farms_predict = wind_farms_predict.reindex(columns=train_columns).assign(CF=0)

wind_farms_predict

Unnamed: 0_level_0,CF,WS_2503597,WS_75936,WS_2508550,WS_73494,WS_75955,WS_78205,WS_78207,WS_75933,WS_75935,WS_609612,cosine_time_of_day,cosine_day_of_week,sine_month
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-09 00:00:00,0,0.83,1.18,1.08,1.31,2.40,1.95,1.98,1.41,1.16,1.68,1.000000e+00,-0.900969,0.5
2020-01-09 01:00:00,0,1.11,1.24,0.92,1.07,2.57,1.85,2.13,1.54,1.37,1.41,9.659258e-01,-0.900969,0.5
2020-01-09 02:00:00,0,0.97,1.26,0.86,1.14,2.17,1.99,2.22,1.63,1.42,1.69,8.660254e-01,-0.900969,0.5
2020-01-09 03:00:00,0,1.05,1.61,1.19,1.54,2.37,2.31,1.80,2.12,1.73,1.91,7.071068e-01,-0.900969,0.5
2020-01-09 04:00:00,0,0.89,1.81,1.39,1.81,4.58,2.29,2.01,2.45,1.91,1.98,5.000000e-01,-0.900969,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-12 16:00:00,0,2.45,3.11,3.07,3.35,1.05,3.17,3.10,3.45,3.09,3.38,-5.000000e-01,0.623490,0.5
2020-01-12 17:00:00,0,1.99,2.54,2.54,2.78,1.65,2.77,2.86,2.75,2.54,3.01,-2.588190e-01,0.623490,0.5
2020-01-12 18:00:00,0,2.00,2.23,2.18,2.37,2.53,2.97,2.56,2.43,2.26,3.11,-1.836970e-16,0.623490,0.5
2020-01-12 19:00:00,0,1.37,1.79,1.83,2.42,1.80,2.98,2.71,2.42,1.80,3.25,2.588190e-01,0.623490,0.5


# Fit the model on the training data using the tuned hyperparameters

In [65]:
# Tuned XGBoost hyperparameters; random_state is fixed for reproducibility.
params = {
    'subsample': 0.7,
    'n_estimators': 200,
    'min_child_weight': 3,
    'max_depth': 3,
    'learning_rate': 0.1,
    'gamma': 0,
    'colsample_bytree': 0.7,
}
xgb_reg = XGBRegressor(**params, random_state=42)

# Pick the target and the features by name instead of by position, so the
# split stays correct even if 'CF' is not the first column, and so train and
# test matrices are guaranteed to use the same feature order.
target_col = 'CF'
feature_cols = [col for col in wind_farms_data_features_target.columns if col != target_col]

X_train = wind_farms_data_features_target[feature_cols].to_numpy()
y_train = wind_farms_data_features_target[target_col].to_numpy()
X_test = wind_farms_predict[feature_cols].to_numpy()
xgb_reg.fit(X_train, y_train)

In [66]:
# Display the prediction frame again; its CF column still holds the 0
# placeholder here because predictions are written to a separate copy later.
wind_farms_predict

Unnamed: 0_level_0,CF,WS_2503597,WS_75936,WS_2508550,WS_73494,WS_75955,WS_78205,WS_78207,WS_75933,WS_75935,WS_609612,cosine_time_of_day,cosine_day_of_week,sine_month
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-09 00:00:00,0,0.83,1.18,1.08,1.31,2.40,1.95,1.98,1.41,1.16,1.68,1.000000e+00,-0.900969,0.5
2020-01-09 01:00:00,0,1.11,1.24,0.92,1.07,2.57,1.85,2.13,1.54,1.37,1.41,9.659258e-01,-0.900969,0.5
2020-01-09 02:00:00,0,0.97,1.26,0.86,1.14,2.17,1.99,2.22,1.63,1.42,1.69,8.660254e-01,-0.900969,0.5
2020-01-09 03:00:00,0,1.05,1.61,1.19,1.54,2.37,2.31,1.80,2.12,1.73,1.91,7.071068e-01,-0.900969,0.5
2020-01-09 04:00:00,0,0.89,1.81,1.39,1.81,4.58,2.29,2.01,2.45,1.91,1.98,5.000000e-01,-0.900969,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-12 16:00:00,0,2.45,3.11,3.07,3.35,1.05,3.17,3.10,3.45,3.09,3.38,-5.000000e-01,0.623490,0.5
2020-01-12 17:00:00,0,1.99,2.54,2.54,2.78,1.65,2.77,2.86,2.75,2.54,3.01,-2.588190e-01,0.623490,0.5
2020-01-12 18:00:00,0,2.00,2.23,2.18,2.37,2.53,2.97,2.56,2.43,2.26,3.11,-1.836970e-16,0.623490,0.5
2020-01-12 19:00:00,0,1.37,1.79,1.83,2.42,1.80,2.98,2.71,2.42,1.80,3.25,2.588190e-01,0.623490,0.5


In [67]:
# Run the fitted model on the prediction features and store the result in the
# CF column of a new frame, leaving `wind_farms_predict` itself unchanged.
predicted_cf = xgb_reg.predict(X_test)
wind_farms_predict_result = wind_farms_predict.assign(CF=predicted_cf)
wind_farms_predict_result

Unnamed: 0_level_0,CF,WS_2503597,WS_75936,WS_2508550,WS_73494,WS_75955,WS_78205,WS_78207,WS_75933,WS_75935,WS_609612,cosine_time_of_day,cosine_day_of_week,sine_month
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-09 00:00:00,0.023735,0.83,1.18,1.08,1.31,2.40,1.95,1.98,1.41,1.16,1.68,1.000000e+00,-0.900969,0.5
2020-01-09 01:00:00,0.023752,1.11,1.24,0.92,1.07,2.57,1.85,2.13,1.54,1.37,1.41,9.659258e-01,-0.900969,0.5
2020-01-09 02:00:00,0.023961,0.97,1.26,0.86,1.14,2.17,1.99,2.22,1.63,1.42,1.69,8.660254e-01,-0.900969,0.5
2020-01-09 03:00:00,0.021891,1.05,1.61,1.19,1.54,2.37,2.31,1.80,2.12,1.73,1.91,7.071068e-01,-0.900969,0.5
2020-01-09 04:00:00,0.037744,0.89,1.81,1.39,1.81,4.58,2.29,2.01,2.45,1.91,1.98,5.000000e-01,-0.900969,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-12 16:00:00,0.034263,2.45,3.11,3.07,3.35,1.05,3.17,3.10,3.45,3.09,3.38,-5.000000e-01,0.623490,0.5
2020-01-12 17:00:00,0.024499,1.99,2.54,2.54,2.78,1.65,2.77,2.86,2.75,2.54,3.01,-2.588190e-01,0.623490,0.5
2020-01-12 18:00:00,0.024457,2.00,2.23,2.18,2.37,2.53,2.97,2.56,2.43,2.26,3.11,-1.836970e-16,0.623490,0.5
2020-01-12 19:00:00,0.025231,1.37,1.79,1.83,2.42,1.80,2.98,2.71,2.42,1.80,3.25,2.588190e-01,0.623490,0.5


# Predicted vs. actual values (plot). The Excel export described in the project follows in the last cell

In [72]:
# Training rows followed by the predicted rows in a single frame.
# NOTE(review): this variable is not used by any cell visible here; it is kept
# in case later cells rely on it.
wind_farm_predicted_result = pd.concat([wind_farms_data_features_target, wind_farms_predict_result])

# Actual CF from the training data as a line with markers.
fig = px.line(
    wind_farms_data_features_target,
    x=wind_farms_data_features_target.index,
    y="CF",
    markers=True,
    hover_data=["CF"],
)

# Predicted CF as a red line; its legend entry is the last timestamp of the
# training data, i.e. the point from which the predictions take over.
fig.add_trace(
    go.Scatter(
        x=wind_farms_predict_result.index,
        y=wind_farms_predict_result['CF'],
        mode='lines',
        name=str(wind_farms_data_features_target.index.max()),
        line=dict(color='red'),
    )
)

# Use the legend's own title instead of a manually positioned paper annotation,
# so the label stays attached to the legend whatever the figure size, and
# request the legend explicitly so it is always drawn.
fig.update_layout(
    title="Predicted vs Actuals",
    showlegend=True,
    legend_title_text='Max Training date',
)
fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [None]:
# Export the predicted CF series (indexed by timestamp) to an Excel workbook.

# Create the output folder first: to_excel does not create missing parent
# directories and would otherwise fail on a fresh checkout.
output_dir = f'{DATA_DIR}/output'
os.makedirs(output_dir, exist_ok=True)

wind_farms_predict_result['CF'].to_excel(f'{output_dir}/Wind_data_predict.xlsx')