# Imports

In [1]:
import pandas as pd
import numpy as np
import joblib
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
# import plotly.offline as pyo

import plotly.io as pio
pio.renderers.default='notebook'

import matplotlib.pyplot as plt
# pyo.init_notebook_mode()

In [2]:
import sklearn.metrics as metrics
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Cargar datasets

In [3]:
# Folder holding the pre-split CSV files. Defined once so the notebook can be
# pointed at another location by editing a single line.
DATA_DIR = 'C:/Users/vmore/notebooks/TFM/Modelos eolica/datos_exogena'


def _read_dataset(file_name):
    """Read ``DATA_DIR/<file_name>`` into a DataFrame indexed by ``datetime_utc``.

    ``parse_dates=[0]`` asks pandas to parse the first CSV column as dates
    (presumably that is the ``datetime_utc`` column used as index - confirm
    against the files).
    """
    return pd.read_csv(DATA_DIR + '/' + file_name, parse_dates=[0], index_col='datetime_utc')


X_train = _read_dataset('X_train_2y.csv')
y_train = _read_dataset('y_train_2y.csv')
X_test = _read_dataset('X_test.csv')
y_test = _read_dataset('y_test.csv')
test_data = _read_dataset('test_data.csv')

In [4]:
# percentage_columns = [col for col in X_train.columns if 'percentage' in col]
# wind_columns = [col for col in X_train.columns if 'wind' in col]
# new_column_order = percentage_columns + wind_columns

In [5]:
# new_column_order

In [6]:
# X_train = X_train[new_column_order]
# X_test = X_test[new_column_order]

# 1. Modelo Persistencia

In [7]:
def persitence(X_train, steps):
    """Build a naive persistence forecast from a 2-D array.

    Every forecast step repeats, row by row, the value found in the last
    column of ``X_train``.

    Parameters
    ----------
    X_train : numpy.ndarray
        2-D array; only its last column is read.
    steps : int
        Number of forecast columns to produce.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(X_train.shape[0], steps)``.
    """
    n_rows = X_train.shape[0]
    last_column = X_train[:, -1]
    forecast = np.zeros((n_rows, steps))
    # One broadcast assignment fills every forecast step with the last column.
    forecast[:, :] = np.reshape(last_column, (n_rows, 1))
    return forecast

In [8]:
# Persistence forecast for the test set: each of the 6 forecast columns repeats
# the last column of X_test.
# NOTE(review): the values displayed below are ~4-6 whereas y_test is on a 0-1
# scale, so the last column of X_test is presumably not the percentage series.
# Confirm which column the baseline is meant to persist.
p = persitence(X_test.values, 6)
p

array([[4.235594 , 4.235594 , 4.235594 , 4.235594 , 4.235594 , 4.235594 ],
       [4.3006697, 4.3006697, 4.3006697, 4.3006697, 4.3006697, 4.3006697],
       [4.2877026, 4.2877026, 4.2877026, 4.2877026, 4.2877026, 4.2877026],
       ...,
       [5.6014013, 5.6014013, 5.6014013, 5.6014013, 5.6014013, 5.6014013],
       [5.5645123, 5.5645123, 5.5645123, 5.5645123, 5.5645123, 5.5645123],
       [5.5645123, 5.5645123, 5.5645123, 5.5645123, 5.5645123, 5.5645123]])

In [9]:
# MAE of the persistence baseline, one value per forecast horizon (column of p).
# Arguments follow sklearn's (y_true, y_pred) order; MAE is symmetric, so the
# printed values are the same as with the arguments swapped.
for i in range(p.shape[1]):
    print(metrics.mean_absolute_error(y_test.values[:, i], p[:, i]))
    

4.881929987010194
4.881909343391774
4.881889714782179
4.881874280175876
4.881865849213045
4.88186391678795


In [10]:
# Sanity check: the first persistence column is a copy of the last column of
# X_test, so this MAE is expected to be exactly 0.
metrics.mean_absolute_error(p[:,0], X_test.values[:,-1])

0.0

# 2. Modelo Ridge sklearn

### Training

In [11]:
# Building blocks of the pipeline: the regressor and the feature scaler.
ridge = Ridge()
std_sc = StandardScaler()

# Standardise the features, then fit the Ridge regression. The scaler created
# above is reused here instead of instantiating a second, anonymous one.
ridge_pipeline = Pipeline(steps=[('std_sc', std_sc),
                                 ('ridge', ridge)])

# Regularisation strengths to search over; the 'ridge__' prefix routes the
# parameter to the pipeline step named 'ridge'.
param_grid = {'ridge__alpha': [0.0000001, 0.000001, 0.00001, 0.0001, 0.0005, 0.001, 0.01, 0.1, 1],
#               'ridge__solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
             }

# Time-series cross validation: each split validates on rows that come after
# the rows it trains on.
tscv = TimeSeriesSplit(n_splits=2)

# Grid search scored with negative MAE (higher is better).
grid_search = GridSearchCV(ridge_pipeline, param_grid, cv=tscv, scoring='neg_mean_absolute_error')

# Fit on plain arrays; y_train is passed with all of its columns, so the model
# is fitted on every target column at once.
grid_search.fit(X_train.values, y_train.values)

In [12]:
# Pipeline refitted with the best alpha, and its mean cross-validated score
# (negative MAE, per the scoring passed to the grid search).
best_ridge_model = grid_search.best_estimator_
best_ridge_score = grid_search.best_score_

In [13]:
# Display the selected pipeline.
best_ridge_model

### Forecasting

In [14]:
# Forecast for the test set; .values passes a plain array, matching how the
# model was fitted.
y_pred_ridge = best_ridge_model.predict(X_test.values)

In [15]:
# Display the raw prediction array (one row per test sample, one column per
# forecast horizon).
y_pred_ridge

array([[0.14702763, 0.14183277, 0.14117829, 0.14318662, 0.1470781 ,
        0.15326129],
       [0.14546886, 0.14576658, 0.14851724, 0.152837  , 0.15890032,
        0.16602093],
       [0.14311807, 0.14529791, 0.14940428, 0.15521649, 0.16194443,
        0.1667511 ],
       ...,
       [0.23979039, 0.2457183 , 0.25070181, 0.25094109, 0.24457419,
        0.23585996],
       [0.31940928, 0.33537936, 0.33906944, 0.33302463, 0.32316593,
        0.3104838 ],
       [0.31096827, 0.31019755, 0.30232612, 0.29216996, 0.28043937,
        0.27051718]])

In [16]:
# Overall test MAE of the Ridge model: a single number over all target columns
# (sklearn averages the per-column errors uniformly by default).
MAE_model_Ridge=metrics.mean_absolute_error(y_test, y_pred_ridge)

In [17]:
# Wrap the predictions in a DataFrame with one labelled column per horizon.
# No index is passed, so rows get pandas' default integer index rather than
# the timestamps of y_test.
horizon_labels = ['t+1', 't+2', 't+3', 't+4', 't+5', 't+6']
df_y_pred_ridge = pd.DataFrame(data=y_pred_ridge, columns=horizon_labels)
df_y_pred_ridge

Unnamed: 0,t+1,t+2,t+3,t+4,t+5,t+6
0,0.147028,0.141833,0.141178,0.143187,0.147078,0.153261
1,0.145469,0.145767,0.148517,0.152837,0.158900,0.166021
2,0.143118,0.145298,0.149404,0.155216,0.161944,0.166751
3,0.176531,0.184788,0.191566,0.197334,0.200009,0.199876
4,0.177018,0.182757,0.188537,0.191423,0.191234,0.188170
...,...,...,...,...,...,...
8737,0.219011,0.223608,0.226508,0.232556,0.238976,0.242157
8738,0.218943,0.220671,0.226014,0.231960,0.234971,0.232524
8739,0.239790,0.245718,0.250702,0.250941,0.244574,0.235860
8740,0.319409,0.335379,0.339069,0.333025,0.323166,0.310484


In [18]:
# Test MAE per forecast horizon, keyed by horizon number (1..6). Target column
# 'percentage(t+h)' of y_test is compared with prediction column 't+h'.
mae_ridge_by_horizon = {
    h: metrics.mean_absolute_error(y_test['percentage(t+{})'.format(h)],
                                   df_y_pred_ridge['t+{}'.format(h)])
    for h in range(1, 7)
}

# Keep the individual names, which the reporting cell reads.
(MAE_Ridge_t1, MAE_Ridge_t2, MAE_Ridge_t3,
 MAE_Ridge_t4, MAE_Ridge_t5, MAE_Ridge_t6) = (mae_ridge_by_horizon[h] for h in range(1, 7))

In [19]:
# Report the per-horizon MAE as a percentage with two decimals.
horizon_maes = (MAE_Ridge_t1, MAE_Ridge_t2, MAE_Ridge_t3,
                MAE_Ridge_t4, MAE_Ridge_t5, MAE_Ridge_t6)
for step, mae in enumerate(horizon_maes, start=1):
    print('MAE en t+{}: {:2.2%}'.format(step, mae))

MAE en t+1: 0.77%
MAE en t+2: 1.49%
MAE en t+3: 2.11%
MAE en t+4: 2.62%
MAE en t+5: 3.03%
MAE en t+6: 3.37%


In [20]:
# Display the test targets for visual comparison with the predictions.
y_test

Unnamed: 0_level_0,percentage(t+1),percentage(t+2),percentage(t+3),percentage(t+4),percentage(t+5),percentage(t+6)
datetime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-01 11:00:00+00:00,0.149221,0.143816,0.162418,0.171720,0.187189,0.195005
2022-01-01 12:00:00+00:00,0.143816,0.162418,0.171720,0.187189,0.195005,0.213811
2022-01-01 13:00:00+00:00,0.162418,0.171720,0.187189,0.195005,0.213811,0.226927
2022-01-01 14:00:00+00:00,0.171720,0.187189,0.195005,0.213811,0.226927,0.237620
2022-01-01 15:00:00+00:00,0.187189,0.195005,0.213811,0.226927,0.237620,0.239414
...,...,...,...,...,...,...
2022-12-31 13:00:00+00:00,0.216203,0.231023,0.285819,0.304648,0.321047,0.329687
2022-12-31 14:00:00+00:00,0.231023,0.285819,0.304648,0.321047,0.329687,0.315409
2022-12-31 15:00:00+00:00,0.285819,0.304648,0.321047,0.329687,0.315409,0.297348
2022-12-31 16:00:00+00:00,0.304648,0.321047,0.329687,0.315409,0.297348,0.245423


In [21]:
fig = go.Figure()

# Observed series.
fig.add_trace(go.Scatter(
    x=test_data.index,
    y=test_data.percentage,
    mode='lines',
    hoverinfo='x+y',
    name='real value',
    hovertemplate='%{y:.1%}' + ' Real ' + '%{x|%H:%M}<extra></extra>',
    marker_line_color='white',
    line=dict(color='#004481'),
))

# One trace per forecast horizon; position in the list gives the horizon (1..6).
horizon_colors = ['#2dcccd', '#388D4F', '#C49735', '#7C6AC7', '#D8732C', '#D44B50']
for horizon, color in enumerate(horizon_colors, start=1):
    label = 't+{}'.format(horizon)
    fig.add_trace(go.Scatter(
        # Shift the timestamps by the horizon so each forecast is drawn at
        # index + horizon hours (presumably the hour it targets - confirm the
        # data is hourly).
        x=y_test.index + pd.DateOffset(hours=horizon),
        y=df_y_pred_ridge[label],
        mode='lines',
        hoverinfo='y+x',
        name=label,
        hovertemplate='%{y:.1%}' + ' ' + label + ' ' + '%{x|%H:%M}<extra></extra>',
        opacity=0.5,
        line=dict(color=color),
    ))

fig.update_layout(
    showlegend=True,
    hovermode='x unified',
    legend=dict(orientation="h", x=0.1),
    xaxis=dict(
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)

# Last expression of the cell: formats the y axis as percentages and renders
# the figure.
fig.update_yaxes(tickformat=".0%")

# Guardar el modelo

In [22]:
# joblib.dump(best_ridge_model, "modelos_exogena/ridge_model.joblib")

# Cargar el modelo

In [23]:
# load model
# loaded_ridge = joblib.load("modelos_exogena/ridge_model.joblib")

In [24]:
# y_pred_ridge_loaded = loaded_ridge.predict(X_test)

In [25]:
# y_pred_ridge_loaded