# Visualizando Predicciones de Viento

Se importan las librerías

In [54]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import LSTM

Se importan los datos

In [55]:
# Training set: the first CSV column is parsed as a datetime index, the index
# is conformed to an hourly frequency, and only the four columns used by the
# models are kept.
df = (
    pd.read_csv('final_train_2015.csv', index_col=0, parse_dates=True)
    .asfreq('h')
)[['WS1HA', 'SRGLOAVG1H', 'RHMAX1H', 'WD1HA']]

In [56]:
# Test set, loaded exactly like the training set: datetime index, hourly
# frequency, same four columns in the same order.
df_test = (
    pd.read_csv('final_test_2015.csv', index_col=0, parse_dates=True)
    .asfreq('h')
)[['WS1HA', 'SRGLOAVG1H', 'RHMAX1H', 'WD1HA']]

In [57]:
# Min-max scaler shared by the training and test sets.
scaler = MinMaxScaler()

Se normalizan los datos

In [58]:
# Fit the scaler on the training data only; the test set is transformed with
# these same statistics.
scaler.fit(df)

MinMaxScaler()

In [59]:
# Apply the train-fitted scaling to both sets; the results are NumPy arrays
# whose columns follow the column order of df / df_test.
scaled_train = scaler.transform(df)
scaled_test = scaler.transform(df_test)

Se importan los mejores modelos.

In [60]:
# Load the previously saved univariate and multivariate LSTM models from disk.
best_univariado = keras.models.load_model('../../Best_models/cuenca/LSTM_univariado_Viento.h5')
best_multivariado = keras.models.load_model('../../Best_models/cuenca/LSTM_multivariado_viento.h5')

Se establecen los coeficientes del modelo de referencia AR.

In [61]:
# Coefficients of the reference AR model. coef[0] is the constant term; the
# remaining entries are the lag coefficients, which get_pred pairs with the
# observations starting from the most recent one (coef[1] <-> newest value).
coef = [0.207770,
    0.760548, -0.016077, 0.000998, 0.013141,
    0.009231, 0.002767, 0.014599, -0.009329,
    -0.007011, -0.015566, -0.004139, -0.011600,
    0.001925, -0.018531, 0.016056, -0.002663,
    -0.000195, 0.019879, 0.010009, -0.004381,
    0.046295, 0.051081, 0.077785, 0.049039,
    0.019718, -0.015886, -0.027731, -0.038589]

In [62]:
# Number of coefficients, constant term included.
len(coef)

29

Se crea una función para aplicar la ecuación AR.

In [63]:
def get_pred(coef, data):
    """Evaluate the AR equation for a single step.

    ``coef[0]`` is the constant term. ``coef[1]``, ``coef[2]``, ... multiply
    the observations in ``data`` taken from its last element backwards, so
    ``data`` must be in chronological order (oldest first, newest last).

    Returns the constant plus the weighted sum of the observations.
    """
    total = coef[0]
    # Walk the window from newest to oldest so that lag k pairs with coef[k].
    for lag, value in enumerate(reversed(data), start=1):
        total += coef[lag] * value
    return total

## Predicciones

**Modelo AR:** El modelo trabaja con 28 retrasos.

In [64]:
# Number of lagged observations fed to the AR model on each prediction.
retrasos = 28

In [65]:
# Wind series (WS1HA) of the test set, taken from the unscaled DataFrame.
viento = df_test['WS1HA']

In [66]:
# One AR prediction per test position that has `retrasos` earlier
# observations: each window holds the `retrasos` values immediately preceding
# the predicted position, in chronological order.
predicciones_AR = [
    get_pred(coef, viento.iloc[inicio:inicio + retrasos].values)
    for inicio in range(len(df_test) - retrasos)
]

In [67]:
# Convert the list of predictions into a flat 1-D NumPy array.
predicciones_AR = np.array(predicciones_AR).flatten()

In [68]:
# Number of AR predictions (len(df_test) - retrasos).
len(predicciones_AR)

3481

In [69]:
# Observed wind values aligned with the AR predictions: the first `retrasos`
# rows have no prediction, so they are skipped.
real_data = df_test['WS1HA'].iloc[retrasos:].values

In [70]:
# Must match the number of AR predictions.
len(real_data)

3481

In [71]:
# Columns of the comparison table: observed values and AR predictions.
df_predicciones = {
    'Real Data':real_data,
    'AR':predicciones_AR
}

In [72]:
# Turn the dict of columns into a DataFrame (default integer index for now).
df_predicciones = pd.DataFrame(df_predicciones)

In [73]:
# Preview the first rows.
df_predicciones.head()

Unnamed: 0,Real Data,AR
0,2.7,3.091876
1,3.0,2.662508
2,3.5,2.861125
3,3.2,3.163219
4,2.9,2.944217


In [74]:
# Re-index the table with the timestamps of the predicted test rows
# (everything after the first `retrasos` rows).
df_predicciones = df_predicciones.set_index(df_test.index[retrasos:])

In [75]:
# Preview the table, now indexed by timestamp.
df_predicciones.head()

Unnamed: 0_level_0,Real Data,AR
Local_Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-09 00:00:00,2.7,3.091876
2016-08-09 01:00:00,3.0,2.662508
2016-08-09 02:00:00,3.5,2.861125
2016-08-09 03:00:00,3.2,3.163219
2016-08-09 04:00:00,2.9,2.944217


**Modelo LSTM univariado:** El modelo trabaja con 24 retrasos.

In [76]:
# Window length (number of past steps) for the univariate LSTM.
n_input = 24
# The model input is reshaped with n_features + 1 as its last dimension, so
# 0 here means a single input feature.
n_features = 0

In [77]:
# Inspect the first three scaled test rows.
scaled_test[:3]

array([[0.35555556, 0.        , 0.75714286, 0.12256267],
       [0.33333333, 0.        , 0.72857143, 0.13649025],
       [0.34444444, 0.        , 0.75714286, 0.14206128]])

In [78]:
# Column 0 of the scaled test matrix is the wind series (WS1HA is the first
# selected column). NOTE: this rebinds `viento`, which previously held the
# unscaled pandas Series used by the AR section.
viento = scaled_test[:,0]

Se obtienen sólo los datos de viento

In [79]:
# Inspect the scaled wind series.
viento

array([0.35555556, 0.33333333, 0.34444444, ..., 0.14444444, 0.02222222,
       0.04444444])

Se producen las predicciones usando el modelo univariado.

In [80]:
# Build every sliding window up front and run ONE batched predict() call
# instead of one predict() call per sample, which is far slower for thousands
# of windows. Assumes the saved model accepts arbitrary batch sizes (i.e. it is
# not a stateful LSTM with a fixed batch size) -- TODO confirm.
n_windows = max(len(viento) - n_input, 0)
if n_windows:
    # Row k indexes viento[k : k + n_input], the n_input values that precede
    # target position k + n_input.
    window_idx = np.arange(n_windows)[:, None] + np.arange(n_input)[None, :]
    model_input = viento[window_idx].reshape((-1, n_input, n_features + 1))
    predictions = best_univariado.predict(model_input)
else:
    # Not enough samples to form a single window.
    predictions = np.array([])
# Flat 1-D array with one prediction per window.
predictions = np.ndarray.flatten(np.array(predictions))

In [81]:
# Number of univariate predictions (len(viento) - n_input).
len(predictions)

3485

Se transforman los datos normalizados para tener los valores reales.

In [82]:
# The scaler was fitted on every column, so inverse_transform expects that same
# number of columns. Place the predictions in column 0 (the wind column) and
# pad the remaining columns with zeros; only column 0 is read back.
# The column count is taken from scaled_test instead of being hard-coded.
test_predictions = np.zeros(shape=(len(predictions), scaled_test.shape[1]))
test_predictions[:, 0] = predictions
true_predictions = scaler.inverse_transform(test_predictions)
viento_predictions = true_predictions[:, 0]

In [83]:
# First prediction after undoing the scaling.
viento_predictions[0]

1.767444983124733

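Se quitan los primeros cuatro valores para que la longitud coincida con los resultados de AR: el modelo AR usa 28 retrasos y el LSTM 24, por lo que el LSTM produce 28 − 24 = 4 predicciones iniciales adicionales.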

In [84]:
# The LSTM needs n_input lags and the AR model needs `retrasos`, so the LSTM
# yields (retrasos - n_input) extra leading predictions; dropping them must
# leave the same length as the AR results.
len(viento_predictions[retrasos - n_input:])

3481

In [85]:
# Drop the (retrasos - n_input) leading predictions that have no AR
# counterpart so the column lines up with the existing rows.
df_predicciones['LSTM univariado'] = viento_predictions[retrasos - n_input:]

In [86]:
# Preview the table with the univariate LSTM column added.
df_predicciones.head()

Unnamed: 0_level_0,Real Data,AR,LSTM univariado
Local_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-08-09 00:00:00,2.7,3.091876,3.202639
2016-08-09 01:00:00,3.0,2.662508,2.718846
2016-08-09 02:00:00,3.5,2.861125,3.071737
2016-08-09 03:00:00,3.2,3.163219,3.549186
2016-08-09 04:00:00,2.9,2.944217,3.222625


**Modelo LSTM multivariado:** El modelo trabaja con 24 retrasos.

In [87]:
# Same window length as the univariate model.
n_input = 24
# The model input is reshaped with n_features + 1 as its last dimension, so
# 3 here means all four scaled columns are fed to the model.
n_features = 3

In [88]:
# Build every sliding window up front and run ONE batched predict() call
# instead of one predict() call per sample, which is far slower for thousands
# of windows. Assumes the saved model accepts arbitrary batch sizes (i.e. it is
# not a stateful LSTM with a fixed batch size) -- TODO confirm.
n_windows = max(len(df_test) - n_input, 0)
predictions = []
if n_windows:
    # Row k indexes scaled_test[k : k + n_input], the n_input rows that
    # precede target position k + n_input.
    window_idx = np.arange(n_windows)[:, None] + np.arange(n_input)[None, :]
    model_input = scaled_test[window_idx].reshape((-1, n_input, n_features + 1))
    # list(...) keeps `predictions` a list holding one output array per
    # window, the same structure the per-sample loop produced.
    predictions = list(best_multivariado.predict(model_input))

In [89]:
# Number of multivariate predictions (len(df_test) - n_input).
len(predictions)

3485

Se quitan los 4 primeros datos (28 retrasos del AR − 24 retrasos del LSTM) para que coincida con la longitud actual del DataFrame.

In [90]:
# Drop the (retrasos - n_input) leading predictions that have no AR
# counterpart so the result lines up with the rows of df_predicciones.
multi_predictions = predictions[retrasos - n_input:]

In [91]:
# Must match the number of rows in df_predicciones.
len(multi_predictions)

3481

In [92]:
# Collapse the per-window outputs into a flat 1-D NumPy array.
predictions = np.array(multi_predictions).flatten()

In [93]:
# The scaler was fitted on every column, so inverse_transform expects that same
# number of columns. Place the predictions in column 0 (the wind column) and
# pad the remaining columns with zeros; only column 0 is read back.
# The column count is taken from scaled_test instead of being hard-coded.
test_predictions = np.zeros(shape=(len(predictions), scaled_test.shape[1]))
test_predictions[:, 0] = predictions
true_predictions = scaler.inverse_transform(test_predictions)
viento_predictions = true_predictions[:, 0]

In [94]:
# Add the de-normalized multivariate predictions as a new column.
df_predicciones['LSTM multivariado'] = viento_predictions

In [96]:
# Save the comparison table (observed values plus the three models) to CSV.
df_predicciones.to_csv('predicciones_modelos_viento.csv')

## Visualización de los datos con suavizado

Se importan las librerías para visualización.

In [97]:
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go

In [98]:
# Reload the predictions saved above, using the first column as a parsed
# datetime index.
df_predicciones = pd.read_csv('predicciones_modelos_viento.csv',index_col=0, parse_dates=True)

In [99]:
# Preview the reloaded table.
df_predicciones.head()

Unnamed: 0_level_0,Real Data,AR,LSTM univariado,LSTM multivariado
Local_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-08-09 00:00:00,2.7,3.091876,3.202639,3.106787
2016-08-09 01:00:00,3.0,2.662508,2.718846,2.706135
2016-08-09 02:00:00,3.5,2.861125,3.071737,2.980914
2016-08-09 03:00:00,3.2,3.163219,3.549186,3.468579
2016-08-09 04:00:00,2.9,2.944217,3.222625,3.09231


In [100]:
def grafico_linea_tiempo(suavizado):
    """Plot observed vs. predicted series and print each model's RMSE.

    Parameters
    ----------
    suavizado : int
        Number of days used for the rolling-mean smoothing; 0 disables
        smoothing. The rolling window is ``suavizado * 24`` rows.

    Reads the module-level ``df_predicciones`` table, prints the RMSE of each
    model column against 'Real Data' (computed on the smoothed series when
    smoothing is active) and shows an interactive Plotly line chart.
    Returns None.
    """
    # No matplotlib figure is created here: the chart is drawn with Plotly, and
    # a plt.figure() call would only leave an empty, never-closed figure behind
    # on every slider change.
    ventana = int(suavizado) * 24
    df_plot = df_predicciones
    if suavizado != 0:
        df_plot = df_plot.rolling(window=ventana).mean()
        # rolling() leaves the first ventana-1 rows as NaN; the first complete
        # window ends at position ventana-1, so keep everything from there on.
        df_plot = df_plot.iloc[ventana - 1:]

    valores_reales = df_plot['Real Data'].values
    print('RMSE suavizado')
    for modelo in ('AR', 'LSTM univariado', 'LSTM multivariado'):
        print(modelo + ':', np.sqrt(mean_squared_error(valores_reales, df_plot[modelo])))

    fig = px.line(df_plot, x=df_plot.index, y=df_plot.columns, title='Predicciones')
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                # NOTE(review): this is the only button using "todate" rather
                # than "backward" -- confirm that is intentional.
                dict(count=14, label="2w", step="day", stepmode="todate"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(step="all"),
            ]
        ),
    )
    fig.update_layout(autosize=False, width=1000, height=500)
    fig.show()
    
# Slider that selects how many days the rolling mean spans (0 = raw series).
# The wide description width keeps the long label from being truncated.
suavizado = widgets.SelectionSlider(
    options=[0, 1, 7, 14, 30],
    value=0,
    description='Número de días para suavizar',
    layout=widgets.Layout(width='70%'),
    style={'description_width': '300px'},
)

# Re-run grafico_linea_tiempo whenever the slider value changes and show the
# control above its output area.
ui = widgets.VBox([suavizado])
out = widgets.interactive_output(grafico_linea_tiempo, {'suavizado': suavizado})
display(ui, out)

VBox(children=(SelectionSlider(description='Número de días para suavizar', layout=Layout(width='70%'), options…

Output()

Al suavizar la curva, el modelo univariado obtiene un error más pequeño que el multivariado.