# Visualizando Predicciones de Cuenca

Se importan las librerías

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.layers import LSTM

Se importan los datos

In [48]:
# Training set: read the CSV with its first column as a datetime index, put it
# on a regular hourly grid, keep only the 08:00-17:00 rows and the five
# variables used by the models (solar radiation first).
df = (
    pd.read_csv('./final_train_2015.csv', index_col=0, parse_dates=True)
    .asfreq('h')
    .between_time('08:00:00','17:00:00')
    [['SRGLOAVG1H','TAMAX1H','TAMIN1H','RHMIN1H','RHMAX1H']]
)

In [49]:
# Test set: same preparation as the training frame (hourly grid, the five
# model variables, rows between 08:00 and 17:00 only).
df_test = (
    pd.read_csv('./final_test_2015.csv', index_col=0, parse_dates=True)
    .asfreq('h')
    [['SRGLOAVG1H','TAMAX1H','TAMIN1H','RHMIN1H','RHMAX1H']]
    .between_time('08:00:00','17:00:00')
)

In [50]:
from sklearn.preprocessing import MinMaxScaler

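Se normalizan los datos. El escalador se ajusta únicamente con el conjunto de entrenamiento y luego se aplica también al conjunto de prueba.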

In [51]:
# Fit the min-max scaler on the training frame only, then apply that same
# fitted transform to both the training and the test frames.
scaler = MinMaxScaler().fit(df)
scaled_train = scaler.transform(df)
scaled_test = scaler.transform(df_test)

Se importan los mejores modelos

In [6]:
# Load the saved univariate LSTM model from its HDF5 file.
best_univariado = keras.models.load_model('../../../Best_models/cuenca/LSTM_univariado_RS.h5')

In [7]:
# Load the saved multivariate LSTM model from its HDF5 file.
best_multivariado = keras.models.load_model('../../../Best_models/cuenca/LSTM_multivariado_RS.h5')

Se establecen los coeficientes del modelo de referencia AR.

In [8]:
# AR reference-model coefficients: the constant term followed by 22 lag
# coefficients. `get_pred` multiplies coef[1] by the most recent observation,
# coef[2] by the one before it, and so on.
coef = [
    88.465247,  # constant term
    0.635393, -0.103026,
    0.043116, -0.032294,
    -0.004597, -0.002247,
    -0.010747, 0.021191,
    0.111859, 0.140868,
    0.000666, -0.033811,
    -0.006970, -0.041583,
    -0.016905, -0.012530,
    0.000388, 0.007379,
    0.072742, 0.063291,
    0.011766, -0.066640,
]

Se crea una función para aplicar la ecuación AR.

In [9]:
def get_pred(coef,data):
    """Evaluate the AR equation for a single step.

    coef: indexable sequence whose first item is the constant term; the
        following items are the lag coefficients, with coef[1] applied to
        the most recent observation.
    data: past observations in chronological order (oldest first).

    Returns the constant plus the sum of each observation multiplied by the
    coefficient of its lag. Raises IndexError if `data` holds more
    observations than there are lag coefficients.
    """
    total = coef[0]
    # Walk the observations newest-first so that lag k is paired with coef[k].
    for lag, valor in enumerate(reversed(data), start=1):
        total += coef[lag] * valor
    return total

## Predicciones

**Modelo AR:** El modelo trabaja con 22 retrasos.

In [17]:
# Number of lagged observations fed to the AR model (one per lag coefficient in `coef`).
retrasos = 22

In [22]:
# Unscaled solar-radiation column of the test frame; the AR model works on raw values.
RS = df_test['SRGLOAVG1H']

In [26]:
# One AR prediction for every test row that has `retrasos` earlier rows
# available: each window holds the `retrasos` observations immediately
# preceding the row being predicted, in chronological order.
predicciones_AR = [
    get_pred(coef, RS.iloc[inicio:inicio + retrasos].values)
    for inicio in range(len(df_test) - retrasos)
]

In [28]:
# Convert the list of AR predictions into a flat 1-D NumPy array.
predicciones_AR = np.array(predicciones_AR).flatten()

In [29]:
# Sanity check: number of AR predictions produced.
len(predicciones_AR)

1438

In [30]:
# Observed solar radiation for the rows that have a prediction, i.e. every
# test row after the first `retrasos` ones.
real_data = df_test['SRGLOAVG1H'].iloc[retrasos:].values

In [31]:
# Sanity check: number of observed values to compare against the predictions.
len(real_data)

1438

In [32]:
# Column name -> values mapping used to build the comparison table.
df_predicciones = {'Real Data': real_data, 'AR': predicciones_AR}

In [33]:
# Turn the column mapping into a DataFrame (the same name is rebound from a dict to a frame).
df_predicciones = pd.DataFrame(df_predicciones)

In [36]:
# Preview the first rows of the comparison table.
df_predicciones.head()

Unnamed: 0,Real Data,AR
0,376.0,522.985281
1,700.0,392.226992
2,665.0,644.23775
3,666.0,572.807753
4,687.0,607.49172


In [39]:
# Label each row with the timestamp of the test observation it refers to
# (the test index without its first `retrasos` entries).
df_predicciones = df_predicciones.set_index(df_test.index[retrasos:])

In [42]:
# Preview the table again, now indexed by timestamp.
df_predicciones.head()

Unnamed: 0_level_0,Real Data,AR
Local_Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-08-10 10:00:00,376.0,522.985281
2016-08-10 11:00:00,700.0,392.226992
2016-08-10 12:00:00,665.0,644.23775
2016-08-10 13:00:00,666.0,572.807753
2016-08-10 14:00:00,687.0,607.49172


**Modelo LSTM univariado**: El modelo trabaja con 22 retrasos.

In [77]:
# Window length: number of lagged values fed to the univariate LSTM.
n_input = 22
# Number of extra input variables; the model input is reshaped with
# n_features+1 features per time step, i.e. a single feature here.
n_features = 0

In [75]:
# Peek at the first three rows of the scaled test array.
scaled_test[:3]

array([[0.10439105, 0.38974359, 0.48166259, 0.86419753, 0.92857143],
       [0.27091964, 0.54358974, 0.55501222, 0.66666667, 0.85714286],
       [0.24275062, 0.53846154, 0.61858191, 0.62962963, 0.72857143]])

In [73]:
# First column of the scaled test array (SRGLOAVG1H is the first selected column).
# Note: this rebinds `RS`, which previously held the unscaled pandas Series.
RS = scaled_test[:,0]

Se tienen sólo los datos de radiación solar

In [74]:
# Display the scaled solar-radiation values.
RS

array([0.10439105, 0.27091964, 0.24275062, ..., 0.14581607, 0.15078708,
       0.09362055])

In [79]:
# Build every sliding window of the previous `n_input` scaled values up front
# and run the network once on the whole batch, instead of calling `predict`
# once per time step (each call carries a fixed overhead, so the per-step loop
# is needlessly slow).
# NOTE(review): assumes the saved model accepts an arbitrary batch size
# (i.e. it was not built with a fixed batch dimension) - confirm.
ventanas = np.stack([RS[j-n_input:j] for j in range(n_input, len(RS))])
# The model input is 3-D: (samples, time steps, features per step).
ventanas = ventanas.reshape((len(ventanas), n_input, n_features+1))
# One prediction per window, flattened to a 1-D array in chronological order.
predictions = np.ndarray.flatten(np.array(best_univariado.predict(ventanas)))

In [80]:
# Sanity check: number of univariate LSTM predictions.
len(predictions)

1438

In [81]:
# The scaler was fitted on five columns, so it can only invert five-column
# input: put the predictions in the first column, pad the other four with
# zeros, invert, and keep only the first (solar radiation) column.
test_predictions = np.column_stack([predictions, np.zeros((len(predictions), 4))])
true_predictions = scaler.inverse_transform(test_predictions)
rsg_predictions = true_predictions[:, 0]

In [82]:
# Show the first five predictions back in the original scale.
rsg_predictions[:5]

array([617.07289559, 433.83766162, 715.0388537 , 590.57204348,
       578.90335196])

In [83]:
# Add the univariate LSTM predictions as a new column of the comparison table.
df_predicciones['LSTM univariado']=rsg_predictions

In [91]:
# Preview the table with the new column.
df_predicciones.head()

Unnamed: 0_level_0,Real Data,AR,LSTM univariado
Local_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-08-10 10:00:00,376.0,522.985281,617.072896
2016-08-10 11:00:00,700.0,392.226992,433.837662
2016-08-10 12:00:00,665.0,644.23775,715.038854
2016-08-10 13:00:00,666.0,572.807753,590.572043
2016-08-10 14:00:00,687.0,607.49172,578.903352


**Modelo LSTM multivariado**: El modelo trabaja con 20 retrasos.

In [92]:
# Window length: number of lagged rows fed to the multivariate LSTM.
n_input = 20
# Number of extra input variables besides the target; the model input is
# reshaped with n_features+1 (= 5) features per time step.
n_features = 4

In [94]:
# Build every sliding window of the previous `n_input` scaled rows up front
# and run the network once on the whole batch, instead of calling `predict`
# once per time step (each call carries a fixed overhead, so the per-step loop
# is needlessly slow).
# NOTE(review): assumes the saved model accepts an arbitrary batch size
# (i.e. it was not built with a fixed batch dimension) - confirm.
ventanas = np.stack([scaled_test[j-n_input:j] for j in range(n_input, len(df_test))])
# The model input is 3-D: (samples, time steps, features per step).
ventanas = ventanas.reshape((len(ventanas), n_input, n_features+1))
# Keep the result as a list with one output array per window, in chronological
# order, so the cells that slice and flatten it keep working unchanged.
predictions = list(best_multivariado.predict(ventanas))

In [96]:
# Sanity check: number of multivariate LSTM predictions.
len(predictions)

1440

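Se quitan los dos primeros resultados para poder compararlos con las demás predicciones: este modelo usa 20 retrasos en lugar de 22, por lo que genera dos predicciones adicionales al inicio.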

In [97]:
# Drop the first 2 predictions: this model uses 20 lags while the AR and
# univariate models use 22, so it yields 2 extra predictions at the start.
multi_predictions = predictions[2:]

In [100]:
# Sanity check: number of multivariate predictions left after trimming.
len(multi_predictions)

1438

In [101]:
# Convert the trimmed multivariate predictions into a flat 1-D NumPy array.
predictions = np.array(multi_predictions).flatten()

In [102]:
# The scaler was fitted on five columns, so it can only invert five-column
# input: put the predictions in the first column, pad the other four with
# zeros, invert, and keep only the first (solar radiation) column.
test_predictions = np.column_stack([predictions, np.zeros((len(predictions), 4))])
true_predictions = scaler.inverse_transform(test_predictions)
rsg_predictions = true_predictions[:, 0]

In [104]:
# Show the first five predictions back in the original scale.
rsg_predictions[:5]

array([600.36640906, 441.9084934 , 699.22503352, 610.82962692,
       581.63091958])

In [105]:
# Add the multivariate LSTM predictions as a new column of the comparison table.
df_predicciones['LSTM multivariado'] = rsg_predictions

In [None]:
# Keep the timestamp index in the file: the visualisation section reloads this
# CSV with `index_col=0, parse_dates=True`. Writing it without the index would
# make 'Real Data' the first column, so it would be consumed as the index and
# the plots and RMSE computation would fail with a KeyError.
df_predicciones.to_csv('predicciones_modelos.csv')

## Visualización de los resultados con suavizado


In [31]:
import ipywidgets as widgets
import plotly.express as px
import plotly.graph_objects as go

In [23]:
# Reload the saved predictions; the first CSV column becomes the index and is parsed as dates.
df_predicciones = pd.read_csv('predicciones_modelos.csv',index_col=0, parse_dates=True)

In [45]:
def grafico_linea_tiempo(suavizado, muestras_por_dia=24):
    """Plot real vs. predicted series, optionally smoothed, and print each model's RMSE.

    Reads the module-level ``df_predicciones`` frame, which must contain the
    columns 'Real Data', 'AR', 'LSTM univariado' and 'LSTM multivariado'.

    Parameters
    ----------
    suavizado : int
        Number of "days" of rolling-mean smoothing; 0 plots the raw series.
    muestras_por_dia : int, optional
        Rows counted as one day when sizing the rolling window. Defaults to
        24, the value that used to be hard-coded.
        NOTE(review): the test frame is filtered to the hourly rows between
        08:00 and 17:00, which looks like 10 rows per day, so the default
        window spans more calendar days than the slider label suggests -
        confirm and pass 10 if per-day smoothing is intended.

    Returns
    -------
    None. The RMSE values are printed and the Plotly figure is shown.
    """
    # No matplotlib figure is created here: the chart is drawn with Plotly, so
    # a `plt.figure(...)` call would only leave an empty figure behind on
    # every slider change.
    ventana = int(suavizado) * muestras_por_dia
    df_plot = df_predicciones
    if suavizado != 0:
        df_plot = df_plot.rolling(window=ventana).mean()
        # The rolling mean is NaN until a full window is available; drop
        # those leading rows so the RMSE is computed on valid values only.
        df_plot = df_plot[ventana:]

    print('RMSE suavizado')
    reales = df_plot['Real Data'].values
    for modelo in ('AR', 'LSTM univariado', 'LSTM multivariado'):
        print(modelo + ':', np.sqrt(mean_squared_error(reales, df_plot[modelo])))

    fig = px.line(df_plot, x=df_plot.index, y=df_plot.columns, title='Predicciones')
    # Range slider plus quick-zoom buttons on the time axis.
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=list([
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=14, label="2w", step="day", stepmode="todate"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
    )
    fig.update_layout(autosize=False, width=1000, height=500)
    fig.show()
    
# Slider to choose how many days of smoothing to apply (0 = no smoothing).
suavizado = widgets.SelectionSlider(options=[0, 1, 7, 14, 30], value=0,
    description='Número de días para suavizar',
    layout=widgets.Layout(width='70%'))
# Widen the label area so the long description is not truncated.
suavizado.style = {'description_width': '300px'}
# The former `variable.style = ...` line is gone: no `variable` widget is
# defined anywhere in this notebook, so it raised NameError on a fresh kernel.

# Re-render the chart whenever the slider value changes.
ui = widgets.VBox(children=[suavizado])
out = widgets.interactive_output(grafico_linea_tiempo, {'suavizado':suavizado})
display(ui,out)

VBox(children=(SelectionSlider(description='Número de días para suavizar', layout=Layout(width='70%'), options…

Output()

En las curvas suavizadas, el modelo univariado LSTM tiene el error más pequeño.