<a href="https://colab.research.google.com/github/albreyes/pronostico/blob/main/Predicci%C3%B3n_Generadores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.estimator import Estimator
from sklearn.preprocessing import QuantileTransformer
from typing import List
from pprint import pprint

# Set AutoGraph's log verbosity to 0, its quietest level.
tf.autograph.set_verbosity(0)
# Show at most 8 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 8)

from os import path

In [None]:
# !pip freeze | grep tensorflow

In [None]:
# !unzip model_1_3_8_6_7.zip
# !mv content/model_1_3_8_6_7/ model_1_3_8_6_7
# !rm -r content

In [None]:
# Directory containing the generador_<n>.hdf files. The empty string makes
# path.join() resolve file names relative to the current working directory.
# data_path = path.join('datos', 'potencia_velocidad')
data_path = ''

# Maps the raw column headers found in the HDF files to short snake_case names.
rename_cols = {
    'Time': 'time',
    'Avg Wind Speed(m/s)': 'wind_speed',
    'Avg Active Power (kW)': 'active_power',
    'last time Energy Yield(h)': 'energy_yield',
    'Avg Yaw Position(deg)': 'yaw',
}

# Columns (and their order) returned by get_nth_generator_data.
# Commented-out entries are features that are currently disabled.
cols = [
    # 'time',
    'wind_speed',
    'active_power',
    'month',
    'day',
    'hour',
    'minute',
    # 'yaw_sin',
    # 'yaw_cos',
]

In [None]:
def get_nth_generator_data(n: int):
    """Load the data of generator ``n`` and return its feature columns.

    Reads ``generador_{n}.hdf`` (key ``'df'``) from ``data_path``, renames the
    raw columns through ``rename_cols``, derives ``month``/``day``/``hour``/
    ``minute`` from the ``time`` column, indexes the frame by ``time`` and
    returns the ``cols`` subset with every row containing a missing value
    removed.
    """
    source_file = path.join(data_path, f'generador_{n}.hdf')
    renamed = pd.read_hdf(source_file, 'df').rename(columns=rename_cols)

    # Calendar features are taken from the timestamp before it becomes the index.
    timestamps = renamed['time'].dt
    featured = renamed.assign(
        month=timestamps.month,
        day=timestamps.day,
        hour=timestamps.hour,
        minute=timestamps.minute,
    ).set_index('time')

    # Yaw sine/cosine features are currently disabled (see `cols`).
    return featured[cols].dropna()

In [None]:
# df = get_nth_generator_data(9)
# df.loc[df.index.to_series() < '2014-01-01']

# Preprocesamiento

In [None]:
# Length of the input history in hours (7 days); it is multiplied by
# HOUR_RESOLUTION wherever a row count is needed.
PAST_HISTORY = 7 * 24
# Data is given in 10 minute frames
HOUR_RESOLUTION = 6

# Number of target values predicted per example (size of each target window).
FUTURE_TARGET = 24

# Columns fed to the recurrent (time-series) input.
series_cols = [
    'wind_speed',
    # 'active_power',
]

# Calendar columns fed to the dense input.
categ_cols = [
    'month',
    'day',
    'hour',
    'minute',
]

# Columns the model is trained to predict.
target_cols = [
    'wind_speed',
    # 'active_power',
]

## Datos de entrenamiento, evaluación y validación

In [None]:
# NOTE(review): TRAIN_SPLIT is not referenced anywhere in the visible notebook.
TRAIN_SPLIT = 100000
# Shuffle buffer size used for the training dataset.
BUFFER_SIZE = 8000
# Batch size for the training and evaluation datasets.
BATCH_SIZE = 64
# Used as the estimator's checkpoint interval (save_checkpoints_steps).
EVALUATION_INTERVAL = 50
# EPOCHS, VALIDATION_STEPS and PATIENCE are only used by the commented-out
# Keras model.fit / EarlyStopping code.
EPOCHS = 20
VALIDATION_STEPS = 100
PATIENCE = 5

In [None]:
def data_fn(mode, n_generator):
    """Build the ``tf.data`` input pipeline for one or several generators.

    Args:
        mode: One of ``tf.estimator.ModeKeys.TRAIN``, ``EVAL`` or ``PREDICT``.
            ``TRAIN`` uses the rows dated before 2014-01-01; ``EVAL`` and
            ``PREDICT`` use the rows from 2014-01-01 onwards.
        n_generator: A single generator number, or a list/tuple of generator
            numbers whose datasets are concatenated in the given order.

    Returns:
        A batched ``tf.data.Dataset`` of ``(features, target)`` pairs, where
        ``features`` is a dict with the keys ``"categorical_input"`` and
        ``"timeseries_input"``. ``TRAIN`` data is cached, shuffled, batched
        with ``BATCH_SIZE`` and repeated; ``EVAL`` data is batched with
        ``BATCH_SIZE``; ``PREDICT`` data is batched one example at a time.

    Raises:
        ValueError: If ``mode`` is not one of the three supported modes, or if
            ``n_generator`` is an empty list/tuple.
    """
    mode_keys = tf.estimator.ModeKeys
    if mode not in (mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT):
        # Fail early instead of silently returning None for an unknown mode.
        raise ValueError(f'Unsupported mode: {mode!r}')

    # A scalar generator number is handled as a one-element list.
    if isinstance(n_generator, (list, tuple)):
        generators = list(n_generator)
    else:
        generators = [n_generator]
    if not generators:
        raise ValueError('n_generator must contain at least one generator')

    # Rows before this date are training data, the rest is held out.
    split_date = '2014-01-01'
    # Number of rows in one input history window.
    history_steps = PAST_HISTORY * HOUR_RESOLUTION

    def load_split(generator):
        """Load one generator and keep only the rows that belong to `mode`."""
        df = get_nth_generator_data(generator)
        if mode == mode_keys.TRAIN:
            return df.loc[df.index < split_date]
        return df.loc[df.index >= split_date]

    def get_ts_data(df):
        """Sliding windows of `history_steps` consecutive rows of series_cols."""
        return tf.data.Dataset.from_tensor_slices(df[series_cols])\
            .window(history_steps, shift=1, stride=1, drop_remainder=True)\
            .flat_map(lambda window: window.batch(history_steps))

    def get_cat_data(df):
        """Calendar features of the last row of each history window."""
        # Row i of df_cat is row (history_steps - 1 + i) of df, i.e. the final
        # row of the i-th history window; the tail is trimmed by one target
        # horizon (FUTURE_TARGET * HOUR_RESOLUTION rows).
        df_cat = df[categ_cols][history_steps - 1:-(FUTURE_TARGET * HOUR_RESOLUTION)]
        return tf.data.Dataset.from_tensor_slices(df_cat), len(df_cat)

    def get_target_data(df, dataset_size):
        """Windows of FUTURE_TARGET rolling-mean values following the history."""
        df_target = df[target_cols][history_steps:]
        # Mean over HOUR_RESOLUTION consecutive rows; the leading NaNs are dropped.
        df_target = df_target.rolling(HOUR_RESOLUTION).mean().dropna()
        # NOTE(review): each window holds FUTURE_TARGET *consecutive* rows of the
        # rolling mean (stride 1). If the targets were meant to be spaced one
        # hour apart, the stride would have to be HOUR_RESOLUTION -- confirm.
        return tf.data.Dataset.from_tensor_slices(df_target)\
            .window(FUTURE_TARGET, shift=1, stride=1, drop_remainder=True)\
            .flat_map(lambda window: window.batch(FUTURE_TARGET))\
            .take(dataset_size)

    def data_tx(d1, d2, t):
        # The keys must match the names of the Keras model's input layers.
        return {"categorical_input": d1, "timeseries_input": d2}, t

    def get_dataset(df):
        """Zip calendar features, history windows and targets into examples."""
        time_series_data = get_ts_data(df)
        date_data, dataset_size = get_cat_data(df)
        target_data = get_target_data(df, dataset_size)
        return tf.data.Dataset.zip((date_data, time_series_data, target_data))\
                              .map(data_tx)

    dataset = None
    for generator in generators:
        dataset_i = get_dataset(load_split(generator))
        dataset = dataset_i if dataset is None else dataset.concatenate(dataset_i)

    if mode == mode_keys.TRAIN:
        return dataset.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
    if mode == mode_keys.EVAL:
        return dataset.batch(BATCH_SIZE)
    # PREDICT: one example per batch.
    return dataset.batch(1)


# Modelo

## Modelo de dos caminos

In [None]:
# Reset Keras' global state before building the model, so that re-running the
# cells below starts the auto-generated layer names (e.g. 'dense_2') afresh.
keras.backend.clear_session()

In [None]:
# Number of rows in one input history window.
TS_INPUTS = PAST_HISTORY * HOUR_RESOLUTION

# Dense Neural Network Path
# The input names must equal the feature-dict keys produced by data_fn
# ("categorical_input" and "timeseries_input"); the estimator maps features to
# model inputs by name.
input_ct = keras.Input(
    shape=(len(categ_cols),),
    name='categorical_input')

# Recurrent Neural Network Path
input_ts = keras.Input(
    shape=(TS_INPUTS, len(series_cols)),
    name='timeseries_input')

# Two stacked GRU layers: the first returns the whole sequence so that the
# second one can consume it and reduce it to a single vector.
x_ts = layers.GRU(FUTURE_TARGET * HOUR_RESOLUTION, return_sequences=True)(input_ts)
x_ts = layers.GRU(FUTURE_TARGET, return_sequences=False)(x_ts)

x_ct = layers.Dense(2)(input_ct)

# Merge both paths along the feature axis.
x = layers.Concatenate(axis=1)([x_ts, x_ct])
x = layers.Dense(FUTURE_TARGET)(x)
# Output layer: one value per target step, no activation (linear).
outputs = layers.Dense(FUTURE_TARGET)(x)

In [None]:
# Assemble the two-input functional model (calendar features + time series).
model = keras.models.Model(inputs=[input_ct, input_ts], outputs=outputs)

# Render the layer graph, including tensor shapes, to a PNG file.
keras.utils.plot_model(model, "Modelo RNN dos caminos.png", show_shapes=True)

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Train on mean squared error with Adam; MSE, MAE and MAPE are also reported
# as metrics.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse', 'mae', 'mape'],
)

In [None]:
# Generators whose data is passed to data_fn for training, evaluation and
# prediction.
n_generator = [1, 3, 8, 6, 7]

# Disabled alternative: train directly with Keras instead of the Estimator API.
# early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=PATIENCE)

# history = model.fit(data_fn(tf.estimator.ModeKeys.TRAIN, n_generator),
#                     epochs=EPOCHS,
#                     steps_per_epoch=EVALUATION_INTERVAL,
#                     validation_data=data_fn(tf.estimator.ModeKeys.EVAL, n_generator),
#                     validation_steps=VALIDATION_STEPS,
#                     verbose=2)

# Wrap the compiled Keras model in an Estimator. The model directory is a
# relative path built from the generator numbers (here 'model_1_3_8_6_7').
keras_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir=f'model_{"_".join([str(gen) for gen in n_generator])}',
    config = tf.estimator.RunConfig(
        tf_random_seed=1,
        save_checkpoints_steps=EVALUATION_INTERVAL,
        keep_checkpoint_max=10,
    )
)


In [None]:
# list(data_fn(tf.estimator.ModeKeys.EVAL, n_generator).take(1))

In [None]:
# The input functions are wrapped in lambdas so that the datasets are built
# when the estimator calls them rather than when this cell is defined.
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: data_fn(tf.estimator.ModeKeys.TRAIN, n_generator),
    max_steps=500)

eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: data_fn(tf.estimator.ModeKeys.EVAL, n_generator),
    throttle_secs=60)

# Train the estimator with train_spec and evaluate it with eval_spec.
tf.estimator.train_and_evaluate(keras_estimator, train_spec, eval_spec)

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir /content/model_1_3_8_6_7/

In [None]:
# !zip -r /content/model_1_3_8_6_7.zip /content/model_1_3_8_6_7

In [None]:
# !rm -r /content/model_1_3_8_6_7

In [None]:
def create_time_steps(length):
    """Return the negative time steps ``[-length, ..., -1]`` as a list.

    A ``length`` of zero (or less) gives an empty list.
    """
    return [step - length for step in range(length)]

def multi_step_plot(history, true_future, prediction, title=None):
    """Plot the history, the true future and (optionally) a prediction.

    Args:
        history: Sequence of past values; drawn on the negative time steps
            ``-len(history) .. -1``. May be empty.
        true_future: Sequence of observed future values; drawn on the steps
            ``0 .. len(true_future) - 1``.
        prediction: Sequence of predicted values drawn on the same steps as
            ``true_future``. ``None`` or an empty sequence skips the
            prediction series.
        title: File name passed to ``plt.savefig``. When omitted or empty the
            figure is only shown, not saved.
    """
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    future_steps = np.arange(len(true_future))

    plt.plot(num_in, np.array(history), label='Historia')
    plt.plot(future_steps, np.array(true_future), 'bo',
             label='Verdadero Futuro')
    # Check for presence rather than truthiness so that an all-zero prediction
    # is still drawn and None / plain lists do not raise.
    if prediction is not None and np.size(prediction) > 0:
        plt.plot(future_steps, np.array(prediction), 'ro',
                 label='Predicción')
    plt.legend(loc='upper left')
    plt.ylabel('Velocidad del viento (m/s)')
    plt.tight_layout()
    if title:
        plt.savefig(title)
    plt.show()

In [None]:
# Fetch the first (features, target) example of the PREDICT pipeline. The
# example is read directly with next(), so the model tensor `x` defined in the
# model cell is not overwritten by a throwaway list.
dataset_take = data_fn(tf.estimator.ModeKeys.PREDICT, n_generator).take(1)
input_data, target_data = next(dataset_take.as_numpy_iterator())
# First element of the batch, first series column.
past_history = input_data['timeseries_input'][0][:, 0]

In [None]:
# Observed future of the first PREDICT example (first target column).
true_future = target_data[0][:, 0]
predictor = keras_estimator.predict(lambda: data_fn(tf.estimator.ModeKeys.PREDICT, n_generator))
# Each item yielded by predict() is a dict keyed by the output layer's name.
prediction_by_output = next(predictor)
# The model has a single output, so unpack its only value instead of
# hard-coding the auto-generated layer name ('dense_2', 'dense_3', ...), which
# changes whenever the model is rebuilt without clearing the Keras session.
(prediction,) = prediction_by_output.values()

In [None]:
# prediction = prediction['dense_3']

In [None]:
# true_future = 

In [None]:
# past_history

In [None]:
# Plot the complete input history with the true and predicted future; the last
# argument is the file name the figure is saved under.
multi_step_plot(past_history, true_future, prediction, 'prediction total history')

In [None]:
# Plot only the true and predicted future (no history). The file name argument
# is supplied explicitly because multi_step_plot saves the figure under it.
multi_step_plot([], true_future, prediction, 'prediction without history')

In [None]:
# Same plot, but only with the history from index 850 onwards so that the
# predicted points are easier to see.
multi_step_plot(past_history[850:], true_future, prediction, 'zoomed in prediction')