# Trabajo Práctico 2 - Parte 2

### Asignatura: Análisis de Series Temporales

#### Docente: Rodrigo Del Rosso
#### Asistentes: Sebastian Calcagno - Fernando Martinez

#### Integrantes:
- Del Villar, Javier Alonso
- Otrino, Facundo Damián
- Pistoya, Haydeé Soledad
- Rojas, Mariano Arturo
- Sorza, Edwin Andrés
- Vaillard, Leandro Carlos

## Carga de Librerías

In [None]:
import h2o
import matplotlib.pyplot as plt
# import numpy as np
# import os
import pandas as pd
# import random
# import tensorflow as tf

from h2o.automl import H2OAutoML
# from kerashypetune import KerasRandomSearch, KerasGridSearch
from pandas.plotting import register_matplotlib_converters
# from scipy import stats
# from sklearn.metrics import mean_squared_error
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from statsmodels.tsa.vector_ar.var_model import VAR
# from tensorflow.keras.callbacks import *
# from tensorflow.keras.layers import *
# from tensorflow.keras.models import *
# from tensorflow.keras.optimizers import *
# from tqdm import tqdm

register_matplotlib_converters()

# AutoML

En esta sección se utilizará AutoML para generar tres modelos para las variables:

* novillo_precio_kilo_vivo_dolares
* faena_total_pais
* consumo_interno_per_capita_kg_por_habitante

## Funciones de apoyo

In [None]:
def time_series_to_supervised(data, n_lag=1, n_fut=1, selLag=None, selFut=None, dropnan=True):
    """
    Reframe a time series as a supervised-learning table.

    Each output row holds the current period's attributes plus time-shifted
    copies of prior periods (inputs) and future periods (targets).

    :param data:    the periodic observations; anything ``pd.DataFrame`` accepts
                    (DataFrame, Series, list, list of lists, NumPy array)
    :param n_lag:   number of PRIOR periods to add as inputs; generates X(t-1), X(t-2), ...;
                    values below 0 are treated as 0 (nothing lagged)
    :param n_fut:   number of FUTURE periods to add as targets; generates Y(t+1), ...;
                    values below 0 are treated as 0 (no future periods)
    :param selLag:  only copy these attributes from PRIOR periods; None (default) copies all;
                    EX: ['Xa', 'Xb']
    :param selFut:  only copy these attributes from FUTURE periods; None (default) copies all;
                    EX: ['rslt', 'xx']
    :param dropnan: True (default) drops every row that contains a NaN value
    :return: a Pandas DataFrame of time series data organized for supervised learning

    NOTES:
    (1) The current period's data is always included in the output, under the
        original column names converted to strings.
    (2) A suffix is added to shifted columns to indicate a relative time reference:
        (t-2) is from two periods in the past; (t+1) is from the next period.
    (3) This is an extension of Jason Brownlee's series_to_supervised() function,
        customized for MFI use.
    """
    df = pd.DataFrame(data)
    # Take the column list from the frame itself rather than from the raw input:
    # this stays correct for lists of lists, Series and 1-D arrays.
    orig_names = list(df.columns)

    # Current-period attributes always come first.
    cols = [df]
    names = [str(name) for name in orig_names]

    # Past periods, oldest first: (t-n_lag), ..., (t-1).
    n_lag = max(0, n_lag)
    for i in range(n_lag, 0, -1):
        suffix = '(t-%d)' % i
        if selLag is None:
            cols.append(df.shift(i))
            names += [f'{name}{suffix}' for name in orig_names]
        else:
            for var in selLag:
                cols.append(df[var].shift(i))
                names.append(f'{var}{suffix}')

    # Future periods, nearest first: (t+1), ..., (t+n_fut).
    n_fut = max(n_fut, 0)
    for i in range(1, n_fut + 1):
        suffix = '(t+%d)' % i
        if selFut is None:
            cols.append(df.shift(-i))
            names += [f'{name}{suffix}' for name in orig_names]
        else:
            for var in selFut:
                cols.append(df[var].shift(-i))
                names.append(f'{var}{suffix}')

    # Combine everything side by side; concat builds a new frame, so the
    # caller's data is left untouched.
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Shifting leaves NaN in the first n_lag and last n_fut rows.
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def series_plot(df_train, df_test, to_plot):
    """
    Plot the next-period target of one series for the train and test splits.

    :param df_train: training frame containing the column ``<to_plot>(t+1)``
    :param df_test:  test frame containing the column ``<to_plot>(t+1)``
    :param to_plot:  base name of the series; also used as the y-axis label
    """
    target_col = to_plot + '(t+1)'

    plt.figure(figsize=(13, 4))
    # Train is drawn first (blue), then test (red), matching the legend order.
    for split, colour in ((df_train, 'blue'), (df_test, 'red')):
        plt.plot(split.index, split[target_col], color=colour)
    plt.ylabel(to_plot, fontsize=18)
    plt.legend(['train', 'test'])
    plt.show()

def dispersion_plot(df, to_plot):
    """
    Scatter the observed values of a series against its predictions.

    :param df:      frame holding the columns ``<to_plot>`` and ``<to_plot>_pred``
    :param to_plot: base name of the series; predictions go on the x axis,
                    observed values on the y axis
    """
    pred_col = to_plot + '_pred'

    plt.figure(figsize=(5, 5))
    plt.scatter(x=df[pred_col], y=df[to_plot], s=40, color='red')
    plt.xlabel(pred_col)
    plt.ylabel(to_plot)
    plt.show()

def prediction_plot(df, to_plot):
    """
    Draw the observed series (black) and its predictions (red) on the same axes.

    :param df:      frame holding the columns ``<to_plot>`` and ``<to_plot>_pred``
    :param to_plot: base name of the series; also used as the y-axis label
    """
    pred_col = to_plot + '_pred'

    # Observed values first, predictions second, matching the legend order.
    for column, colour in ((to_plot, 'black'), (pred_col, 'red')):
        plt.plot(df[column], color=colour)
    plt.ylabel(to_plot)
    plt.legend([to_plot, pred_col])
    plt.show()


## Carga de Datos

In [None]:
# Source columns to keep (in this order), mapped to the short names used
# throughout the rest of the notebook.
column_aliases = {
    'novillo_precio_kilo_vivo_dolares': 'kgVivo',
    'faena_total_pais': 'faenaTotal',
    'consumo_interno_per_capita_kg_por_habitante': 'consumoInterno',
}

df = (
    pd.read_csv('../input/carnes/serie-tiempo-indicadores-mensuales-bovinos.csv')
    [list(column_aliases)]
    .rename(columns=column_aliases)
)

print(df.shape)
df.head()

## Transformación de Serie a Datos Supervisados

In [None]:
# One supervised frame per target: every frame keeps all current and (t-1)
# attributes as inputs and adds only that target's (t+1) value as the response.
df_kgVivo, df_faenaTotal, df_consumoInterno = (
    time_series_to_supervised(df, n_lag=1, n_fut=1, selLag=None, selFut=[response], dropnan=True)
    for response in ('kgVivo', 'faenaTotal', 'consumoInterno')
)

## Datasets para Entrenamiento y Testeo

In [None]:
# Raw series: the first 241 rows are used for training, the rest for testing.
train, test = df.iloc[:241], df.iloc[241:]

# The supervised frames were built with one lag and one future period and
# dropnan=True, so they lose the first and the last raw row. Position p in a
# supervised frame therefore carries the (t+1) target of raw row p + 2, and a
# cut at 239 puts the first test target on raw row 241, in line with `test`.
# NOTE(review): this assumes the raw columns contain no other NaN rows - confirm.
train_kgVivo, test_kgVivo = df_kgVivo.iloc[:239], df_kgVivo.iloc[239:]

train_faenaTotal, test_faenaTotal = df_faenaTotal.iloc[:239], df_faenaTotal.iloc[239:]

train_consumoInterno, test_consumoInterno = df_consumoInterno.iloc[:239], df_consumoInterno.iloc[239:]

## Plots de las series a analizar

In [None]:
# Train/test view of each target series, one figure per series.
for _series, _train_part, _test_part in (
    ('kgVivo', train_kgVivo, test_kgVivo),
    ('faenaTotal', train_faenaTotal, test_faenaTotal),
    ('consumoInterno', train_consumoInterno, test_consumoInterno),
):
    series_plot(_train_part, _test_part, _series)

## Inicialización de sesión h2o

In [None]:
# Initialise the H2O session that the H2OFrame conversions and AutoML runs
# in the following cells depend on (per the H2O docs, this connects to a
# local server or starts one if none is reachable).
h2o.init()

## Conversión de datasets a h2o frames

In [None]:
# For every target: [training H2OFrame, test H2OFrame], in that order.
datasets = {
    series: [h2o.H2OFrame(train_part), h2o.H2OFrame(test_part)]
    for series, train_part, test_part in (
        ('kgVivo', train_kgVivo, test_kgVivo),
        ('faenaTotal', train_faenaTotal, test_faenaTotal),
        ('consumoInterno', train_consumoInterno, test_consumoInterno),
    )
}

## Configuración de Experimentos

In [None]:
# Trained AutoML run per target, keyed by the target's short name.
aml_results = {}

for target, (train_frame, test_frame) in datasets.items():
    print('AutoML with ' + target)

    # The response is the next-period value of the target; every other
    # column of the training frame is used as a predictor.
    y = target + '(t+1)'
    X = list(train_frame.columns)
    X.remove(y)

    # Fixed seed for repeatability. Options left disabled in this notebook:
    # max_runtime_secs=180, include_algos=['DRF', 'XGBoost', 'GBM'].
    aml = H2OAutoML(seed=1234, verbosity=None)

    # NOTE(review): the leaderboard is scored on the test frame, the same
    # frame the winning model is later asked to predict - confirm this is
    # the intended evaluation protocol.
    aml.train(x=X, y=y, training_frame=train_frame, leaderboard_frame=test_frame)

    aml_results[target] = aml


## Resultados de AutoML

In [None]:
# Show the first three leaderboard rows of every AutoML run.
for target, automl_run in aml_results.items():
    print('Top 3 de Modelos para ' + target)
    top_rows = automl_run.leaderboard[:3, :]
    print(top_rows)

## Predicciones con el modelo ganador de cada serie

In [None]:
# One '<target>_pred' column per series, produced by that series' leader
# model on its test H2OFrame.
predictions = pd.DataFrame()
for target, automl_run in aml_results.items():
    holdout_frame = datasets[target][1]
    preds = automl_run.leader.predict(holdout_frame)
    preds_pandas = h2o.as_list(preds, use_pandas=True)
    predictions[target + '_pred'] = preds_pandas['predict']

In [None]:
# Renumber the observed test rows from 0 so they line up row by row with
# the prediction columns, then place both side by side.
observed = test.reset_index(drop=True)
test_results = pd.concat([observed, predictions], axis=1)
test_results

## Gráfico de Dispersión de Predicciones

In [None]:
# Observed-vs-predicted scatter, one figure per target series.
for _series in ('kgVivo', 'faenaTotal', 'consumoInterno'):
    dispersion_plot(test_results, _series)

## Gráfico de Predicciones

In [None]:
# Observed and predicted values over the test period, one figure per target series.
for _series in ('kgVivo', 'faenaTotal', 'consumoInterno'):
    prediction_plot(test_results, _series)