In [42]:
import pandas as pd
import math
import json
import numpy as np
import random

import sagemaker
from sagemaker.sklearn.model import SKLearnModel

#### Configuracion

In [47]:
np.random.seed(42)
random.seed(42)

sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
image_name = sagemaker.image_uris.retrieve("forecasting-deepar", region)
role='LabRole'
s3_bucket = 'mia-electiva3-dollar-predictor'
s3_output_path = "s3://{}/{}/output".format(s3_bucket, 'deepar-model')

### Cargar dataset

In [8]:
# Cargar el dataset de precios por hora del dolar
df = pd.read_csv('dollar_hourly_price.csv')

In [44]:
df.head()

Unnamed: 0,fecha,precio
0,2023-02-17 13:00:00,4935.512073
1,2023-02-17 14:00:00,4933.093721
2,2023-02-17 15:00:00,4930.773292
3,2023-02-17 16:00:00,4922.921354
4,2023-02-17 17:00:00,4921.032044


### Entrenamiento - DeepAR
El modelo por defecto de SageMaker para series de tiempo es DeepAR, y teniendo en cuenta los malos resultados obtenidos con otros modelos, se va a realizar el ejercicio de entrenamiento y despligue con este modelo

In [15]:
# Dividir el dataset en datos de entrenamiento y test
train_size = math.ceil(len(df) * 0.7)
train_data = df[:train_size]
test_data = df[train_size:]

##### Convierto el dataset al formato json que acepta DeepAR

In [29]:
json_training_data = [
    {
        "start": str(ts['fecha']),
        "target": [ts['precio']],  # We use -1, because pandas indexing includes the upper bound
    }
    for idx, ts in train_data.iterrows()
]
print(json_training_data[:5])

[{'start': '2023-02-17 13:00:00', 'target': [4935.512073285198]}, {'start': '2023-02-17 14:00:00', 'target': [4933.093720736434]}, {'start': '2023-02-17 15:00:00', 'target': [4930.773292125985]}, {'start': '2023-02-17 16:00:00', 'target': [4922.921353719009]}, {'start': '2023-02-17 17:00:00', 'target': [4921.032043902439]}]


In [31]:
json_test_data = [
    {
        "start": str(ts['fecha']),
        "target": [ts['precio']],  # We use -1, because pandas indexing includes the upper bound
    }
    for idx, ts in test_data.iterrows()
]
print(json_test_data[-5:])

[{'start': '2023-04-05 14:00:00', 'target': [4567.517916143497]}, {'start': '2023-04-05 15:00:00', 'target': [4571.212102392345]}, {'start': '2023-04-05 16:00:00', 'target': [4571.11388]}, {'start': '2023-04-05 17:00:00', 'target': [4570.980660150375]}, {'start': '2023-04-05 18:00:00', 'target': [4570.7655]}]


In [35]:
# Guardo el archivo json para ser usado luego en el entranamiento
def write_dicts_to_file(path, data):
    with open(path, "wb") as fp:
        for d in data:
            fp.write(json.dumps(d).encode("utf-8"))
            fp.write("\n".encode("utf-8"))

In [36]:
%%time
write_dicts_to_file("train.json", json_training_data)
write_dicts_to_file("test.json", json_test_data)

CPU times: user 1.85 ms, sys: 0 ns, total: 1.85 ms
Wall time: 2.9 ms


##### Entrenamiento del Modelo

In [None]:
estimator = sagemaker.estimator.Estimator(
    image_uri=image_name,
    sagemaker_session=sagemaker_session,
    role=role,
    train_instance_count=1,
    train_instance_type="ml.m4.large",
    base_job_name="dollar-predictor",
    output_path=s3_output_path,
)

In [None]:
hyperparameters = {
    "time_freq": freq,
    "epochs": "400",
    "early_stopping_patience": "40",
    "mini_batch_size": "64",
    "learning_rate": "5E-4",
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
}

In [None]:
estimator.set_hyperparameters(**hyperparameters)