# LNN SageMaker Algorithm

Clone the repository.

In [None]:
# Clone the algorithm's source repository; git places it in ./lnn-sagemaker.
!git clone https://github.com/flaviagiammarino/lnn-sagemaker

## 1. Create the SageMaker Algorithm

### 1.1 Create the training image

In [None]:
%%bash
# Build the LNN training image and push it to a private Amazon ECR repository.
# Requires the AWS CLI and Docker, plus credentials that can call STS and ECR.

# Abort on the first failing command so that a failed login or build is never
# followed by tagging and pushing a stale image.
set -euo pipefail

algorithm_name=lnn-sagemaker-training
algorithm_region=eu-west-1

cd amazon_sagemaker_algorithm/training_image

account=$(aws sts get-caller-identity --query Account --output text)

registry="${account}.dkr.ecr.${algorithm_region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# Create the repository on first use. The region is passed explicitly so that
# the repository is created in the same region as the registry pushed to below,
# regardless of the CLI's default region.
if ! aws ecr describe-repositories --region "${algorithm_region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${algorithm_region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the account's own registry (the push target).
# `aws ecr get-login` no longer exists in AWS CLI v2; `get-login-password`
# is available in v2 and in recent v1 releases.
aws ecr get-login-password --region "${algorithm_region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Log Docker in to registry 763104351884 in us-east-1 (same registry and region as before).
# NOTE(review): the build passes REGION=${algorithm_region}; if the Dockerfile pulls its
# base image from that region, this login should target it instead - confirm against the Dockerfile.
aws ecr get-login-password --region us-east-1 \
    | docker login --username AWS --password-stdin "763104351884.dkr.ecr.us-east-1.amazonaws.com"

docker build -t "${algorithm_name}" --build-arg REGION="${algorithm_region}" .

docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

### 1.2 Create the inference image

In [None]:
%%bash
# Build the LNN inference image and push it to a private Amazon ECR repository.
# Requires the AWS CLI and Docker, plus credentials that can call STS and ECR.

# Abort on the first failing command so that a failed login or build is never
# followed by tagging and pushing a stale image.
set -euo pipefail

algorithm_name=lnn-sagemaker-inference
algorithm_region=eu-west-1

# NOTE(review): this path previously read `amazon_sagemaker_algorithm/i_image`, which looks
# like a typo next to `training_image` used by the training build - confirm the directory name.
cd amazon_sagemaker_algorithm/inference_image

account=$(aws sts get-caller-identity --query Account --output text)

registry="${account}.dkr.ecr.${algorithm_region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# Create the repository on first use. The region is passed explicitly so that
# the repository is created in the same region as the registry pushed to below,
# regardless of the CLI's default region.
if ! aws ecr describe-repositories --region "${algorithm_region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${algorithm_region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the account's own registry (the push target).
# `aws ecr get-login` no longer exists in AWS CLI v2; `get-login-password`
# is available in v2 and in recent v1 releases.
aws ecr get-login-password --region "${algorithm_region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Log Docker in to registry 763104351884 in us-east-1 (same registry and region as before).
# NOTE(review): the build passes REGION=${algorithm_region}; if the Dockerfile pulls its
# base image from that region, this login should target it instead - confirm against the Dockerfile.
aws ecr get-login-password --region us-east-1 \
    | docker login --username AWS --password-stdin "763104351884.dkr.ecr.us-east-1.amazonaws.com"

docker build -t "${algorithm_name}" --build-arg REGION="${algorithm_region}" .

docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

In [None]:
import io
import datetime
import sagemaker
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# SageMaker session used for every S3 upload and SageMaker API call in this notebook
sagemaker_session = sagemaker.Session()

# S3 bucket: the session's default bucket, used for all uploads below
bucket = sagemaker_session.default_bucket()

# instance type shared by the training, endpoint and batch transform steps
instance_type = "ml.m5.2xlarge"

# fixed hyperparameters (also reused later to align forecasts with actuals when scoring)
context_length = 200     # number of timesteps used as input
prediction_length = 100  # number of timesteps to output

**Training dataset:**

In [None]:
# Load the sample training series from the local CSV file.
training_dataset = pd.read_csv("sample_data/train.csv")

In [None]:
# (rows, columns) of the training dataset.
training_dataset.shape

In [None]:
# Preview the first rows of the training dataset.
training_dataset.head()

In [None]:
# Plot every series of the training dataset (all columns except "ts") in its own panel.
series_columns = [c for c in training_dataset.columns if c != "ts"]

# squeeze=False keeps `axs` a 2-D array even when there is a single series,
# so the per-panel indexing below also works for one-column datasets.
fig, axs = plt.subplots(nrows=len(series_columns), ncols=1, sharex=True, squeeze=False, figsize=(6, 8))
for ax, c in zip(axs[:, 0], series_columns):
    ax.plot(training_dataset[c], color="#7f8ea3", lw=1)
    ax.set_title(c, size=10)
    ax.set(xlabel="Time", ylabel="Value")
    # with sharex=True only the bottom panel gets x tick labels; turn them back on everywhere
    ax.xaxis.set_tick_params(labelbottom=True)
    ax.tick_params(axis="both", which="both", labelsize=7)
fig.suptitle("Training Dataset")
fig.tight_layout()
# plt.show() renders with the notebook's inline backend, where fig.show() has no GUI to open
plt.show()

In [None]:
# Serialize the training dataset to CSV (without the index) and upload it to the bucket
# under the key "data/training/train.csv".
training_data = sagemaker_session.upload_string_as_file_body(
    body=training_dataset.to_csv(index=False),
    bucket=bucket,
    key="data/training/train.csv"
)

In [None]:
# Show the value returned by the upload; it is passed to `fit` as the "training" channel below.
training_data

**Validation dataset:**

In [None]:
# Load the sample validation series from the local CSV file.
validation_dataset = pd.read_csv("sample_data/valid.csv")

In [None]:
# (rows, columns) of the validation dataset.
validation_dataset.shape

In [None]:
# Preview the first rows of the validation dataset.
validation_dataset.head()

In [None]:
# Plot every series of the validation dataset (all columns except "ts") in its own panel.
series_columns = [c for c in validation_dataset.columns if c != "ts"]

# squeeze=False keeps `axs` a 2-D array even when there is a single series,
# so the per-panel indexing below also works for one-column datasets.
fig, axs = plt.subplots(nrows=len(series_columns), ncols=1, sharex=True, squeeze=False, figsize=(6, 8))
for ax, c in zip(axs[:, 0], series_columns):
    ax.plot(validation_dataset[c], color="#7f8ea3", lw=1)
    ax.set_title(c, size=10)
    ax.set(xlabel="Time", ylabel="Value")
    # with sharex=True only the bottom panel gets x tick labels; turn them back on everywhere
    ax.xaxis.set_tick_params(labelbottom=True)
    ax.tick_params(axis="both", which="both", labelsize=7)
fig.suptitle("Validation Dataset")
fig.tight_layout()
# plt.show() renders with the notebook's inline backend, where fig.show() has no GUI to open
plt.show()

In [None]:
# Serialize the validation dataset to CSV (without the index) and upload it to the bucket
# under the key "data/training/valid.csv".
validation_data = sagemaker_session.upload_string_as_file_body(
    body=validation_dataset.to_csv(index=False),
    bucket=bucket,
    key="data/training/valid.csv"
)

In [None]:
# Show the value returned by the upload; it is passed to `fit` as the "validation" channel below.
validation_data

**Test dataset:**

In [None]:
# Load the sample test series from the local CSV file.
test_dataset = pd.read_csv("sample_data/test.csv")

In [None]:
# (rows, columns) of the test dataset.
test_dataset.shape

In [None]:
# Preview the first rows of the test dataset.
test_dataset.head()

In [None]:
# Plot every series of the test dataset (all columns except "ts") in its own panel.
series_columns = [c for c in test_dataset.columns if c != "ts"]

# squeeze=False keeps `axs` a 2-D array even when there is a single series,
# so the per-panel indexing below also works for one-column datasets.
fig, axs = plt.subplots(nrows=len(series_columns), ncols=1, sharex=True, squeeze=False, figsize=(6, 8))
for ax, c in zip(axs[:, 0], series_columns):
    ax.plot(test_dataset[c], color="#7f8ea3", lw=1)
    ax.set_title(c, size=10)
    ax.set(xlabel="Time", ylabel="Value")
    # with sharex=True only the bottom panel gets x tick labels; turn them back on everywhere
    ax.xaxis.set_tick_params(labelbottom=True)
    ax.tick_params(axis="both", which="both", labelsize=7)
fig.suptitle("Test Dataset")
fig.tight_layout()
# plt.show() renders with the notebook's inline backend, where fig.show() has no GUI to open
plt.show()

In [None]:
# Serialize the test dataset to CSV (without the index) and upload it to the bucket
# under the key "data/inference/input/test.csv".
test_data = sagemaker_session.upload_string_as_file_body(
    body=test_dataset.to_csv(index=False),
    bucket=bucket,
    key="data/inference/input/test.csv"
)

In [None]:
# Show the value returned by the upload; it is used as the batch transform input in section 6.
test_data

## 3. Train a machine learning model

Now that the dataset is available in an accessible Amazon S3 bucket, we are ready to train a machine learning model. 

### A. Set up environment

In [None]:
# Execution role passed to the estimators below.
role = sagemaker.get_execution_role()

### B. Train a model

In [None]:
# Hyperparameters sent to the training jobs; the descriptions mirror section 5.A.
hyperparameters = {
    "context-length": context_length,        # length of the input sequences
    "prediction-length": prediction_length,  # length of the output sequences
    "sequence-stride": 1,                    # period between consecutive output sequences
    "backbone-layers": 1,                    # hidden layers of the backbone network
    "backbone-units": 128,                   # hidden units of the backbone network
    "backbone-activation": "silu",           # activation function of the backbone network
    "backbone-dropout": 0,                   # dropout rate of the backbone network
    "hidden-size": 64,                       # hidden units of the network heads
    "minimal": 0,                            # 1 = use the CfC direct solution
    "no-gate": 0,                            # 1 = CfC without the (1 - sigmoid) part
    "use-ltc": 0,                            # 1 = use an LTC instead of a CfC
    "use-mixed": 0,                          # 1 = mix the CfC RNN-state with an LSTM
    "lr": 0.001,                             # learning rate
    "lr-decay": 0.999,                       # decay factor applied to the learning rate
    "batch-size": 64,                        # training batch size
    "epochs": 50,                            # number of training epochs
}

In [None]:
# Metric name / regex pairs handed to the estimators; each regex captures one
# numeric value (digits and dots) following the given label.
metric_definitions = [
    # metrics logged to CloudWatch during training
    {"Name": "training_mse", "Regex": "train_mse: ([0-9\\.]+)"},
    {"Name": "validation_mse", "Regex": "valid_mse: ([0-9\\.]+)"},
    {"Name": "training_mae", "Regex": "train_mae: ([0-9\\.]+)"},
    {"Name": "validation_mae", "Regex": "valid_mae: ([0-9\\.]+)"},
    # metrics logged to CloudWatch at the end of training
    {"Name": "train:mse", "Regex": "train:mse ([0-9\\.]+)"},
    {"Name": "valid:mse", "Regex": "valid:mse ([0-9\\.]+)"},
    {"Name": "train:mae", "Regex": "train:mae ([0-9\\.]+)"},
    {"Name": "valid:mae", "Regex": "valid:mae ([0-9\\.]+)"},
]

For information on creating an `Estimator` object, see the [documentation](https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html).

In [None]:
# Configure a single-instance training job for the algorithm identified by `algo_arn`.
# NOTE(review): `algo_arn` is not assigned in any visible cell of this notebook; it must be
# set (presumably to the ARN of the algorithm created from the images in section 1)
# before this cell can run on a fresh kernel.
estimator = sagemaker.algorithm.AlgorithmEstimator(
    algorithm_arn=algo_arn,
    base_job_name="lnn-training",
    role=role,
    instance_count=1,
    instance_type=instance_type,
    input_mode="File",
    sagemaker_session=sagemaker_session,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions
)

# Launch the training job with the uploaded training and validation files as input channels.
estimator.fit({"training": training_data, "validation": validation_data})

See this [blog-post](https://aws.amazon.com/blogs/machine-learning/easily-monitor-and-visualize-metrics-while-training-models-on-amazon-sagemaker/) for more information on how to visualize metrics during training. You can also open the training job from the [Amazon SageMaker console](https://console.aws.amazon.com/sagemaker/home?#/jobs/) and monitor the metrics and logs in the **Monitor** section.

### C. Incremental training

In [None]:
# Fine-tune: start a new training job from the artifacts of the training job that just finished.
# `estimator.model_data` is the S3 location recorded for the completed job's model artifacts, so it
# stays correct even when the job's output location is not `s3://<bucket>/<job-name>/output/`.
# The right-hand side is evaluated before `estimator` is rebound, so it still refers to the
# previous estimator here.
# NOTE(review): `algo_arn` is not assigned in any visible cell of this notebook; it must be
# defined before this cell can run on a fresh kernel.
estimator = sagemaker.algorithm.AlgorithmEstimator(
    model_uri=estimator.model_data,
    algorithm_arn=algo_arn,
    base_job_name="lnn-fine-tuning",
    role=role,
    instance_count=1,
    instance_type=instance_type,
    input_mode="File",
    sagemaker_session=sagemaker_session,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions
)

# Launch the fine-tuning job on the same training and validation channels.
estimator.fit({"training": training_data, "validation": validation_data})

## 4. Real-time inference

Now you can deploy the model for performing real-time inference.

In [None]:
# CSV serializer / pandas deserializer for the endpoint's "text/csv" payloads.
# NOTE(review): `serializer` is not referenced again in the visible cells; only
# `deserializer` is used (to parse the endpoint response in section 4.C).
serializer = sagemaker.serializers.CSVSerializer(content_type="text/csv")
deserializer = sagemaker.base_deserializers.PandasDeserializer(accept="text/csv")

### A. Deploy trained model

In [None]:
# Deploy the most recently fitted estimator to a single-instance endpoint.
# The model and endpoint names are suffixed with the current timestamp (down to
# microseconds); the two timestamps are taken separately, so the suffixes may differ.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    model_name=f"lnn-model-{datetime.datetime.now().strftime(format='%Y-%m-%d-%H-%M-%S-%f')}",
    endpoint_name=f"lnn-endpoint-{datetime.datetime.now().strftime(format='%Y-%m-%d-%H-%M-%S-%f')}",
)

Once the endpoint is in service, you can perform real-time inference.

### B. Create input payload

The inference algorithm takes as input a CSV file containing the time series. The CSV file should have the same format as the one used for training.

Given that the test dataset used in this experiment is relatively small, we invoke the endpoint using all the data.

In [None]:
# Request body: the whole test dataset as CSV text, without the index column.
payload = test_dataset.to_csv(index=False)

### C. Perform real-time inference

In [None]:
# Invoke the endpoint through the session's runtime client, sending the CSV payload.
response = sagemaker_session.sagemaker_runtime_client.invoke_endpoint(
    EndpointName=predictor.endpoint_name,
    ContentType="text/csv",
    Body=payload
)

# Parse the CSV response body with the pandas deserializer defined above.
real_time_predictions = deserializer.deserialize(response["Body"], content_type="text/csv")

In [None]:
# Inspect the raw `invoke_endpoint` response.
response

In [None]:
# Save the real-time predictions to the bucket as CSV (without the index).
real_time_results = sagemaker_session.upload_string_as_file_body(
    body=real_time_predictions.to_csv(index=False),
    bucket=bucket,
    key="data/inference/output/real-time/real_time_predictions.csv"
)

In [None]:
# Show the value returned by the upload of the real-time predictions.
real_time_results

### D. Visualize output

The inference algorithm outputs the predicted values of the time series and the standard deviation of the predictions. 

**Notes:** 

a) The model predicts the time series sequence by sequence. For instance, if the `context-length` is set equal to 200, and the `prediction-length` is set equal to 100, then the first 200 data points (from 1 to 200) are used as input to predict the next 100 data points (from 201 to 300). As a result, the algorithm does not return the predicted values of the first 200 data points, which are set to missing in the output CSV file.

b) The outputs include the out-of-sample forecasts beyond the last time step of the inputs. For instance, if the number of input samples is 500, and the `prediction-length` is 100, then the output CSV file will contain 600 samples, where the last 100 samples are the out-of-sample forecasts.  

In [None]:
# (rows, columns) of the real-time predictions.
real_time_predictions.shape

In [None]:
# Preview the first rows of the real-time predictions.
real_time_predictions.head()

In [None]:
# Preview the last rows of the real-time predictions.
real_time_predictions.tail()

In [None]:
# Count the missing values in each column of the real-time predictions.
real_time_predictions.isna().sum(axis=0)

In [None]:
# Overlay the real-time forecasts (mean with +/- 1 and 2 standard deviation bands)
# on the actual test series, one panel per target column (columns starting with "y").
target_columns = [c for c in test_dataset.columns if c.startswith("y")]

# squeeze=False keeps `axs` a 2-D array even when there is a single target,
# so the per-panel indexing below also works for one-column datasets.
fig, axs = plt.subplots(nrows=len(target_columns), ncols=1, sharex=True, squeeze=False, figsize=(6, 5))
for i, c in enumerate(target_columns):
    ax = axs[i, 0]
    pred_index = real_time_predictions.index
    pred_mean = real_time_predictions[f"{c}_mean"].values
    pred_std = real_time_predictions[f"{c}_std"].values
    # labels are set on the first panel only, so the figure legend gets one entry per artist
    ax.plot(test_dataset.index, test_dataset[c].values, color="#7f8ea3", lw=1, label="Actual" if i == 0 else None)
    ax.plot(pred_index, pred_mean, color="#009ad3", lw=1, label="Predicted" if i == 0 else None)
    ax.fill_between(pred_index, pred_mean + pred_std, pred_mean - pred_std, color="#009ad3", alpha=0.2, lw=1, label="Predicted +/- 1 Std. Dev." if i == 0 else None)
    ax.fill_between(pred_index, pred_mean + 2 * pred_std, pred_mean - 2 * pred_std, color="#009ad3", alpha=0.1, lw=1, label="Predicted +/- 2 Std. Dev." if i == 0 else None)
    ax.set_title(c, size=10)
    ax.set(xlabel="Time", ylabel="Value")
    # with sharex=True only the bottom panel gets x tick labels; turn them back on everywhere
    ax.xaxis.set_tick_params(labelbottom=True)
    ax.tick_params(axis="both", which="both", labelsize=7)
fig.suptitle("Real Time Predictions on Test Dataset")
fig.legend(bbox_to_anchor=(1, 0, 0.4, 1), frameon=False)
fig.tight_layout()
# plt.show() renders with the notebook's inline backend, where fig.show() has no GUI to open
plt.show()

### E. Calculate relevant metrics

In this section we assess the model's forecasting performance against the ground truth time series values. Note that in this section we calculate the mean squared error (MSE) and mean absolute error (MAE) separately for each time series using unscaled data. These are different from the metrics returned by the algorithm, which are the average MSE and MAE across all time series calculated using scaled data.   

In [None]:
# Score each target series (columns starting with "y") on the window where both actuals
# and forecasts exist: the first `context_length` rows are dropped from both, and the
# trailing `prediction_length` rows are dropped from the predictions only.
target_columns = [c for c in test_dataset.columns if c.startswith("y")]
for c in target_columns:
    actual = test_dataset[c].iloc[context_length:]
    predicted = real_time_predictions[f"{c}_mean"].iloc[context_length:-prediction_length]
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    print(f"Time series: {c}, MSE: {format(mse, '.2f')}, MAE: {format(mae, '.2f')}")

If [Amazon SageMaker Model Monitor](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor.html) supports the type of problem you are trying to solve using this algorithm, use the following examples to add Model Monitor support to your product.
For sample code to enable and monitor the model, see the following notebooks:
1. [Enable Amazon SageMaker Model Monitor](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker_model_monitor/enable_model_monitor/SageMaker-Enable-Model-Monitor.ipynb)
2. [Amazon SageMaker Model Monitor - visualizing monitoring results](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker_model_monitor/visualization/SageMaker-Model-Monitor-Visualize.ipynb)

### F. Delete the endpoint

Now that you have successfully performed real-time inference, you no longer need the endpoint. You can delete it to avoid being charged.

In [None]:
# Delete the model first, then the endpoint together with its endpoint configuration.
predictor.delete_model()
predictor.delete_endpoint(delete_endpoint_config=True)

Since this is an experiment, you do not need to run a hyperparameter tuning job. However, if you would like to see how to tune a model trained using a third-party algorithm with Amazon SageMaker's hyperparameter tuning functionality, you can run the optional tuning step.

## 5. Tune your model! (optional)

### A. Tuning guidelines

The model has the following hyperparameters, all of which are tunable:
- `context-length`: `int`. The length of the input sequences.
- `prediction-length`: `int`. The length of the output sequences.
- `sequence-stride`: `int`. The period between consecutive output sequences.
- `backbone-layers`: `int`. The number of hidden layers of the backbone neural network. 
- `backbone-units`: `int`. The number of hidden units of the backbone neural network.
- `backbone-activation`: `str`. The activation function of the backbone neural network.
- `backbone-dropout`: `float`. The dropout rate of the backbone neural network.
- `hidden-size`: `int`. The number of hidden units of the neural network heads.
- `minimal`: `bool`. If set to 1, the model will use the CfC direct solution.
- `no-gate`: `bool`. If set to 1, the model will use a CfC without the (1 - sigmoid) part.
- `use-ltc`: `bool`. If set to 1, the model will use an LTC instead of a CfC.
- `use-mixed`: `bool`. If set to 1, the model will mix the CfC RNN-state with an LSTM.
- `lr`: `float`. The learning rate used for training.
- `lr-decay`: `float`. The decay factor applied to the learning rate.
- `batch-size`: `int`. The batch size used for training.
- `epochs`: `int`. The number of training epochs.

### B. Define tuning configuration

In [None]:
# Search space for the tuning job. Hyperparameters not listed here (e.g. "context-length",
# "prediction-length") are not given a range and keep the values set on the estimator.
hyperparameter_ranges = {
    "backbone-layers": sagemaker.parameter.IntegerParameter(1, 3),
    "backbone-units": sagemaker.parameter.CategoricalParameter([32, 64, 128, 256]),
    "backbone-activation": sagemaker.parameter.CategoricalParameter(["silu", "relu", "tanh", "gelu", "lecun"]),
    "backbone-dropout": sagemaker.parameter.ContinuousParameter(0, 0.5),
    "hidden-size": sagemaker.parameter.CategoricalParameter([32, 64, 128, 256]),
    "lr": sagemaker.parameter.ContinuousParameter(0.0001, 0.01),
    "lr-decay": sagemaker.parameter.ContinuousParameter(0.9, 1.0),
    "batch-size": sagemaker.parameter.CategoricalParameter([32, 64, 128, 256]),
    "epochs": sagemaker.parameter.IntegerParameter(20, 200),
}

We use the validation mean absolute error (MAE) as the objective to be minimized.

In [None]:
# Name of the metric the tuner optimizes.
# NOTE(review): "valid_mae" does not match any `Name` in `metric_definitions` above (the closest
# are "validation_mae" and "valid:mae") - confirm it is one of the objective metrics the
# algorithm actually supports for tuning.
objective_metric_name = "valid_mae"

In [None]:
# Direction of optimization: lower objective values are better.
objective_type = "Minimize"

### C. Run a model tuning job

Make sure to detach the pre-trained model before tuning, otherwise the algorithm will keep using the same hyperparameters with the exception of the batch size, learning rate decay factor and number of epochs.

In [None]:
# Detach the pre-trained model so the tuning jobs do not start from its artifacts.
estimator.model_uri = None

In the interest of time, we run the tuner only for a few iterations. 

In [None]:
# Configure the tuning job: at most 4 training jobs in total, up to 4 of them in parallel,
# with a fixed random seed.
tuner = sagemaker.tuner.HyperparameterTuner(
    estimator=estimator,
    base_tuning_job_name="lnn-tuning",
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=4,
    max_parallel_jobs=4,
    random_seed=100,
)

In [None]:
# Launch the tuning job on the same training and validation channels used for training.
tuner.fit({"training": training_data, "validation": validation_data})

In [None]:
# Tabulate the tuning results, lowest final objective value first (the objective is minimized).
tuner.analytics().dataframe().sort_values(by="FinalObjectiveValue", ascending=True, ignore_index=True)

In [None]:
# Fetch the description of the latest tuning job; the cells below read its "BestTrainingJob" entry.
tuning_job_result = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.name
)

In [None]:
# Show the tuned hyperparameter values of the best training job.
print("Best hyperparameters:")
tuning_job_result["BestTrainingJob"]["TunedHyperParameters"]

In [None]:
# Show the final objective metric value of the best training job.
print("Best score:")
tuning_job_result["BestTrainingJob"]["FinalHyperParameterTuningJobObjectiveMetric"]["Value"]

Once you have completed a tuning job (or even while the job is still running), you can [clone and use this notebook](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/hyperparameter_tuning/analyze_results/HPO_Analyze_TuningJob_Results.ipynb) to analyze the results and understand how each hyperparameter affects the quality of the model.

## 6. Perform batch inference

In this section, you will perform batch inference.

In [None]:
# Create a single-instance batch transformer from the estimator.
transformer = estimator.transformer(
    instance_count=1,
    instance_type=instance_type,
)

In [None]:
# Base name for the transform jobs started by this transformer.
transformer.base_transform_job_name = "lnn-transform"

In [None]:
# Run batch inference on the test CSV uploaded earlier.
transformer.transform(
    data=test_data,
    content_type="text/csv",
)

In [None]:
# Read the transform output from the bucket; the key assumes the job wrote `test.csv.out`
# under a prefix named after the transform job.
batch_predictions = sagemaker_session.read_s3_file(
    bucket=bucket,
    key_prefix=f"{transformer.latest_transform_job.name}/test.csv.out"
)

# Parse the CSV text into a data frame, reading every column as float.
batch_predictions = pd.read_csv(io.StringIO(batch_predictions), dtype=float)

In [None]:
# (rows, columns) of the batch predictions.
batch_predictions.shape

In [None]:
# Preview the first rows of the batch predictions.
batch_predictions.head()

In [None]:
# Preview the last rows of the batch predictions.
batch_predictions.tail()

In [None]:
# Count the missing values in each column of the batch predictions.
batch_predictions.isna().sum(axis=0)

In [None]:
# Check that the batch and real-time predictions agree (within np.isclose's default
# tolerances) once rows containing missing values are dropped from both.
np.all(np.isclose(batch_predictions.dropna().values, real_time_predictions.dropna().values))

In [None]:
# Overlay the batch forecasts (mean with +/- 1 and 2 standard deviation bands)
# on the actual test series, one panel per target column (columns starting with "y").
target_columns = [c for c in test_dataset.columns if c.startswith("y")]

# squeeze=False keeps `axs` a 2-D array even when there is a single target,
# so the per-panel indexing below also works for one-column datasets.
fig, axs = plt.subplots(nrows=len(target_columns), ncols=1, sharex=True, squeeze=False, figsize=(6, 5))
for i, c in enumerate(target_columns):
    ax = axs[i, 0]
    pred_index = batch_predictions.index
    pred_mean = batch_predictions[f"{c}_mean"].values
    pred_std = batch_predictions[f"{c}_std"].values
    # labels are set on the first panel only, so the figure legend gets one entry per artist
    ax.plot(test_dataset.index, test_dataset[c].values, color="#7f8ea3", lw=1, label="Actual" if i == 0 else None)
    ax.plot(pred_index, pred_mean, color="#009ad3", lw=1, label="Predicted" if i == 0 else None)
    ax.fill_between(pred_index, pred_mean + pred_std, pred_mean - pred_std, color="#009ad3", alpha=0.2, lw=1, label="Predicted +/- 1 Std. Dev." if i == 0 else None)
    ax.fill_between(pred_index, pred_mean + 2 * pred_std, pred_mean - 2 * pred_std, color="#009ad3", alpha=0.1, lw=1, label="Predicted +/- 2 Std. Dev." if i == 0 else None)
    ax.set_title(c, size=10)
    ax.set(xlabel="Time", ylabel="Value")
    # with sharex=True only the bottom panel gets x tick labels; turn them back on everywhere
    ax.xaxis.set_tick_params(labelbottom=True)
    ax.tick_params(axis="both", which="both", labelsize=7)
fig.suptitle("Batch Predictions on Test Dataset")
fig.legend(bbox_to_anchor=(1, 0, 0.4, 1), frameon=False)
fig.tight_layout()
# plt.show() renders with the notebook's inline backend, where fig.show() has no GUI to open
plt.show()

In [None]:
# Save the batch predictions to the bucket as CSV (without the index).
batch_results = sagemaker_session.upload_string_as_file_body(
    body=batch_predictions.to_csv(index=False),
    bucket=bucket,
    key="data/inference/output/batch/batch_predictions.csv"
)

In [None]:
# Show the value returned by the upload of the batch predictions.
batch_results

## 7. Clean-up

### A. Delete the model

In [None]:
# Delete the model associated with the batch transformer.
transformer.delete_model()

### B. Unsubscribe from the listing (optional)

If you would like to unsubscribe from the algorithm, follow these steps. Before you cancel the subscription, ensure that you do not have any [deployable model](https://console.aws.amazon.com/sagemaker/home#/models) created from the model package or using the algorithm. Note: you can find this information by looking at the container name associated with the model.

**Steps to unsubscribe from the product on AWS Marketplace**:
1. Navigate to the __Machine Learning__ tab on [__Your Software subscriptions page__](https://aws.amazon.com/marketplace/ai/library?productType=ml&ref_=mlmp_gitdemo_indust).
2. Locate the listing that you want to cancel the subscription for, and then choose __Cancel Subscription__ to cancel the subscription.

