# Tuning Darts LSTM model

In [None]:
import logging
import warnings
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import optuna
from darts import TimeSeries
from darts import metrics
from darts.dataprocessing.transformers import Scaler
from pytorch_lightning.utilities.warnings import PossibleUserWarning

import matplotlib.pyplot as plt
import optuna.visualization as vis

from src.resrnn import ResRNNModel

In [None]:
# Quieten PyTorch Lightning: ignore its PossibleUserWarning (a false positive
# here) and only let ERROR-level records of its logger through.
warnings.filterwarnings("ignore", category=PossibleUserWarning)
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)

## Notebook parameters

The following cell contains the parameters of the notebook, which are overridden by [papermill](https://github.com/nteract/papermill) when running non-interactively (on a larger scale):

- `resultsdir`, the folder where to save results,
- `material`, which material to use,
- `sample`, which sample to use,
- `train_ratio`, fraction of data to set aside for training (vs. test),
- `val_ratio`, fraction of training data to set aside for validation,
- `n_trials`, the total number of models fitted by Optuna,
- `n_epochs_max`, maximum number of epochs to train one model,
- `seed`, random generator seed,
- `use_tpe`, use the TPE sampler (or the BoTorch sampler if False).

In [None]:
# Output folder; the default name embeds the current date and time (to the minute).
resultsdir = f"../results/tune_darts_lstm_{datetime.now():%Y%m%dT%H%M}"
# Material and sample used to filter the rows of the dataset.
material = "G4"
sample = "S2"
# Split point of the wear series between train and test (passed to `split_after`).
train_ratio = 0.4
# Fraction of the training series held out for validation while tuning.
val_ratio = 0.10
# Target total number of Optuna trials in the study.
n_trials = 5
# Upper bound of the `n_epochs` search range of a trial.
n_epochs_max = 100
# Seed given to the Optuna sampler and to the models (`random_state`).
seed = 42
# True selects the TPE sampler, False the BoTorch sampler.
use_tpe = True

## Data preparation

In [None]:
# Load the dataset from CSV; the bare name on the last line displays it in the notebook.
dset = pd.read_csv("../results/dataset_minutes.csv")
dset

In [None]:
def prepare_col(dset, col):
    """Turn one column of ``dset`` into a float32 Darts ``TimeSeries``.

    Parameters
    ----------
    dset : dataframe
        Table holding a "Time [min]" column, which is used as the time axis.
    col : str
        Name of the column providing the values of the series.

    Returns
    -------
    The series built from ``col``, cast to ``np.float32``.
    """
    return TimeSeries.from_dataframe(
        dset,
        time_col="Time [min]",
        value_cols=col,
    ).astype(np.float32)

In [None]:
# Restrict the dataset to the rows of the requested material and sample.
dset_sample = dset.loc[
    (dset["material"] == material) & (dset["sample"] == sample)
]

# The wear loss is the target; the time column is also turned into a series
# (it is given to the model as a future covariate further down).
serie_wear, serie_idx = (
    prepare_col(dset_sample, col) for col in ("Wear Loss [mm]", "Time [min]")
)

# Split the target at `train_ratio`: the first part is train, the rest is test.
serie_train, serie_test = serie_wear.split_after(train_ratio)

In [None]:
# Draw the train and test segments of the wear series on a single axis.
_, ax = plt.subplots(figsize=(7, 4))
for label, serie in (("train", serie_train), ("test", serie_test)):
    serie.plot(label=label, ax=ax)
_ = ax.set_ylabel("Wear Loss [mm]")

In [None]:
# Fit the scaler on the training target and on the time series, and get both
# scaled series back. Only the training part of the target is used for the fit;
# the time series is passed in full.
scaler = Scaler()
serie_train_scaled, serie_idx_scaled = scaler.fit_transform([serie_train, serie_idx])

## Model tuning

In [None]:
# Results folder as a Path object; created (with its parents) when missing.
resultspath = Path(resultsdir)
resultspath.mkdir(parents=True, exist_ok=True)

In [None]:
def objective(trial):
    """Train one candidate LSTM for an Optuna trial and score it.

    The tail of the scaled training series (a ``val_ratio`` fraction) is held
    out for validation. Hyper-parameters are drawn from ``trial``, a
    ``ResRNNModel`` is fitted on the remaining head, and the held-out span is
    then forecast.

    Parameters
    ----------
    trial : optuna trial
        Source of the hyper-parameter suggestions; its number names the
        trial's output folder and model.

    Returns
    -------
    The MAE, in scaled units, between the last point of the scaled training
    series and the last forecast point.
    """
    # Per-trial output folder, named like the model below.
    trial_dir = resultspath / f"trial_{trial.number}"
    trial_dir.mkdir(parents=True, exist_ok=True)

    # Chronological split: the first `n_fit` points are used for fitting,
    # the remaining `n_val` points for validation.
    n_total = len(serie_train_scaled)
    n_fit = int(n_total * (1.0 - val_ratio))
    n_val = n_total - n_fit

    # Hyper-parameters are drawn up front, in the same order as the model
    # receives them (keep this order: with a seeded sampler the sequence of
    # draws presumably depends on it).
    warmup = trial.suggest_int("warmup", 10, n_fit - 1)
    n_rnn_layers = trial.suggest_int("n_rnn_layers", 1, 3)
    # Dropout is only searched when several RNN layers are stacked.
    dropout = trial.suggest_float("dropout", 0, 0.5) if n_rnn_layers > 1 else 0.0
    hidden_dim = trial.suggest_int("hidden_dim", 16, 128)
    batch_size = trial.suggest_int("batch_size", 1, 64)
    n_epochs = trial.suggest_int("n_epochs", 20, n_epochs_max, step=10)
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)

    trainer_kwargs = {
        "accelerator": "gpu",
        "devices": 1,
        "enable_progress_bar": False,
    }
    model = ResRNNModel(
        model="LSTM",
        input_chunk_length=warmup,
        training_length=warmup,
        hidden_dim=hidden_dim,
        n_rnn_layers=n_rnn_layers,
        dropout=dropout,
        batch_size=batch_size,
        n_epochs=n_epochs,
        optimizer_kwargs={"lr": lr},
        random_state=seed,
        force_reset=True,
        log_tensorboard=True,
        pl_trainer_kwargs=trainer_kwargs,
        show_warnings=True,
        work_dir=resultspath,
        model_name=f"trial_{trial.number}",
    )

    # The validation series also contains the `warmup` points that precede the
    # held-out span (presumably so that a full input window is available).
    fit_series = serie_train_scaled[:n_fit]
    val_series = serie_train_scaled[n_fit - warmup :]
    model.fit(
        series=fit_series,
        val_series=val_series,
        future_covariates=serie_idx_scaled,
        val_future_covariates=serie_idx_scaled,
    )

    # Forecast the whole held-out span; only its last point is scored.
    preds = model.predict(n_val, future_covariates=serie_idx_scaled)
    last_actual = serie_train_scaled[-1]
    last_pred = preds[-1]
    return metrics.mae(last_actual, last_pred)

In [None]:
# The study is persisted in a SQLite file inside the results folder.
storage = "sqlite:///" + str(resultspath.resolve() / "study.db")

# Seeded sampler: TPE when `use_tpe` is set, BoTorch otherwise.
sampler = (
    optuna.samplers.TPESampler
    if use_tpe
    else optuna.integration.BoTorchSampler
)(seed=seed)

# `load_if_exists=True` reuses a study already stored under this name.
study = optuna.create_study(
    study_name="darts_lstm",
    direction="minimize",
    sampler=sampler,
    storage=storage,
    load_if_exists=True,
)

# Only run the trials still missing to reach `n_trials` in total.
n_runs = max(n_trials - len(study.get_trials()), 0)
study.optimize(objective, n_trials=n_runs)

In [None]:
# Display the hyper-parameters of the best trial of the study.
study.best_params

In [None]:
# Parallel-coordinate plot of the trials of the study.
vis.plot_parallel_coordinate(study)

## Model training

In [None]:
# Re-create the model with the hyper-parameters of the best trial.
best_params = study.best_params
model = ResRNNModel(
    model="LSTM",
    # As during tuning, "warmup" sets both the input chunk and training lengths.
    input_chunk_length=best_params["warmup"],
    training_length=best_params["warmup"],
    hidden_dim=best_params["hidden_dim"],
    n_rnn_layers=best_params["n_rnn_layers"],
    # "dropout" is only suggested for multi-layer trials, hence the fallback.
    dropout=best_params.get("dropout", 0),
    batch_size=best_params["batch_size"],
    n_epochs=best_params["n_epochs"],
    optimizer_kwargs={"lr": best_params["lr"]},
    random_state=seed,
    force_reset=True,
    log_tensorboard=True,
    pl_trainer_kwargs={
        "accelerator": "gpu",
        "devices": 1,
        "enable_progress_bar": False,
    },
    show_warnings=True,
    work_dir=resultspath,
    model_name="model",
)

In [None]:
%%time
# Final fit on the whole scaled training series (no validation split here),
# with the scaled time series as future covariate.
model.fit(serie_train_scaled, future_covariates=serie_idx_scaled)

In [None]:
# Save the fitted model in the results folder (the path is passed as a string).
model.save(str(resultspath / "model.pt"))

## Predictions

In [None]:
# Forecast as many steps as the test series holds, using the scaled time
# series as future covariate.
serie_forecast_scaled = model.predict(
    len(serie_test), future_covariates=serie_idx_scaled
)
# Map the forecast back to the original scale with the fitted scaler.
serie_forecast = scaler.inverse_transform(serie_forecast_scaled)

In [None]:
# Overlay train, test and forecast on one axis; `fig` is saved to disk later.
fig, ax = plt.subplots(figsize=(7, 4))
for label, serie in (
    ("train", serie_train),
    ("test", serie_test),
    ("predictions", serie_forecast),
):
    serie.plot(label=label, ax=ax)
_ = ax.set_ylabel("Wear Loss [mm]")

In [None]:
# Convert the forecast to a pandas Series named "forecast", join it to the
# sample's rows on the "Time [min]" index, and write the result to CSV.
pd_serie_forecast = serie_forecast.pd_series().rename("forecast")
dset_forecast = dset_sample.set_index("Time [min]").join(pd_serie_forecast)
dset_forecast.to_csv(resultspath / "predictions.csv")

In [None]:
# Save the predictions figure in the results folder, with a tight bounding box.
fig.savefig(resultspath / "predictions.png", bbox_inches="tight")

In [None]:
# Metric label -> Darts metric function used to score the forecast.
scores_functions = dict(
    MAE=metrics.mae,
    RMSE=metrics.rmse,
    MAPE=metrics.mape,
)

In [None]:
# For every metric, score the last point only ("_end") and the whole test
# span ("_avg").
scores = {}
for key, func in scores_functions.items():
    scores.update(
        {
            f"{key}_end": func(serie_test[-1], serie_forecast[-1]),
            f"{key}_avg": func(serie_test, serie_forecast),
        }
    )

# Reshape into a two-column table: "metric" and "value".
scores = pd.DataFrame.from_dict(scores, orient="index", columns=("value",))
scores = scores.reset_index().rename(columns={"index": "metric"})

In [None]:
# Display the table of scores.
scores

In [None]:
# Write the table of scores to CSV, without the row index.
scores.to_csv(resultspath / "scores.csv", index=False)