# Ray Tune

In [None]:
# Let's import and connect to ray:

import ray
ray.init()
from ray import tune


In [None]:
# checking ray version
ray.__version__

In [None]:
# import PyTorch suite
import torch
import torch.nn as nn

# import Seaborn statistical dataset package
import seaborn as sns
# data processing tools
import numpy as np
import pandas as pd
# plotting
import matplotlib.pyplot as plt
# metrics and preprocessing
from sklearn.metrics import max_error
from sklearn.preprocessing import MinMaxScaler
# execution timing, os utils
import time
import os
%matplotlib inline

## We will be using the Flights dataset from the Seaborn package for demonstration

In [None]:
# Load the "flights" example dataset through seaborn and preview its first rows.
flight_data = sns.load_dataset("flights")
flight_data.head()

## Data Exploration, normally a first step in any ML process


Some Data Processing to make it easier for LSTM

In [None]:
# Passenger counts as a float array; the last `test_data_size` points are held out for testing.
all_data = flight_data['passengers'].values.astype(float)
test_data_size = 12
# Fit the scaler on the training portion only, so that the held-out test
# values do not leak into the normalization statistics. The whole series is
# then transformed with those training-derived parameters.
scaler = MinMaxScaler()
scaler.fit(all_data[:-test_data_size].reshape(-1, 1))
all_data_normalized = scaler.transform(all_data.reshape(-1, 1))
train_data_normalized = all_data_normalized[:-test_data_size]
test_data_normalized = all_data_normalized[-test_data_size:]
# Flatten the training series into a 1-D float tensor.
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)


In [None]:
# Widen the default figure to 15 x 5, then plot the raw passenger series.
fig_size = plt.rcParams["figure.figsize"]
fig_size[:2] = [15, 5]
plt.rcParams["figure.figsize"] = fig_size

passenger_series = flight_data['passengers']

plt.title('Month vs Passenger')
plt.xlabel('Months')
plt.ylabel('Total Passengers')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(passenger_series)


## Some data preparations for LSTM Model

In [None]:
# The passenger value is the input feature; we will be predicting the number of
# passengers. The values are normalized here and encoded into 12-month windows
# further below.
all_data = flight_data['passengers'].values.astype(float)
test_data_size = 12
# Fit the scaler on the training portion only, so that the held-out test
# values do not leak into the normalization statistics. The whole series is
# then transformed with those training-derived parameters.
scaler = MinMaxScaler()
scaler.fit(all_data[:-test_data_size].reshape(-1, 1))
all_data_normalized = scaler.transform(all_data.reshape(-1, 1))
train_data_normalized = all_data_normalized[:-test_data_size]
test_data_normalized = all_data_normalized[-test_data_size:]

In [None]:
# Time-series sequence slicing: the series is cut into fixed-length windows.
# This can be optimized as a generator function based on an input stream.
# Flatten the normalized training data into a 1-D float tensor.
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
# Number of consecutive values in each input window.
train_window = 12
def create_inout_sequences(input_data, tw):
    """Build sliding-window (input, label) pairs from a sequence.

    For every start index ``i`` in ``range(len(input_data) - tw)`` the result
    contains a tuple of ``input_data[i:i + tw]`` (the window) and
    ``input_data[i + tw:i + tw + 1]`` (a length-one slice holding the value
    that follows the window). The list is empty when ``input_data`` has no
    more than ``tw`` elements.
    """
    n_windows = len(input_data) - tw
    return [
        (input_data[start:start + tw], input_data[start + tw:start + tw + 1])
        for start in range(n_windows)
    ]
# Build the (window, label) training pairs from the normalized training series.
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)



## With data preparation and data exploration done, we will start constructing the model

In [None]:
# the usual PyTorch way -- class-based model definition with a forward function.
class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out layer.

    The recurrent state is stored on the instance as ``hidden_cell`` (a pair
    of zero tensors of shape ``(1, 1, hidden_layer_size)`` after construction)
    and is overwritten by every ``forward`` call. Callers that want a fresh
    state assign a new pair of zero tensors to ``hidden_cell``.
    """

    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        # Recurrent layer first, then the linear layer mapping to the output.
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)

        state_shape = (1, 1, self.hidden_layer_size)
        self.hidden_cell = (torch.zeros(state_shape), torch.zeros(state_shape))

    def forward(self, input_seq):
        """Run the sequence through the LSTM and return the last step's prediction."""
        seq_len = len(input_seq)
        # Reshape to (seq_len, batch=1, features) before feeding the LSTM.
        lstm_input = input_seq.view(seq_len, 1, -1)
        lstm_out, self.hidden_cell = self.lstm(lstm_input, self.hidden_cell)
        predictions = self.linear(lstm_out.view(seq_len, -1))
        return predictions[-1]

### With a model defined, we move on to defining the training and testing behavior. In this tutorial, we define a simple trainable, leveraging Python's duck typing.

In [None]:

def train(model, optimizer, loss_function, epochs):
    """Fit ``model`` on the module-level ``train_inout_seq`` pairs.

    Runs ``epochs`` passes over the training pairs; every (sequence, label)
    pair triggers one optimizer step. The model's ``hidden_cell`` is reset to
    zeros before each forward pass so that no recurrent state carries over
    between samples. After the last epoch, the loss of the last processed
    sample is printed; nothing is printed if no sample was processed.

    Args:
        model: module exposing ``hidden_layer_size`` and ``hidden_cell``.
        optimizer: optimizer bound to the model's parameters.
        loss_function: callable taking ``(prediction, label)`` and returning
            a loss tensor.
        epochs: number of passes over the training pairs.
    """
    single_loss = None
    for epoch in range(epochs):
        for seq, labels in train_inout_seq:
            optimizer.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                                 torch.zeros(1, 1, model.hidden_layer_size))
            y_pred = model(seq)
            single_loss = loss_function(y_pred, labels)
            single_loss.backward()
            optimizer.step()
        # optional -- you can add fine grain checkpoints here.
        # below is only pseudo code.
        #     with tune.checkpoint_dir(step=epoch) as checkpoint_dir:
        #         path = os.path.join(checkpoint_dir, "checkpoint")
        #         torch.save(
        #             (model.state_dict(), optimizer.state_dict()), path)
    # Guard the summary: with zero epochs or no training pairs there is no
    # loss (and no epoch index) to report.
    if single_loss is not None:
        print(f'epoch: {epoch:3} loss: {single_loss.item():10.10f}')
def test(model, test_input, truth):
    """Roll the model forward and score its forecast against ``truth``.

    Starting from the seed values in ``test_input``, the last ``train_window``
    values are repeatedly fed to the model and the prediction is appended,
    producing one forecast per element of ``truth``.

    Args:
        model: trained model exposing ``hidden_layer_size`` and ``hidden_cell``.
        test_input: list of seed values; it is extended in place with the
            forecasts.
        truth: ground-truth values the forecasts are compared with.

    Returns:
        The ``max_error`` between ``truth`` and the forecasts.
    """
    model.eval()
    n_seed = len(test_input)
    for _ in range(len(truth)):
        seq = torch.FloatTensor(test_input[-train_window:])
        with torch.no_grad():
            # The model keeps its recurrent state in `hidden_cell`; reset that
            # attribute so each forecast starts from a zero state.
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                                 torch.zeros(1, 1, model.hidden_layer_size))
            test_input.append(model(seq).item())
    # Score only the appended forecasts, against the ground truth passed in.
    return max_error(truth, test_input[n_seed:])


In [None]:
## `orchestrate` behaves like a Trainable: it takes in a config, executes the train and test steps, then reports the evaluation metrics back to the main train function/actor.
def orchestrate(config):
    """Train and evaluate one LSTM for a single Tune trial.

    Builds a fresh model, trains it with Adam, forecasts from the last
    ``train_window`` training values, saves the weights and reports the
    resulting error to Tune under the key ``error``.

    Args:
        config: trial configuration. ``config["lr"]`` is the Adam learning
            rate; the optional ``config["epochs"]`` overrides the default of
            50 training epochs.
    """
    epochs = config.get("epochs", 50)
    model = LSTM()
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    train(model, optimizer, loss_function, epochs)
    test_inputs = train_data_normalized[-train_window:].tolist()

    error = test(model, test_inputs, test_data_normalized)
    # Save the weights before reporting, so the file exists even if the trial
    # is not resumed after the report (e.g. when a scheduler stops it).
    # NOTE(review): the relative path is presumably resolved against the
    # trial directory -- the notebook later loads it from best_trial.logdir.
    torch.save(model.state_dict(), "./model.pth")
    tune.report(error=error)
    print(error)

In [None]:
## Define the parameter search space, kick off the training.
# Grid search over four candidate learning rates.
learning_rates = [0.1, 0.01, 0.001, 0.0001]
search_space = {"lr": tune.grid_search(learning_rates)}

analysis = tune.run(orchestrate, config=search_space)

In [None]:
# Example of how to run with trial scheduler. This example won't run, because the model is not reporting a mean accuracy
# from ray.tune.schedulers import   ASHAScheduler
# # AsyncHyperBandScheduler

# analysis = tune.run(
#     orchestrate,
#     num_samples=20,
#     scheduler=ASHAScheduler(metric="mean_accuracy", mode="max"),
#     config=search_space)

# Example of how to run the training with fine grain control on distribution and how much resource to allocate per trial.
# tune.run(trainable, num_samples=100, resources_per_trial=tune.PlacementGroupFactory([{"CPU": 2, "GPU": 1}]))

# We do have some hyperparameter selection algorithms
# from ray.tune.suggest.hyperopt import HyperOptSearch
# tune.run(my_function, search_alg=HyperOptSearch(...))
# https://docs.ray.io/en/latest/tune/api_docs/suggestion.html


### Let's look at the diagram again

### Let's now look at the training result, and analyze the outputs

In [None]:
# Configuration of the trial with the smallest reported "error".
best_config = analysis.get_best_config(metric="error", mode="min")
print("Best config: ", best_config)


In [None]:
# Pick the trial whose last reported "error" is the smallest.
best_trial = analysis.get_best_trial(metric="error", mode="min", scope="last")
print(best_trial)

In [None]:
# Tabular view of the trial results; preview the first rows.
df = analysis.results_df
df.head()

In [None]:
# Depends on whether your model takes in hyperparameters or not.
# Trial-specific hyperparameters can be retrieved from the trial config, e.g.
# best_trial.config["l1"], best_trial.config["l2"]
## Construct the model
best_model = LSTM()

# Restore the weights stored as "model.pth" in the best trial's log directory.
logdir = best_trial.logdir
model_path = os.path.join(logdir, "model.pth")
state_dict = torch.load(model_path)
best_model.load_state_dict(state_dict)


### Alternatively, if you have enabled checkpointing, you can load more models from checkpoint dir

In [None]:

## construct checkpoint location, load from checkpoint
# checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
# model_state, optimizer_state = torch.load(checkpoint_path)
# best_trained_model.load_state_dict(model_state)


In [None]:
# Forecast `fut_pred` steps with the best model: each prediction is appended
# to the inputs and becomes part of the next window.
best_model.eval()
fut_pred = 12
test_inputs = train_data_normalized[-train_window:].tolist()
for _ in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    with torch.no_grad():
        # The LSTM keeps its recurrent state in `hidden_cell`; reset that
        # attribute so each forecast starts from a zero state.
        best_model.hidden_cell = (torch.zeros(1, 1, best_model.hidden_layer_size),
                                  torch.zeros(1, 1, best_model.hidden_layer_size))
        test_inputs.append(best_model(seq).item())

# Map the normalized forecasts back to passenger counts.
actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:]).reshape(-1, 1))

print(actual_predictions)

In [None]:
# x positions of the forecast: the final len(actual_predictions) points of the
# series, derived from the data instead of hard-coding 132..143.
n_total = len(flight_data)
x = np.arange(n_total - len(actual_predictions), n_total, 1)

# Full passenger series with the forecast drawn over its last section.
plt.title('Month vs Passenger')
plt.ylabel('Total Passengers')
plt.grid(True)
plt.autoscale(axis='x', tight=True)
plt.plot(flight_data['passengers'])
plt.plot(x, actual_predictions)
plt.show()

In [None]:
# Zoomed view: only the last `train_window` observed values plus the forecast.
recent_passengers = flight_data['passengers'][-train_window:]

plt.title('Month vs Passenger')
plt.ylabel('Total Passengers')
plt.grid(True)
plt.autoscale(axis='x', tight=True)

plt.plot(recent_passengers)
plt.plot(x, actual_predictions)
plt.show()