# Hyper-parameter Tuning with Ray-Tune Tutorial
This tutorial helps you tune hyper-parameters (e.g., learning rate, batch size, number of latent dimensions, etc.) in BEMB.

We will be using the `ray` library, which enables parallelization, to tune hyper-parameters.

For more details on using Ray to tune the hyper-parameters of models (especially PyTorch Lightning models), please refer to this [tutorial](https://docs.ray.io/en/latest/ray-core/examples/using-ray-with-pytorch-lightning.html).

Author: Tianyu Du
Date: July 27, 2022

In [1]:
import argparse
import os
import time
from datetime import datetime


from typing import List
import numpy as np
import pandas as pd
import torch
from torch_choice.data import ChoiceDataset
from bemb.model import LitBEMBFlex
from bemb.utils.run_helper import run
import matplotlib.pyplot as plt
import seaborn as sns

from torch.utils.data import DataLoader

from bemb.model import LitBEMBFlex

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from ray import tune
from ray.tune import CLIReporter
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler



The idea behind tuning hyper-parameters is to find the best configuration of the model among a space of hyper-parameters.

## Simulate Datasets

I use the simulated datasets from the *simulation* tutorial (refer to the `tutorials/simulation/simulation.ipynb` notebook).

The simulation code below builds `dataset_list`, a list of three `ChoiceDataset` objects corresponding to the train/validation/test datasets.

In [2]:
# Simulate a choice dataset in which every user has one preferred item, then
# split it 80/10/10 into train/validation/test subsets (`dataset_list`).
num_users = 1500
num_items = 50
data_size = 1000

# The user behind each choice instance is drawn uniformly at random.
user_index = torch.LongTensor(np.random.choice(num_users, size=data_size))

# User u's preferred item follows a sine wave over the user ids (the argument
# sweeps 0..4*pi), rescaled from [-1, 1] to item ids.
Us = np.arange(num_users)
Is = np.sin(Us / num_users * 4 * np.pi)
Is = ((Is + 1) / 2 * num_items).astype(int)
# The rescaling yields exactly `num_items` whenever the sine equals 1 (this
# happens for some other choices of `num_users`), which would index past the
# last item; cap at the last valid item id.
Is = np.minimum(Is, num_items - 1)

# Lookup table: user id -> preferred item id.
PREFERENCE = dict(zip(Us, Is))
preferred_by_user = torch.as_tensor(Is, dtype=torch.long)

# Start from uniformly random items, then replace roughly half of the choices
# (each with probability 0.5) by the chooser's preferred item.
item_index = torch.LongTensor(np.random.choice(num_items, size=data_size))
follows_preference = torch.from_numpy(np.random.rand(data_size) <= 0.5)
item_index[follows_preference] = preferred_by_user[user_index][follows_preference]

# User observables: one-hot encoding of each user's preferred item.
user_obs = torch.zeros(num_users, num_items)
user_obs[torch.arange(num_users), preferred_by_user] = 1

# Item observables: one-hot encoding of the item identity.
item_obs = torch.eye(num_items)

dataset = ChoiceDataset(user_index=user_index, item_index=item_index, user_obs=user_obs, item_obs=item_obs)

# Shuffle the choice instances and cut them into 80% train, 10% validation and
# the remainder as test.
idx = np.random.permutation(len(dataset))
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
train_idx = idx[:train_size]
val_idx = idx[train_size: train_size + val_size]
test_idx = idx[train_size + val_size:]

dataset_list = [dataset[train_idx], dataset[val_idx], dataset[test_idx]]

No `session_index` is provided, assume each choice instance is in its own session.


In [3]:
# Display the train/validation/test splits stored in `dataset_list`.
dataset_list

[ChoiceDataset(label=[], item_index=[800], user_index=[800], session_index=[800], item_availability=[], user_obs=[1500, 50], item_obs=[50, 50], device=cpu),
 ChoiceDataset(label=[], item_index=[100], user_index=[100], session_index=[100], item_availability=[], user_obs=[1500, 50], item_obs=[50, 50], device=cpu),
 ChoiceDataset(label=[], item_index=[100], user_index=[100], session_index=[100], item_availability=[], user_obs=[1500, 50], item_obs=[50, 50], device=cpu)]

## Model

In [7]:
# Number of hyper-parameter configurations Ray Tune samples from the search
# space (passed to `tune.run` as `num_samples`).
num_samples = 3
# Number of epochs handed to the training function as `epochs` for each trial.
num_epochs = 50

In [8]:
# Forward the Lightning metrics `val_loss` and `val_acc` to Ray Tune, under the
# same names, every time a validation run ends.
callback = TuneReportCallback({'val_loss': 'val_loss', 'val_acc': 'val_acc'}, on='validation_end')

In [9]:
def train_tune(hparams, epochs=1):
    """Train one BEMB model for a single Ray Tune trial.

    Args:
        hparams: the hyper-parameter configuration sampled by Ray Tune. The keys
            ``latent_dim`` and ``obs2prior`` are required; ``learning_rate`` is
            used when present and falls back to 0.03 otherwise.
        epochs: maximum number of training epochs. It is supplied as a keyword
            argument by ``tune.with_parameters(train_tune, epochs=...)``.

    Returns:
        None. Validation metrics reach Ray Tune through the module-level
        ``callback`` each time validation ends.
    """
    # Local import keeps this cell self-contained; `torch_choice` is already a
    # dependency of this notebook.
    from torch_choice.data.utils import create_data_loader

    latent_dim = hparams['latent_dim']
    use_obs2prior = hparams['obs2prior']

    bemb = LitBEMBFlex(
        # use the learning rate sampled for this trial instead of a fixed value.
        learning_rate=hparams.get('learning_rate', 0.03),
        pred_item=True,  # let the model predict item_index, don't change this one.
        num_seeds=32,  # number of Monte Carlo samples for estimating the ELBO.
        utility_formula='theta_user * alpha_item',  # the utility formula.
        num_users=num_users,
        num_items=num_items,
        num_user_obs=dataset.user_obs.shape[1],
        num_item_obs=dataset.item_obs.shape[1],
        # whether to turn on obs2prior for each parameter.
        obs2prior_dict={'theta_user': use_obs2prior, 'alpha_item': use_obs2prior},
        # the dimension of latents, since the utility is an inner product of theta and alpha, they should have
        # the same dimension.
        coef_dim_dict={'theta_user': latent_dim, 'alpha_item': latent_dim}
    )

    trainer = pl.Trainer(
        max_epochs=epochs,
        check_val_every_n_epoch=1,
        log_every_n_steps=1,
        gpus=0,
        progress_bar_refresh_rate=0,
        auto_lr_find=False,
        logger=TensorBoardLogger(save_dir='./', name='', version='.'),
        # NOTE(review): both callbacks monitor `val_loss`; confirm LitBEMBFlex
        # logs its validation metrics under the names `val_loss` / `val_acc`.
        callbacks=[callback, EarlyStopping(monitor='val_loss', patience=30, mode='min')])

    # NOTE(review): `create_data_loader` is the torch_choice helper for batching
    # a ChoiceDataset; it is called with its default batch size here -- set
    # `batch_size=` explicitly if mini-batches are wanted.
    train_loader = create_data_loader(dataset_list[0], shuffle=True)
    val_loader = create_data_loader(dataset_list[1], shuffle=False)

    # Actually fit the model: `trainer.tune` only runs the optional LR /
    # batch-size tuners, so with `auto_lr_find=False` it never trains or
    # validates and nothing is reported to Ray Tune. The test split
    # (`dataset_list[2]`) is kept out of tuning for a final evaluation.
    trainer.fit(bemb, train_dataloaders=train_loader, val_dataloaders=val_loader)


In [10]:
# Search space: every trial draws one value per hyper-parameter.
config = dict(
    learning_rate=tune.choice([0.01, 0.03, 0.1, 0.3]),
    latent_dim=tune.choice([10, 20, 50, 100]),
    obs2prior=tune.choice([True, False]),
)

# No trial scheduler is used. To try one, swap in e.g.
# ASHAScheduler(max_t=num_epochs, grace_period=1, reduction_factor=2).
scheduler = None

# Console progress table: one column per tuned hyper-parameter and one per
# metric forwarded by the report callback.
reporter = CLIReporter(
    parameter_columns=list(config),
    metric_columns=list(callback._metrics),
)

# Launch the search; the best trial is the one with the smallest `val_loss`.
analysis = tune.run(
    tune.with_parameters(train_tune, epochs=num_epochs),
    config=config,
    num_samples=num_samples,
    metric='val_loss',
    mode='min',
    scheduler=scheduler,
    progress_reporter=reporter,
    resources_per_trial={'cpu': 4, 'gpu': 0},
)


2022-07-27 01:04:14,741	ERROR services.py:1488 -- Failed to start the dashboard: Failed to start the dashboard, return code 1
Failed to read dashboard log: [Errno 2] No such file or directory: '/tmp/ray/session_2022-07-27_01-04-13_590765_58763/logs/dashboard.log'
2022-07-27 01:04:14,742	ERROR services.py:1489 -- Failed to start the dashboard, return code 1
Failed to read dashboard log: [Errno 2] No such file or directory: '/tmp/ray/session_2022-07-27_01-04-13_590765_58763/logs/dashboard.log'
Traceback (most recent call last):
  File "/Users/tianyudu/miniforge3/envs/ml/lib/python3.9/site-packages/ray/_private/services.py", line 1451, in start_dashboard
    with open(dashboard_log, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ray/session_2022-07-27_01-04-13_590765_58763/logs/dashboard.log'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/tianyudu/miniforge3/envs/ml/lib/python3.9/site-packa

: 

: 