# 17445 Fall 2024 Homework 3: Tools for Production ML Systems

## Ray Tune - Hyperparameter Optimization and Experimentation

## Demo by: Derek Zhu

The following demo was designed and tested using Google Colab.

In [None]:
# Install Surprise and Ray Tune
!pip install scikit-surprise
!pip install "ray[tune]"

In [2]:
# Import libraries
import pandas as pd
import json
from surprise import Reader, Dataset, SVD, accuracy
from surprise.model_selection import cross_validate, train_test_split
from ray import train, tune
from ray.train import RunConfig
from ray.tune.schedulers import ASHAScheduler

In [3]:
# Load and pre-process data
users_df = pd.read_csv('users.csv').dropna()
user_watched_df = pd.read_csv('user_watched.csv').dropna()
movies_df = pd.read_csv('movies.csv').dropna()

merged_df = user_watched_df.merge(users_df, on = 'user_id').merge(movies_df, left_on = 'movie_id', right_on = 'id')
merged_df = merged_df[merged_df['rating'] != -1]

# Prepare data for SVD model
reader = Reader(rating_scale = (1, 5))
data = Dataset.load_from_df(merged_df[['user_id', 'id', 'rating']], reader)
train_data, test_data = train_test_split(data, test_size = 0.2, random_state = 2314)

In [4]:
# Train the model based on hyperparameters in config and output metrics to optimize on
def objective(config):
  svd = SVD(n_factors = config["n_factors"], n_epochs = config["n_epochs"],
            lr_all = config["lr_all"], reg_all = config["reg_all"])
  svd.fit(train_data)

  train_predictions = svd.test(train_data.all_ratings())
  train_rmse = accuracy.rmse(train_predictions)
  predictions = svd.test(test_data)
  rmse = accuracy.rmse(predictions)
  rmse_difference = train_rmse - rmse

  return {"train_rmse": train_rmse, "rmse": rmse, "rmse_difference": rmse_difference}

# Define the search space for hyperparameters of SVD model
search_space = {
    "n_factors": tune.grid_search([50, 100, 200]),     # default 100
    "n_epochs": tune.grid_search([10, 20, 40]),        # default 20
    "lr_all": tune.grid_search([0.0025, 0.005, 0.01]), # default 0.005
    "reg_all": tune.grid_search([0.01, 0.02, 0.04])    # default 0.02
}

# Initialize Tuner
# TuneConfig: Use ASHA scheduling to abandon trials underperforming on rmse
# RunConfig: Set file path to store results
tuner = tune.Tuner(objective, param_space = search_space,
                   tune_config = tune.TuneConfig(scheduler = ASHAScheduler(), metric = "rmse", mode = "min"),
                   run_config = RunConfig(storage_path = "/content/results", name = "demo_experiment"))

# Conduct trials defined by the search_space and print the best model based on minimum rmse
results = tuner.fit()
print("Lowest RMSE Model:", results.get_best_result(metric = "rmse", mode = "min").config)

2024-11-03 21:48:39,728	INFO worker.py:1816 -- Started a local Ray instance.
2024-11-03 21:48:41,414	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.


+----------------------------------------------------------+
| Configuration for experiment     demo_experiment         |
+----------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator   |
| Scheduler                        AsyncHyperBandScheduler |
| Number of trials                 81                      |
+----------------------------------------------------------+

View detailed results here: /content/results/demo_experiment
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-11-03_21-48-36_244092_570/artifacts/2024-11-03_21-48-41/demo_experiment/driver_artifacts`

Trial status: 81 PENDING
Current time: 2024-11-03 21:48:53. Total running time: 7s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
+------------------------------------------------------------------------------------+
| Trial name              status       n_factors     n_epochs     lr_all     reg_all |
+------------------------

2024-11-03 21:53:33,300	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/content/results/demo_experiment' in 0.0493s.



Trial objective_64145_00080 completed after 1 iterations at 2024-11-03 21:53:33. Total running time: 4min 47s
+------------------------------------------------+
| Trial objective_64145_00080 result             |
+------------------------------------------------+
| checkpoint_dir_name                            |
| time_this_iter_s                        0.6359 |
| time_total_s                            0.6359 |
| training_iteration                           1 |
| rmse                                   0.84795 |
| rmse_difference                        0.05381 |
| train_rmse                             0.90176 |
+------------------------------------------------+

Trial status: 81 TERMINATED
Current time: 2024-11-03 21:53:33. Total running time: 4min 47s
Logical resource usage: 1.0/2 CPUs, 0/0 GPUs
Current best trial: 64145_00005 with rmse=0.8381768107819348 and params={'n_factors': 50, 'n_epochs': 20, 'lr_all': 0.01, 'reg_all': 0.01}
+--------------------------------------------------

In [5]:
# Set path to params.json and result.json file corresponding to experiment
params_path = '/content/results/demo_experiment/objective_64145_00000_0_lr_all=0.0025,n_epochs=10,n_factors=50,reg_all=0.0100_2024-11-03_21-48-49/params.json'
result_path = '/content/results/demo_experiment/objective_64145_00000_0_lr_all=0.0025,n_epochs=10,n_factors=50,reg_all=0.0100_2024-11-03_21-48-49/result.json'

# Load JSON files to analyze experiments
with open(params_path, 'r') as file:
    params_data = json.load(file)
print(json.dumps(params_data, indent = 4))

with open(result_path, 'r') as file:
    result_data = json.load(file)
print(json.dumps(result_data, indent = 4))

{
    "lr_all": 0.0025,
    "n_epochs": 10,
    "n_factors": 50,
    "reg_all": 0.01
}
{
    "train_rmse": 0.900504084675624,
    "rmse": 0.8559254541394724,
    "rmse_difference": 0.04457863053615163,
    "timestamp": 1730670536,
    "checkpoint_dir_name": null,
    "done": false,
    "training_iteration": 1,
    "trial_id": "64145_00000",
    "date": "2024-11-03_21-48-56",
    "time_this_iter_s": 0.2460336685180664,
    "time_total_s": 0.2460336685180664,
    "pid": 1718,
    "hostname": "9e4a9774ff74",
    "node_ip": "172.28.0.12",
    "config": {
        "n_factors": 50,
        "n_epochs": 10,
        "lr_all": 0.0025,
        "reg_all": 0.01
    },
    "time_since_restore": 0.2460336685180664,
    "iterations_since_restore": 1
}
