# Import packages

In [None]:
import numpy as np
import optuna
import pandas as pd
import torch
import xgboost as xgb
from optuna.integration import XGBoostPruningCallback

In [None]:
# Number of distinct activity labels; used below as XGBoost's `num_class`.
num_act = ...

# Prefer the GPU when PyTorch reports CUDA is available, otherwise use the CPU.
# NOTE(review): `device` is not referenced by any XGBoost call visible in this
# notebook -- confirm it is still needed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load data

In [None]:
# Placeholders: replace each `...` with your own data before running the
# cells below. Each X/y pair is passed to `xgb.DMatrix(data=..., label=...)`.
# NOTE(review): labels are presumably integers in [0, num_act), as the
# 'multi:softprob' objective is configured with `num_class = num_act` -- confirm.
X_train = ...  # Encoded prefixes from the training set
y_train = ...  # Label indices (next activity) from the training set

X_val = ...    # Encoded prefixes from the validation set
y_val = ...    # Label indices (next activity) from the validation set

# Hyperparameter tuning

In [None]:
def objective(trial):
    """Train one XGBoost candidate and return its best validation score.

    Samples six hyperparameters from ``trial``, fits a multi-class booster on
    the training data with early stopping on the validation data, and returns
    the booster's ``best_score`` (the study minimizes this value).

    Args:
        trial: The Optuna trial used to sample hyperparameters and to report
            intermediate 'validation-mlogloss' values for pruning.

    Returns:
        ``best_score`` of the trained booster.
    """
    # Settings that are identical for every trial.
    param = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        'num_class': num_act,
    }

    # Sampled settings. The suggest calls are kept in a fixed order so a
    # seeded sampler draws the same sequence on every run.
    param['max_depth'] = trial.suggest_int('max_depth', 3, 12)
    param['min_child_weight'] = trial.suggest_float('min_child_weight', 1, 10.0)
    param['subsample'] = trial.suggest_float('subsample', 0.5, 1.0)
    param['colsample_bytree'] = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    param['lambda'] = trial.suggest_float('lambda', 0.1, 10.0)
    param['eta'] = trial.suggest_float('eta', 0.01, 0.3, log=True)

    train_matrix = xgb.DMatrix(data=X_train, label=y_train)
    valid_matrix = xgb.DMatrix(data=X_val, label=y_val)

    # The observation key is "<eval name>-<metric>", matching the
    # 'validation' entry in `evals` and the 'mlogloss' eval metric.
    pruning_hook = XGBoostPruningCallback(trial, 'validation-mlogloss')

    # The round budget is deliberately huge; early stopping decides when
    # training actually ends.
    booster = xgb.train(
        param,
        train_matrix,
        num_boost_round=100000,
        evals=[(valid_matrix, 'validation')],
        early_stopping_rounds=20,
        callbacks=[pruning_hook],
        verbose_eval=False,
    )

    return booster.best_score

In [None]:
# Minimize the value returned by `objective`; the TPE sampler is seeded so the
# sequence of sampled hyperparameters is repeatable.
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=7),
    direction='minimize',
)

# Run 100 trials with a progress bar.
study.optimize(
    objective,
    show_progress_bar=True,
    n_trials=100,
)

In [None]:
# Read the winning trial's hyperparameters and objective value from the study,
# then print both.
best_params, best_loss = study.best_params, study.best_value

print("Best Hyperparameters: ", best_params)
print("Best loss: ", best_loss)

# Retrain the model with best hyperparameters

In [None]:
# Fixed objective settings plus the tuned values found by the study.
# `best_params` (set from `study.best_params`) is keyed by the names passed to
# `trial.suggest_*` in `objective`: max_depth, min_child_weight, subsample,
# colsample_bytree, lambda and eta. Merging it in avoids copying the printed
# values by hand.
# To retrain without re-running the study, replace `best_params` with a
# literal dict of previously found values.
param = {
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'num_class': num_act,
    **best_params,
}

dtrain = xgb.DMatrix(data=X_train, label=y_train)
dvalid = xgb.DMatrix(data=X_val, label=y_val)

# Both sets are evaluated so progress is logged for each (every 10 rounds).
# The validation set is listed last because XGBoost uses the last `evals`
# entry for early stopping.
model = xgb.train(param,
                  dtrain,
                  evals=[(dtrain, 'train'), (dvalid, 'validation')],
                  num_boost_round=100000,
                  early_stopping_rounds=20,
                  verbose_eval=10)