In [1]:
import os
from models import AdmmSlim
from datasets import BaseDataset, ValidDataset
import numpy as np
import pandas as pd

In [2]:
import optuna
from optuna import Trial
from optuna.samplers import TPESampler

In [3]:
import torch
import random

def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus all CUDA devices), and configures cuDNN for deterministic
    behavior.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Hash randomization of the running interpreter is fixed at startup, so
    # this value only takes effect in processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Some cuDNN methods can be random even after fixing the seed
    # unless cuDNN is told to be deterministic.
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner benchmarks several algorithms and may select a
    # different one on each run; turn it off so kernel choice is repeatable.
    torch.backends.cudnn.benchmark = False
    
# Seed value for this notebook's random number generators.
seed = 10
# Apply the seed before any data is loaded or any model is fitted.
set_seed(seed)

In [4]:
# Build the training dataset from the data directory (args.path = '../data/').
train_dataset = BaseDataset(path='../data/')
# The validation dataset is derived from the training dataset object.
valid_dataset = ValidDataset(train_dataset=train_dataset)

# Interaction inputs consumed by the model fit and by the recall metric below.
train_X, valid_X = (
    train_dataset.train_input_data,
    valid_dataset.valid_input_data,
)

Creating interaction Train/ Vaild Split...
Train/Vaild Split Complete. Takes in 19.75568699836731 sec


In [5]:
# def get_score_recall(model, train_X, valid_X):
#     n_users = train_X.shape[0]
#     y_predict = model.predict(train_X)
#     unseen_predict = y_predict*(1-train_X)
#     top_items = np.argsort(unseen_predict, axis=1)[:, -10:]

#     recall = 0.0
#     for ans_items, sol_items in zip(valid_X, top_items):
#         ans_set = set(ans_items)
#         sol_set = set(sol_items)
#         denominator = min(10, len(ans_items))
#         numerator = len(ans_set.intersection(sol_set))
#         recall += numerator/denominator
    
#     recall /= n_users
#     return recall

def get_score_recall(model, train_X, valid_X, k=10):
    """Compute mean recall@k of ``model`` on held-out interactions.

    For every user the ``k`` highest-scoring items that the user has not
    interacted with in ``train_X`` are taken as recommendations. A user's
    recall is the number of recommended items present in ``valid_X`` divided
    by that user's total number of held-out items.

    Args:
        model: Object exposing ``predict(train_X)`` that returns one score per
            (user, item) pair.
        train_X: 2-D user x item training interactions; entries greater than
            zero are treated as already seen. Assumed to be a dense array
            convertible with ``np.asarray`` -- confirm against the dataset.
        valid_X: 2-D user x item held-out interactions with the same shape as
            ``train_X``.
        k: Number of recommendations per user. Defaults to 10.

    Returns:
        Mean recall over the users that have at least one held-out item, as a
        Python float. Returns 0.0 when no user has any held-out item.
    """
    y_predict = np.asarray(model.predict(train_X))
    seen = np.asarray(train_X) > 0
    # Mask seen items with -inf rather than multiplying by (1 - train_X):
    # a zeroed seen item would still outrank unseen items whose predicted
    # score is negative and could sneak into the top-k.
    unseen_predict = np.where(seen, -np.inf, y_predict)
    top_items = np.argsort(unseen_predict, axis=1)[:, -k:]

    held_out = np.asarray(valid_X)
    # One-hot encode each user's recommendations so hits can be counted by
    # element-wise multiplication with the held-out matrix.
    solutions = np.zeros_like(held_out)
    solutions[np.arange(len(solutions))[:, None], top_items] = 1

    numerator = (solutions * held_out).sum(1)
    denominator = held_out.sum(1)

    # Users without held-out items would produce 0/0 = NaN and turn the whole
    # mean into NaN, so they are left out of the average.
    has_truth = denominator > 0
    if not has_truth.any():
        return 0.0
    recall = numerator[has_truth] / denominator[has_truth]
    return float(recall.mean())

In [6]:
def objective(trial: Trial) -> float:
    """Optuna objective: fit AdmmSlim with sampled settings and score it.

    Samples ``lambda_1``, ``lambda_2``, ``eps_rel`` and ``eps_abs`` from the
    trial, keeps ``rho`` and ``n_iter`` fixed, fits the model on the
    notebook-level ``train_X`` and returns the value of ``get_score_recall``
    evaluated against ``valid_X``.

    Args:
        trial: Optuna trial used to draw the hyperparameter values.

    Returns:
        The validation score of the fitted model.
    """
    # The suggest calls stay in this order so a seeded sampler keeps drawing
    # the same values for the same trial.
    lambda_1 = trial.suggest_int("lambda_1", 0, 50)
    lambda_2 = trial.suggest_int("lambda_2", 300, 700)
    eps_rel = trial.suggest_float("eps_rel", 1e-5, 1e-3)
    eps_abs = trial.suggest_float("eps_abs", 1e-4, 1e-2)

    model = AdmmSlim(
        verbose=True,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        rho=10000,  # held fixed, not searched
        n_iter=50,  # held fixed, not searched
        eps_rel=eps_rel,
        eps_abs=eps_abs,
    )
    model.fit(train_X)

    return get_score_recall(model, train_X, valid_X)

In [7]:
# TPE sampler with a fixed seed for the hyperparameter search.
sampler = TPESampler(seed=10)

# Study that looks for the highest value returned by `objective`.
study = optuna.create_study(
    direction="maximize",
    sampler=sampler,
    study_name="ADMM_SLIM",
)

# Each trial fits one model, so the number of trials is kept small.
study.optimize(objective, n_trials=10)

# Report the best score and the hyperparameters that produced it.
print("Best Score:", study.best_value)
print("Best trial:", study.best_trial.params)


[32m[I 2022-04-12 06:06:48,492][0m A new study created in memory with name: ADMM_SLIM[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:14<00:00,  2.69s/it]
[32m[I 2022-04-12 06:09:31,160][0m Trial 0 finished with value: 0.15099763870239258 and parameters: {'lambda_1': 39, 'lambda_2': 308, 'eps_rel': 0.0006373117525770126, 'eps_abs': 0.007513158437132258}. Best is trial 0 with value: 0.15099763870239258.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:13<00:00,  2.67s/it]
[32m[I 2022-04-12 06:12:14,618][0m Trial 1 finished with value: 0.1517079621553421 and parameters: {'lambda_1': 25, 'lambda_2': 390, 'eps_rel': 0.00020608223611202773, 'eps_abs': 0.0076292540507696925}. Best is trial 1 with value: 0.1517079621553421.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:14<00:00,  2.69s/it]
[32m[I 2022-04-12 06:15:00,611][0m Trial 2 finished with value: 0.15292716026306152 and parameters: {'lambda_1': 8, 'lambda_2': 335, 'eps_rel': 0.0006885062201841193, 'eps_abs': 0.009538594127329872}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:02<00:00,  2.45s/it]
[32m[I 2022-04-12 06:17:39,885][0m Trial 3 finished with value: 0.14910277724266052 and parameters: {'lambda_1': 0, 'lambda_2': 505, 'eps_rel': 0.0008144947520355924, 'eps_abs': 0.006164008061610943}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:14<00:00,  2.68s/it]
[32m[I 2022-04-12 06:20:22,280][0m Trial 4 finished with value: 0.15095195174217224 and parameters: {'lambda_1': 36, 'lambda_2': 417, 'eps_rel': 0.000918596381287814, 'eps_abs': 0.007174300255637137}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:13<00:00,  2.67s/it]
[32m[I 2022-04-12 06:23:05,368][0m Trial 5 finished with value: 0.1516708880662918 and parameters: {'lambda_1': 27, 'lambda_2': 357, 'eps_rel': 0.00037960735245095456, 'eps_abs': 0.006773922789156819}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:13<00:00,  2.66s/it]
[32m[I 2022-04-12 06:25:49,302][0m Trial 6 finished with value: 0.1517740786075592 and parameters: {'lambda_1': 22, 'lambda_2': 474, 'eps_rel': 0.0006215893086846241, 'eps_abs': 0.005180068601288471}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:16<00:00,  2.72s/it]
[32m[I 2022-04-12 06:28:34,459][0m Trial 7 finished with value: 0.1508912742137909 and parameters: {'lambda_1': 33, 'lambda_2': 541, 'eps_rel': 0.000807170964864419, 'eps_abs': 0.005264306808696978}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:12<00:00,  2.66s/it]
[32m[I 2022-04-12 06:31:15,624][0m Trial 8 finished with value: 0.1502230316400528 and parameters: {'lambda_1': 46, 'lambda_2': 428, 'eps_rel': 9.95547557781983e-05, 'eps_abs': 0.0030769305606984133}. Best is trial 2 with value: 0.15292716026306152.[0m


 --- init
 --- iteration start.


100%|██████████| 50/50 [02:15<00:00,  2.70s/it]
[32m[I 2022-04-12 06:34:02,226][0m Trial 9 finished with value: 0.15250138938426971 and parameters: {'lambda_1': 5, 'lambda_2': 632, 'eps_rel': 5.642735619535727e-05, 'eps_abs': 0.0063002427682827865}. Best is trial 2 with value: 0.15292716026306152.[0m


Best Score: 0.15292716026306152
Best trial: {'lambda_1': 8, 'lambda_2': 335, 'eps_rel': 0.0006885062201841193, 'eps_abs': 0.009538594127329872}
