In [80]:
import os
from models import AdmmSlim
from datasets import BaseDataset, ValidDataset
import numpy as np
import pandas as pd

In [81]:
import optuna
from optuna import Trial
from optuna.samplers import TPESampler

In [82]:
import torch
import random

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer applied to Python's ``random``, NumPy's legacy global
            generator and PyTorch (including its CUDA seeding calls). Its
            string form is also written to the ``PYTHONHASHSEED`` environment
            variable.

    Returns:
        None. The function only mutates global generator / backend state.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Each callable takes the seed as its only argument, so apply them uniformly.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Seeding alone does not make every cuDNN routine repeatable; the backend
    # has to be asked explicitly for deterministic behaviour.
    torch.backends.cudnn.deterministic = True
    
# Seed all generators once, before any data loading or model fitting below.
seed = 10
set_seed(seed)

In [83]:
# Build the training dataset from '../data/' (relative to the notebook's working directory).
train_dataset = BaseDataset(path = '../data/') # args.path = '../data/'
# The validation dataset is constructed from the training dataset object.
valid_dataset = ValidDataset(train_dataset = train_dataset)

# Inputs used by model.fit() and get_score_recall() in the cells below.
# NOTE(review): presumably dense user x item interaction matrices — confirm in datasets.py.
train_X = train_dataset.train_input_data
valid_X = valid_dataset.valid_input_data

Creating interaction Train/ Vaild Split...


KeyboardInterrupt: 

In [None]:
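# NOTE: superseded loop-based draft, kept for reference only; the active
# get_score_recall is the vectorised definition further down in this cell.
# def get_score_recall(model, train_X, valid_X):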
#     n_users = train_X.shape[0]
#     y_predict = model.predict(train_X)
#     unseen_predict = y_predict*(1-train_X)
#     top_items = np.argsort(unseen_predict, axis=1)[:, -10:]

#     recall = 0.0
#     for ans_items, sol_items in zip(valid_X, top_items):
#         ans_set = set(ans_items)
#         sol_set = set(sol_items)
#         denominator = min(10, len(ans_items))
#         numerator = len(ans_set.intersection(sol_set))
#         recall += numerator/denominator
    
#     recall /= n_users
#     return recall

def get_score_recall(model, train_X, valid_X, k=10):
    """Mean recall of the model's top-``k`` unseen recommendations.

    For every user (row) the model scores all items (columns), items the user
    already interacted with in ``train_X`` are excluded, and the ``k``
    best-scoring remaining items are compared against ``valid_X``.

    Args:
        model: Object exposing ``predict(train_X)`` that returns a 2-D score
            array with the same shape as ``train_X``.
        train_X: 2-D array-like of training interactions; any non-zero entry
            is treated as "already seen".
        valid_X: 2-D array-like of held-out interactions with the same shape
            as ``train_X`` (assumed binary, as in the original implementation
            — TODO confirm against the dataset classes).
        k: Number of recommendations per user. Defaults to 10, the cutoff the
            original implementation hard-coded.

    Returns:
        float: recall averaged over the users that have at least one held-out
        interaction; ``0.0`` when no user has any.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        # A slice of ``[:, -0:]`` would silently select every item.
        raise ValueError("k must be a positive integer")

    y_predict = np.asarray(model.predict(train_X))
    seen = np.asarray(train_X) != 0
    held_out = np.asarray(valid_X)

    # Mask seen items with -inf rather than multiplying by (1 - train_X):
    # zeroing a seen item still lets it outrank unseen items with negative
    # scores, so already-consumed items could occupy top-k slots.
    unseen_predict = np.where(seen, -np.inf, y_predict)
    top_items = np.argsort(unseen_predict, axis=1)[:, -k:]

    recommended = np.zeros(unseen_predict.shape, dtype=bool)
    recommended[np.arange(len(recommended))[:, None], top_items] = True
    # A user with fewer than k unseen items would otherwise have masked
    # (seen) items padded into the top-k.
    recommended &= ~seen

    numerator = (recommended * held_out).sum(axis=1)
    denominator = held_out.sum(axis=1)

    # Users without held-out interactions give 0/0 = NaN, which would turn
    # the whole mean (and the Optuna objective) into NaN; leave them out.
    evaluable = denominator > 0
    if not evaluable.any():
        return 0.0
    return float((numerator[evaluable] / denominator[evaluable]).mean())

In [None]:
def objective(trial: Trial) -> float:
    """Optuna objective: fit an AdmmSlim model and return its recall score.

    Four hyperparameters are sampled from the trial (in this order:
    ``lambda_1``, ``lambda_2``, ``eps_rel``, ``eps_abs``); ``rho``, ``n_iter``
    and ``verbose`` are held fixed. The model is fitted on the notebook-level
    ``train_X`` and scored with ``get_score_recall`` against ``valid_X``.

    Args:
        trial: The Optuna trial supplying hyperparameter suggestions.

    Returns:
        The value returned by ``get_score_recall`` for the fitted model.
    """
    # Regularisation strengths searched over integer ranges.
    lambda_1 = trial.suggest_int("lambda_1", 0, 50)
    lambda_2 = trial.suggest_int("lambda_2", 300, 700)
    # Tolerances; the author's inline notes gave 1e-4 (rel) and 1e-3 (abs)
    # as reference values.
    eps_rel = trial.suggest_float("eps_rel", 1e-5, 1e-3)
    eps_abs = trial.suggest_float("eps_abs", 1e-4, 1e-2)

    model = AdmmSlim(
        verbose=True,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        rho=10000,  # fixed; alternative once considered: trial.suggest_int("rho", 1, 20)
        n_iter=50,  # fixed; alternative once considered: trial.suggest_int("n_iter", 2, 256)
        eps_rel=eps_rel,
        eps_abs=eps_abs,
    )
    model.fit(train_X)

    return get_score_recall(model, train_X, valid_X)

In [None]:
# TPE sampler with a fixed seed so the suggested hyperparameters are repeatable.
sampler = TPESampler(seed=10)

# The objective returns a recall score, so the study maximises it.
study = optuna.create_study(direction="maximize", sampler=sampler, study_name="ADMM_SLIM")

# Each trial fits one AdmmSlim model.
study.optimize(objective, n_trials=10)

print("Best Score:", study.best_value)
print("Best trial:", study.best_trial.params)


[32m[I 2022-04-12 05:38:51,794][0m A new study created in memory with name: ADMM_SLIM[0m


 --- init
 --- iteration start.
 --- iteration 1/50
 --- iteration 2/50
 --- iteration 3/50
 --- iteration 4/50
 --- iteration 5/50
 --- iteration 6/50
 --- iteration 7/50
 --- iteration 8/50
 --- iteration 9/50
 --- iteration 10/50
 --- iteration 11/50
 --- iteration 12/50
 --- iteration 13/50
 --- iteration 14/50
 --- iteration 15/50
 --- iteration 16/50
 --- iteration 17/50
 --- iteration 18/50
 --- iteration 19/50
 --- iteration 20/50
 --- iteration 21/50
 --- iteration 22/50
 --- iteration 23/50
 --- iteration 24/50
 --- iteration 25/50
 --- iteration 26/50
 --- iteration 27/50
 --- iteration 28/50
 --- iteration 29/50
 --- iteration 30/50
 --- iteration 31/50
 --- iteration 32/50
 --- iteration 33/50
 --- iteration 34/50
 --- iteration 35/50
 --- iteration 36/50
 --- iteration 37/50
 --- iteration 38/50
 --- iteration 39/50
 --- iteration 40/50
 --- iteration 41/50
 --- iteration 42/50
 --- iteration 43/50
 --- iteration 44/50
 --- iteration 45/50
 --- iteration 46/50
 --- iterat

[32m[I 2022-04-12 05:41:49,394][0m Trial 0 finished with value: 0.0 and parameters: {'lambda_1': 39, 'lambda_2': 308, 'eps_rel': 0.0006373117525770126, 'eps_abs': 0.007513158437132258}. Best is trial 0 with value: 0.0.[0m


 --- init
 --- iteration start.
 --- iteration 1/50
 --- iteration 2/50
 --- iteration 3/50
 --- iteration 4/50
 --- iteration 5/50
 --- iteration 6/50
 --- iteration 7/50
 --- iteration 8/50
 --- iteration 9/50
 --- iteration 10/50
 --- iteration 11/50
 --- iteration 12/50
 --- iteration 13/50
 --- iteration 14/50
 --- iteration 15/50
 --- iteration 16/50


KeyboardInterrupt: 