In [1]:
import os
from models import AdmmSlim
from datasets import BaseDataset, ValidDataset
import numpy as np
import pandas as pd

In [2]:
import torch
import random

def set_seed(seed):
    """Seed every random number generator this notebook touches.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for repeatable
    kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: the running interpreter fixed its hash seed at start-up, so this
    # only influences subprocesses spawned after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # some cudnn methods can be random even after fixing the seed
    # unless you tell it to be deterministic
    torch.backends.cudnn.deterministic = True
    # Autotuning benchmarks several kernels and may pick a different one on
    # each run; switch it off so the chosen algorithm is stable.
    torch.backends.cudnn.benchmark = False


# Seed once, before any data split or model fit below.
seed = 10
set_seed(seed)

In [3]:
# Load the interactions from ../data/ (args.path = '../data/') and build the
# train/validation split on top of the loaded training dataset.
train_dataset = BaseDataset(path='../data/')
valid_dataset = ValidDataset(train_dataset=train_dataset)

# Input matrices used for fitting and for the recall evaluation.
train_X, valid_X = (
    train_dataset.train_input_data,
    valid_dataset.valid_input_data,
)

Creating interaction Train/ Vaild Split...
Train/Vaild Split Complete. Takes in 20.93163776397705 sec


In [4]:
def get_score_recall(model, train_X, valid_X, k=10):
    """Return the mean recall@k of ``model`` on held-out interactions.

    For every user the ``k`` highest-scoring items that the user has not
    already interacted with are taken as recommendations; recall is the share
    of that user's held-out items found among them.

    Args:
        model: Object exposing ``predict(train_X)`` that returns a
            (users x items) score matrix.
        train_X: Dense (users x items) training interaction matrix; non-zero
            entries mark items the user has already seen.
        valid_X: Dense (users x items) matrix of held-out interactions
            (assumed binary, as in the original metric).
        k: Number of recommendations per user. Defaults to 10, the cutoff
            that was previously hard-coded.

    Returns:
        Mean recall over the users that have at least one held-out item, or
        ``0.0`` when no user has any.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A slice of [-0:] would silently select every item.
        raise ValueError("k must be a positive integer")

    seen = np.asarray(train_X) != 0
    truth = np.asarray(valid_X)

    # Work on a float copy so the model's own score matrix is never mutated.
    scores = np.array(model.predict(train_X), copy=True)
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(np.float64)
    # Seen items must rank below *every* unseen item. Multiplying by
    # (1 - train_X) only zeroes them, which still beats unseen items whose
    # predicted score is negative.
    scores[seen] = -np.inf

    top_items = np.argsort(scores, axis=1)[:, -k:]

    recommended = np.zeros_like(truth)
    recommended[np.arange(len(recommended))[:, None], top_items] = 1

    hits = (recommended * truth).sum(1)
    n_relevant = truth.sum(1)

    # Users without held-out items would yield 0/0 and turn the mean into NaN.
    has_relevant = n_relevant > 0
    if not has_relevant.any():
        return 0.0
    return (hits[has_relevant] / n_relevant[has_relevant]).mean()

In [5]:
# Hyper-parameters passed to AdmmSlim for the validation run.
lambda_1, lambda_2 = 1, 500
rho = 10_000
n_iter = 100
eps_rel, eps_abs = 1e-4, 1e-3

In [6]:
# Fit on the training split, then report mean recall on the validation split.
model = AdmmSlim(
    lambda_1=lambda_1,
    lambda_2=lambda_2,
    rho=rho,
    n_iter=n_iter,
    eps_rel=eps_rel,
    eps_abs=eps_abs,
    verbose=True,
)
model.fit(train_X)

log_score = get_score_recall(model, train_X, valid_X)
print(log_score)

 --- init
 --- iteration start.


100%|██████████| 100/100 [04:56<00:00,  2.97s/it]


0.15275733


In [7]:
##### train_all to Submission #####
# Settings for the submission run. These reassign the globals of the same
# name used by the validation run; rho is not reassigned in this cell, so the
# value from the earlier params cell is reused.
k = 20
n_iter = 100
lambda_1, lambda_2 = 8, 335
eps_rel = 0.0006885062201841193
eps_abs = 0.009538594127329872

set_seed(seed)

# mode='train_all': no ValidDataset / valid_X is built for this run.
train_dataset = BaseDataset(path='../data/', mode='train_all')  # args.path = '../data/'
train_X = train_dataset.train_input_data

submission_model = AdmmSlim(
    lambda_1=lambda_1,
    lambda_2=lambda_2,
    rho=rho,
    n_iter=n_iter,
    eps_rel=eps_rel,
    eps_abs=eps_abs,
    verbose=True,
)
submission_model.fit(train_X)

Preparing interaction all train set
Train/Vaild Split Complete. Takes in 2.217848539352417 sec
 --- init
 --- iteration start.


 62%|██████▏   | 62/100 [03:30<02:29,  3.94s/it]

In [None]:
# Build the top-k recommendation table from the model fitted on all data.
y_predict = submission_model.predict(train_X)

# Push already-seen items to -inf so they can never enter the top-k.
# Multiplying by (1 - train_X) only zeroes their score, which still outranks
# unseen items whose predicted score is negative.
unseen_predict = np.array(y_predict, copy=True)
if not np.issubdtype(unseen_predict.dtype, np.floating):
    unseen_predict = unseen_predict.astype(np.float64)
unseen_predict[np.asarray(train_X) != 0] = -np.inf

# Column indices of the k highest-scoring unseen items for every user row.
top_items = np.argsort(unseen_predict, axis=1)[:, -k:]

# Lookup tables from matrix index back to the original ids.
idx2item = train_dataset.item2idx.reset_index(0)
idx2item.columns = ['item', 'item_idx']
idx2user = train_dataset.user2idx.reset_index(0)
idx2user.columns = ['user', 'user_idx']

# One (user_idx, item_idx) row per recommendation, built in a single
# vectorised step instead of concatenating one Series per user.
n_users, n_recs = top_items.shape
temp = pd.DataFrame({
    'user_idx': np.repeat(np.arange(n_users), n_recs),
    'item_idx': top_items.ravel(),
})

# Translate indices to original ids, then drop the index columns.
temp = temp.merge(idx2user, on='user_idx').merge(idx2item, on='item_idx')
temp = temp.drop(columns=['user_idx', 'item_idx'])

output = temp.sort_values('user').reset_index(drop=True)

output

