In [1]:
import math
import numpy as np
import scipy.sparse as sp
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
import os
from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from box import Box

import warnings

warnings.filterwarnings(action='ignore')
torch.set_printoptions(sci_mode=True)

# 1. 학습 설정

In [2]:
config = {
    'data_path' : "/opt/ml/input/data/train" , # 데이터 경로
    'model_path' : "../model",


    'submission_path' : "../submission",
    'submission_name' : 'Ensembel_v7_submission.csv',

    'candidate_item_num' : 50,
    'valid_samples' : 10, # 검증에 사용할 sample 수
    'seed' : 22,
}

device = 'cuda' if torch.cuda.is_available() else 'cpu'

config = Box(config)

# 2. 데이터 전처리

In [3]:
class MakeMatrixDataSet():
    """
    MatrixDataSet 생성
    """
    def __init__(self, config):
        self.config = config
        self.df = pd.read_csv(os.path.join(self.config.data_path, 'train_ratings.csv'))
        
        self.item_encoder, self.item_decoder = self.generate_encoder_decoder('item')
        self.user_encoder, self.user_decoder = self.generate_encoder_decoder('user')
        self.num_item, self.num_user = len(self.item_encoder), len(self.user_encoder)

        self.df['item_idx'] = self.df['item'].apply(lambda x : self.item_encoder[x])
        self.df['user_idx'] = self.df['user'].apply(lambda x : self.user_encoder[x])

        self.user_train, self.user_valid = self.generate_sequence_data()

    def generate_encoder_decoder(self, col : str) -> dict:
        """
        encoder, decoder 생성

        Args:
            col (str): 생성할 columns 명
        Returns:
            dict: 생성된 user encoder, decoder
        """

        encoder = {}
        decoder = {}
        ids = self.df[col].unique()

        for idx, _id in enumerate(ids):
            encoder[_id] = idx
            decoder[idx] = _id

        return encoder, decoder
    
    def generate_sequence_data(self) -> dict:
        """
        sequence_data 생성

        Returns:
            dict: train user sequence / valid user sequence
        """
        users = defaultdict(list)
        user_train = {}
        user_valid = {}
        for user, item, time in zip(self.df['user_idx'], self.df['item_idx'], self.df['time']):
            users[user].append(item)
        
        for user in users:
            np.random.seed(self.config.seed)

            user_total = users[user]
            valid = np.random.choice(user_total, size = self.config.valid_samples, replace = False).tolist()
            train = list(set(user_total) - set(valid))

            user_train[user] = train
            user_valid[user] = valid # valid_samples 개수 만큼 검증에 활용 (현재 Task와 가장 유사하게)

        return user_train, user_valid
    
    def get_train_valid_data(self):
        return self.user_train, self.user_valid

    def make_matrix(self, user_list, train = True):
        """
        user_item_dict를 바탕으로 행렬 생성
        """
        mat = torch.zeros(size = (user_list.size(0), self.num_item))
        for idx, user in enumerate(user_list):
            if train:
                mat[idx, self.user_train[user.item()]] = 1
            else:
                mat[idx, self.user_train[user.item()] + self.user_valid[user.item()]] = 1
        return mat

    def make_sparse_matrix(self, test = False):
        X = sp.dok_matrix((self.num_user, self.num_item), dtype=np.float32)
        
        for user in self.user_train.keys():
            item_list = self.user_train[user]
            X[user, item_list] = 1.0
        
        if test:
            for user in self.user_valid.keys():
                item_list = self.user_valid[user]
                X[user, item_list] = 1.0

        return X.tocsr()

In [4]:
class AEDataSet(Dataset):
    def __init__(self, num_user):
        self.num_user = num_user
        self.users = [i for i in range(num_user)]

    def __len__(self):
        return self.num_user

    def __getitem__(self, idx): 
        user = self.users[idx]
        return torch.LongTensor([user])

# 3. 모델

In [5]:
import numpy as np
from scipy import sparse
from copy import deepcopy

class HOSLIM():
    def __init__(self, threshold = 3500, lambdaBB = 500, lambdaCC = 5000, rho = 100000, epochs = 40):
        self.threshold = threshold
        self.lambdaBB = lambdaBB
        self.lambdaCC = lambdaCC
        self.rho = rho
        self.epochs = epochs
    
    def create_list_feature_pairs(self, XtX):
        AA = np.triu(np.abs(XtX))
        AA[ np.diag_indices(AA.shape[0]) ]=0.0
        ii_pairs = np.where((AA > self.threshold) == True)
        return ii_pairs
    
    def create_matrix_Z(self, ii_pairs, X):
        MM = np.zeros( (len(ii_pairs[0]), X.shape[1]),    dtype=np.float64)
        MM[np.arange(MM.shape[0]) , ii_pairs[0]   ]=1.0
        MM[np.arange(MM.shape[0]) , ii_pairs[1]   ]=1.0
        CCmask = 1.0-MM
        MM = sparse.csc_matrix(MM.T)
        Z=  X * MM
        Z= (Z == 2.0 )
        Z=Z*1.0
        return Z, CCmask

    def train_higher(self, XtX, XtXdiag, ZtZ, ZtZdiag, CCmask, ZtX):
        ii_diag=np.diag_indices(XtX.shape[0])
        XtX[ii_diag] = XtXdiag + self.lambdaBB
        PP = np.linalg.inv(XtX)
        ii_diag_ZZ=np.diag_indices(ZtZ.shape[0])
        ZtZ[ii_diag_ZZ] = ZtZdiag + self.lambdaCC + self.rho
        QQ=np.linalg.inv(ZtZ)
        CC = np.zeros( (ZtZ.shape[0], XtX.shape[0]),dtype=np.float64 )
        DD = np.zeros( (ZtZ.shape[0], XtX.shape[0]),dtype=np.float64 )
        UU = np.zeros( (ZtZ.shape[0], XtX.shape[0]),dtype=np.float64 )

        for iter in range(self.epochs):
            # learn BB
            XtX[ii_diag] = XtXdiag
            BB= PP.dot(XtX-ZtX.T.dot(CC))
            gamma = np.diag(BB) / np.diag(PP)
            BB-= PP * gamma
            # learn CC
            CC= QQ.dot(ZtX-ZtX.dot(BB) + self.rho * (DD-UU))
            # learn DD
            DD=  CC  * CCmask 
            #DD= np.maximum(0.0, DD) # if you want to enforce non-negative parameters
            # learn UU (is Gamma in paper)
            UU+= CC-DD
        
        return BB, DD

    def fit(self, X):
        print(' --- init')
        XtX = (X.transpose() * X).toarray()
        XtXdiag = deepcopy(np.diag(XtX))
        ii_pairs = self.create_list_feature_pairs(XtX)
        Z, CCmask = self.create_matrix_Z(ii_pairs, X)

        ZtZ = (Z.transpose() * Z).toarray()
        ZtZdiag = deepcopy(np.diag(ZtZ))

        ZtX = (Z.transpose() * X).toarray()
        
        print(' --- iteration start.')
        BB, CC = self.train_higher(XtX, XtXdiag, ZtZ, ZtZdiag, CCmask, ZtX)
        print(' --- iteration end.')

        self.pred = torch.from_numpy(X.toarray().dot(BB) + Z.toarray().dot(CC))
    
    
class AdmmSlim():
    def __init__(self, lambda_1=1, lambda_2=500, rho=10000, positive=True, n_iter=50, eps_rel=1e-4, eps_abs=1e-3, verbose=False):
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.rho = rho
        self.positive = positive
        self.n_iter = n_iter
        self.eps_rel = eps_rel
        self.eps_abs = eps_abs
        self.verbose = verbose
    
    def soft_thresholding(self, B, Gamma):
        if self.lambda_1 == 0:
            if self.positive:
                return np.abs(B)
            else:
                return B
        else:
            x = B + Gamma / self.rho
            threshold = self.lambda_1 / self.rho
            if self.positive:
                return np.where(threshold < x, x - threshold, 0)
            else:
                return np.where(threshold < x, x - threshold,
                                np.where(x < - threshold, x + threshold, 0))

    def is_converged(self, B, C, C_old, Gamma):
        B_norm = np.linalg.norm(B)
        C_norm = np.linalg.norm(C)
        Gamma_norm = np.linalg.norm(Gamma)

        eps_primal = self.eps_abs * B.shape[0] - self.eps_rel * np.max([B_norm, C_norm])
        eps_dual = self.eps_abs * B.shape[0] - self.eps_rel * Gamma_norm

        R_primal_norm = np.linalg.norm(B - C)
        R_dual_norm = np.linalg.norm(C  - C_old) * self.rho

        converged = R_primal_norm < eps_primal and R_dual_norm < eps_dual
        return converged

    def fit(self, X):
        XtX = X.T.dot(X)
        if sparse.issparse(XtX):
            XtX = XtX.todense().A

        if self.verbose:
            print(' --- init')
        identity_mat = np.identity(XtX.shape[0])
        diags = identity_mat * (self.lambda_2 + self.rho)
        P = np.linalg.inv(XtX + diags).astype(np.float32)
        B_aux = P.dot(XtX)

        Gamma = np.zeros_like(XtX, dtype=np.float32)
        C = np.zeros_like(XtX, dtype=np.float32)

        if self.verbose:
            print(' --- iteration start.')
        for iter in range(self.n_iter):
            if self.verbose:
                print(f' --- iteration {iter+1}/{self.n_iter}')
            C_old = C.copy()
            B_tilde = B_aux + P.dot(self.rho * C - Gamma)
            gamma = np.diag(B_tilde) / (np.diag(P) + 1e-8)
            B = B_tilde - P * gamma
            C = self.soft_thresholding(B, Gamma)
            Gamma = Gamma + self.rho * (B - C)
            if self.is_converged(B, C, C_old, Gamma):
                if self.verbose:
                    print(f' --- Converged. Stopped iteration.')
                break

        coef = C

        self.pred = torch.from_numpy(X.dot(coef))

class DenseSlim():
    def __init__(self, X, reg):
        self.X = self._convert_sp_mat_to_sp_tensor(X)
        self.reg = reg
    
    def _convert_sp_mat_to_sp_tensor(self, X):
        """
        Convert scipy sparse matrix to PyTorch sparse matrix

        Arguments:
        ----------
        X = Adjacency matrix, scipy sparse matrix
        """
        coo = X.tocoo().astype(np.float32)
        i = torch.LongTensor(np.mat([coo.row, coo.col]))
        v = torch.FloatTensor(coo.data)
        res = torch.sparse.FloatTensor(i, v, coo.shape).to(device)
        return res
    
    def fit(self):
        '''

        진짜 정말 간단한 식으로 모델을 만듬

        '''
        XtX = self.X.to_dense().t() @ self.X.to_dense()
        identity_mat = torch.eye(XtX.shape[0]).to(device)
        diags = identity_mat * self.reg
        
        P = XtX + diags
        P = XtX.inverse()
        diag_P = 1. / P.diag()
        diag_P = diag_P.diag()

        coef = identity_mat - (P @ diag_P)
        
        self.pred = self.X.to_dense() @ coef

# 4. 학습 함수

In [6]:
def get_ndcg(pred_list, true_list):
    idcg = sum((1 / np.log2(rank + 2) for rank in range(1, len(pred_list))))
    dcg = 0
    for rank, pred in enumerate(pred_list):
        if pred in true_list:
            dcg += 1 / np.log2(rank + 2)
    ndcg = dcg / idcg
    return ndcg

# hit == recall == precision
def get_hit(pred_list, true_list):
    hit_list = set(true_list) & set(pred_list)
    hit = len(hit_list) / len(true_list)
    return hit


def evaluate(model1, X, user_train, user_valid, candidate_cnt):

    mat = torch.from_numpy(X)

    NDCG = 0.0 # NDCG@10
    HIT = 0.0 # HIT@10

    recon_mat1 = model1.pred.cpu()
    recon_mat1[mat == 1] = -np.inf
    rec_list1 = recon_mat1.argsort(dim = 1)

    for user, rec1 in tqdm(enumerate(rec_list1)):
        uv = user_valid[user]

        # ranking
        up = rec1[-candidate_cnt:].cpu().numpy().tolist()[::-1]

        NDCG += get_ndcg(pred_list = up, true_list = uv)
        HIT += get_hit(pred_list = up, true_list = uv)

    NDCG /= len(user_train)
    HIT /= len(user_train)

    return NDCG, HIT

# 5. 학습

In [7]:
make_matrix_data_set = MakeMatrixDataSet(config = config)
user_train, user_valid = make_matrix_data_set.get_train_valid_data()
X = make_matrix_data_set.make_sparse_matrix()

In [8]:
model = DenseSlim(X = X, reg = 750)
model.fit()
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:00, 32555.42it/s]


NDCG@10: 0.25925| HIT@10: 0.17225


In [10]:
model = AdmmSlim(lambda_2 = 1, rho = 1000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 24796.41it/s]


NDCG@10: 0.30600| HIT@10: 0.20024


In [8]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 1000, rho = 1000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 23279.86it/s]

NDCG@10: 0.30091| HIT@10: 0.19676





In [9]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 500, rho = 1000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 25215.18it/s]


NDCG@10: 0.30321| HIT@10: 0.19844


In [10]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 50, rho = 1000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 22634.20it/s]

NDCG@10: 0.30604| HIT@10: 0.20026





In [11]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 5, rho = 1000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 22536.45it/s]

NDCG@10: 0.30627| HIT@10: 0.20035





In [16]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 5, rho = 10000)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 23276.79it/s]

NDCG@10: 0.30402| HIT@10: 0.19880





In [17]:
model = AdmmSlim(lambda_1 = 10, lambda_2 = 5, rho = 100)
model.fit(X = X.toarray())
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 24534.46it/s]

NDCG@10: 0.30095| HIT@10: 0.19741





In [8]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 10000, rho = 50000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init


In [None]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 15000, rho = 10000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24088.03it/s]


NDCG@10: 0.31118| HIT@10: 0.20455


In [None]:
model = HOSLIM(threshold = 3500, lambdaBB = 1000, lambdaCC = 10000, rho = 50000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 22746.58it/s]

NDCG@10: 0.31118| HIT@10: 0.20455





In [None]:
model = HOSLIM(threshold = 3500, lambdaBB = 10000, lambdaCC = 10000, rho = 50000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 22689.49it/s]

NDCG@10: 0.31118| HIT@10: 0.20455





In [None]:
model = HOSLIM(threshold = 3500, lambdaBB = 5000, lambdaCC = 10000, rho = 25000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 26117.21it/s]

NDCG@10: 0.30095| HIT@10: 0.19741





In [8]:
model = HOSLIM(threshold = 13000, lambdaBB = 1000, lambdaCC = 30000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24483.41it/s]

NDCG@10: 0.31057| HIT@10: 0.20379





In [9]:
model = HOSLIM(threshold = 22000, lambdaBB = 1000, lambdaCC = 30000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 23422.15it/s]

NDCG@10: 0.31057| HIT@10: 0.20379





In [10]:
model = HOSLIM(threshold = 33000, lambdaBB = 1000, lambdaCC = 10000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24554.56it/s]


NDCG@10: 0.31057| HIT@10: 0.20379


In [11]:
model = HOSLIM(threshold = 44000, lambdaBB = 1000, lambdaCC = 3000, rho = 30000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 25470.14it/s]

NDCG@10: 0.31057| HIT@10: 0.20379





In [12]:
model = HOSLIM(threshold = 1750, lambdaBB = 500, lambdaCC = 10000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init


In [11]:
model = HOSLIM(threshold = 3000, lambdaBB = 500, lambdaCC = 5000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 25064.96it/s]

NDCG@10: 0.31016| HIT@10: 0.20423





In [12]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 2000, rho = 30000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24912.96it/s]


NDCG@10: 0.30651| HIT@10: 0.20179


In [10]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 10000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 30437.35it/s]


NDCG@10: 0.31102| HIT@10: 0.20443


In [13]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 20000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 23497.94it/s]


NDCG@10: 0.31091| HIT@10: 0.20431


In [8]:
model = HOSLIM(threshold = 3500, lambdaBB = 500, lambdaCC = 5000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24928.76it/s]

NDCG@10: 0.31056| HIT@10: 0.20429





In [9]:
model = HOSLIM(threshold = 6500, lambdaBB = 500, lambdaCC = 5000, rho = 100000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24317.15it/s]


NDCG@10: 0.30995| HIT@10: 0.20358


In [10]:
model = HOSLIM(threshold = 10000, lambdaBB = 500, lambdaCC = 2000, rho = 30000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24253.44it/s]

NDCG@10: 0.31003| HIT@10: 0.20364





In [None]:
model = HOSLIM(threshold = 750, lambdaBB = 200, lambdaCC = 1200, rho = 10000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

In [None]:
model = HOSLIM(threshold = 1850, lambdaBB = 200, lambdaCC = 1000, rho = 10000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init


In [9]:
model = HOSLIM(threshold = 4050, lambdaBB = 200, lambdaCC = 200, rho = 10000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 24901.50it/s]


NDCG@10: 0.28670| HIT@10: 0.19031


In [8]:
model = HOSLIM(threshold = 6820, lambdaBB = 200, lambdaCC = 1200, rho = 10000)
model.fit(X = X)
ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
print(f'NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

 --- init
 --- iteration start.
 --- iteration end.


31360it [00:01, 25120.80it/s]


NDCG@10: 0.30461| HIT@10: 0.20077


In [8]:
for reg in [10000, 8000, 6000, 4000, 2000, 1000, 800, 600, 400, 200, 100]:
    model = AdmmSlim(lambda_2 = 1, rho = reg, verbose = False)
    model.fit(X = X.toarray())
    ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
    print(f'rho : {reg}| NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 26196.18it/s]


rho : 10000| NDCG@10: 0.30442| HIT@10: 0.19907


31360it [00:01, 23755.24it/s]


rho : 8000| NDCG@10: 0.30466| HIT@10: 0.19923


31360it [00:01, 25640.66it/s]


rho : 6000| NDCG@10: 0.30495| HIT@10: 0.19938


31360it [00:01, 26796.71it/s]


rho : 4000| NDCG@10: 0.30532| HIT@10: 0.19958


31360it [00:01, 22900.96it/s]


rho : 2000| NDCG@10: 0.30605| HIT@10: 0.20013


31360it [00:01, 23095.43it/s]


rho : 1000| NDCG@10: 0.30600| HIT@10: 0.20024


31360it [00:01, 24019.05it/s]


rho : 800| NDCG@10: 0.30573| HIT@10: 0.20016


31360it [00:01, 26868.35it/s]


rho : 600| NDCG@10: 0.30543| HIT@10: 0.20004


31360it [00:01, 23032.59it/s]


rho : 400| NDCG@10: 0.30504| HIT@10: 0.20002


31360it [00:01, 23314.67it/s]


rho : 200| NDCG@10: 0.30388| HIT@10: 0.19929


31360it [00:01, 25826.09it/s]

rho : 100| NDCG@10: 0.30144| HIT@10: 0.19778





In [8]:
for reg in [10000, 1000, 100, 10, 1]:
    model = HOSLIM(rho = reg)
    model.fit(X = X.toarray())
    ndcg, hit = evaluate(model1 = model, X = X.todense(), user_train = user_train, user_valid = user_valid, candidate_cnt = 10)
    print(f'rho : {reg}| NDCG@10: {ndcg:.5f}| HIT@10: {hit:.5f}')

31360it [00:01, 26722.55it/s]


rho : 10000| NDCG@10: 0.31052| HIT@10: 0.20434


31360it [00:01, 27630.96it/s]


rho : 1000| NDCG@10: 0.25501| HIT@10: 0.17144
