In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn

import numpy as np
import pandas as pd
from scipy import sparse
from tqdm import tqdm

In [2]:
from model_util import *
from Model import *

In [3]:
# Pick the compute device for the EASE model and the per-user ranking.
# The second GPU (index 1) is preferred, but 'cuda:1' is only a valid
# ordinal when at least two GPUs are visible; on a single-GPU host fall back
# to 'cuda:0', and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
else:
    device = torch.device('cpu')

In [4]:
# --- Evaluation configuration -------------------------------------------

# Number of items in each pre-generated fixed sample (original note:
# "sample size + 1"; presumably the target item plus the sampled items).
size = 100

# Number of pre-generated sample files evaluated per dataset (0.npy, 1.npy, ...).
repeats = 100

# Location of the processed training data and of the fixed samples.
data_path = '../data/processed_data/'
test_data_path = '../data/fix_sample_%d/' % size

# Per-dataset value handed to EASE.fit() as `lamda`.
args = {
    'pinterest-20': 1500,
    'yelp': 500,
    'ml-20m': 3000,
}

# Datasets to process, in the order they are declared above.
datasets = list(args)

In [5]:
# Sizes of the successive extra sampling rounds used by the adaptive
# evaluation: starts at 100 and doubles each round (100, 200, ..., 1600).
sample_size_list = [100 * 2 ** step for step in range(5)]

In [6]:
# Adaptive-sampling evaluation of EASE.
#
# For every dataset: fit EASE once, compute the dense score matrix F, and then
# for each of the `repeats` pre-generated fixed samples compute, per user,
#   * the rank of the target item (items[0]) among all items (global rank),
#   * its rank inside the fixed sample (sample rank), and
#   * when the sample rank is 0, keep drawing extra batches of global ranks
#     (sizes from `sample_size_list`) until the target is no longer ranked
#     first or the schedule is exhausted.
# The per-user ranks, final sample sizes and extended item lists are passed to
# save_Adaptive() once per repeat.
for dataset in datasets:

    print('processing %s'%dataset)

    data_reader = DataReader(data_path, dataset)
    num_user, num_item = data_reader.get_user_item()
    topk = int(num_item)  # rank the whole catalogue, not just a prefix

    URM_train = data_reader.get_URM_train()

    model = EASE(URM_train, device)

    lamda = args[dataset]

    # fit() returns a torch tensor (it is moved to the CPU below);
    # presumably the learned item-item weight matrix -- confirm in Model.py.
    BB = model.fit(lamda)
    BB = BB.cpu().numpy()

    # One row of scores per user, one column per item.
    F = URM_train.dot(BB)

    for i in tqdm(range(repeats)):

        # Re-seed per repeat so the extra draws below are reproducible.
        np.random.seed(i + 33573517)

        # NOTE(review): allow_pickle=True unpickles the file contents; only
        # load sample files produced by this project.
        u_dict = np.load(test_data_path + dataset + '/%d.npy'%i, allow_pickle = True).item()

        new_dict = {}     # user -> sampled items, extended by the adaptive rounds
        global_rank = []  # target rank among all items, per user
        sample_rank = []  # target rank inside the (possibly extended) sample, per user
        sample_size = []  # final number of sampled items, per user

        for user, items in u_dict.items():

            # items[0] is the item whose rank is being measured.
            target = int(items[0])

            new_dict[user] = items.copy()

            predictions = torch.tensor(F[user]).view(-1).float().to(device)

            # Global rank: position of the target in the full descending
            # ordering. Locating it in the tensor avoids copying the whole
            # ranked catalogue into a Python list for every user.
            _, global_indicies = torch.topk(predictions, topk)
            g_rank = int(torch.nonzero(global_indicies == target).view(-1)[0].item())

            global_rank.append(g_rank)

            # Sample rank: position of the target among the sampled items only.
            sample_scores = predictions[items]
            _, sample_indicies = torch.topk(sample_scores, size)

            s_rank_list = np.take(items, sample_indicies.cpu().numpy())
            s_rank = s_rank_list.tolist().index(items[0])

            # Adaptive extension: only when the target tops the fixed sample.
            current_size = size
            current_rank = s_rank

            if current_rank == 0:

                for s_size in sample_size_list:

                    # Draw global ranks uniformly with replacement, dropping
                    # any draw that hits the target itself. The +100 slack
                    # makes a re-draw (too few survivors) very unlikely.
                    while True:
                        new_ranks = np.random.choice(num_item, size = s_size + 100, replace = True)
                        new_ranks = new_ranks[new_ranks != g_rank]
                        if len(new_ranks) >= s_size:
                            break

                    # keep s_size ranks without the target item rank
                    new_ranks = new_ranks[:s_size]

                    # Map the drawn ranks back to item ids and record them.
                    new_items = global_indicies[new_ranks].cpu().numpy()
                    new_dict[user].extend(new_items.tolist())

                    # Every drawn rank better than the target's global rank
                    # pushes the target one position down in the sample.
                    current_rank += int((new_ranks < g_rank).sum())
                    current_size += s_size

                    if current_rank != 0:
                        break

            sample_rank.append(current_rank)
            sample_size.append(current_size)

        s = np.array(sample_rank).astype('int')
        g = np.array(global_rank).astype('int')
        sz = np.array(sample_size).astype('int')

        save_Adaptive(dataset, 'EASE', s, g, sz, new_dict, i)

processing pinterest-20


100%|██████████| 100/100 [1:02:05<00:00, 37.26s/it]


processing yelp


100%|██████████| 100/100 [54:03<00:00, 32.44s/it]


processing ml-20m


100%|██████████| 100/100 [4:49:12<00:00, 173.52s/it] 
