In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import scipy.stats as stats
import pandas as pd
import utils
import os
import matplotlib.pyplot as plt
import time

from tqdm import tqdm
from utils import *

In [2]:
import torch

In [3]:
# Prefer GPU index 2 (the original setup). Fall back to GPU 0 when fewer than
# three CUDA devices are visible, and to the CPU when CUDA is unavailable, so
# that later `.to(device)` calls do not fail on smaller machines.
if torch.cuda.is_available():
    _gpu_index = 2 if torch.cuda.device_count() > 2 else 0
    device = torch.device('cuda:%d' % _gpu_index)
else:
    device = torch.device('cpu')

In [4]:
# Sample size. Passed to utils.A_Nn and fix_load_model, and interpolated into
# the 'fix_sample_%d/' component of the result directories.
n = 500 # sample size

# Number of repeats: per-repeat files '<i>.npz' are read and written for
# i in range(repeats).
repeats = 100

In [5]:
def new_PR_gpu(A, D, H):
    """Compute (A^T D A - g A^T A + g diag(colsum(A)))^{-1} A^T D with g = 1/H.

    All work happens on whatever device ``A`` and ``D`` already live on; the
    function does not depend on any notebook-level ``device`` variable.

    Args:
        A: 2-D tensor.
        D: square 2-D tensor whose size equals ``A.shape[0]`` (required by the
            product ``A.T @ D @ A``), on the same device as ``A``.
        H: nonzero scalar; ``gamma = 1 / H`` weights the correction terms.

    Returns:
        Tensor of shape ``(A.shape[1], A.shape[0])``.

    Raises:
        ZeroDivisionError: if ``H`` is a Python number equal to 0.
        Errors raised by ``torch.inverse`` (e.g. for a singular system matrix)
        propagate unchanged.
    """
    gamma = 1/H

    AT = A.T

    # A^T D is needed both inside the system matrix and as the right-hand
    # factor of the result, so compute it once.
    ATD = AT@D

    col_sums = A.sum(0)

    # torch.diag(col_sums) is created on A's device, so no explicit device
    # transfer is required here.
    system = ATD@A - gamma * AT@A + gamma * torch.diag(col_sums)

    return torch.inverse(system)@ATD

In [6]:
# Dataset name -> N, the first argument handed to utils.A_Nn
# (presumably the number of items in the dataset -- TODO confirm).
nums = {
    'pinterest-20': 9916,
    'yelp': 25815,
    'ml-20m': 20720,
}

# Datasets, in processing order.
datasets = ['ml-20m', 'pinterest-20', 'yelp']

# Model names; each one is a directory component of the saved results.
models = ['EASE', 'MultiVAE', 'NeuMF', 'itemKNN', 'ALS']

# Metric names.
metrics = ['Recall', 'NDCG', 'AP']

In [7]:
def get_estimate(model, dataset, estimator, n, num_repeats=None):
    """Load the saved per-repeat estimates of one estimator and stack them.

    Reads '<i>.npz' for i in range(num_repeats) from
    '../save_PR/fix_sample_<n>/<dataset>/<estimator>/<model>/' and collects
    the array stored under the key 'R' in each archive.

    Args:
        model: model name, used as a directory component.
        dataset: dataset name, used as a directory component.
        estimator: estimator name, used as a directory component.
        n: sample size, interpolated into 'fix_sample_%d/'.
        num_repeats: number of repeat files to read. Defaults to the
            notebook-level ``repeats`` when omitted.

    Returns:
        ``np.array`` built from the list of loaded 'R' arrays, so the leading
        axis indexes the repeat.

    Raises:
        Errors from ``np.load`` (e.g. a missing file) and ``KeyError`` for an
        archive without an 'R' entry propagate unchanged.
    """
    if num_repeats is None:
        num_repeats = repeats

    path = '../save_PR/' +'fix_sample_%d/'%n +dataset + '/'+estimator+'/' + model + '/'

    estimates = []
    for rep in range(num_repeats):
        # An NpzFile keeps its file handle open until closed; the context
        # manager releases it as soon as 'R' has been read.
        with np.load(path + '%d.npz'%rep) as archive:
            estimates.append(archive['R'])

    return np.array(estimates)

In [8]:
# For every (dataset, model) pair: build W with new_PR_gpu from the averaged
# 'MES' estimates, then write one '<repeat>.npz' (key 'R') per repeat under
# '../save_PR/fix_sample_<n>/<dataset>/MN_MES/<model>/'.
for dataset in datasets:

    N = nums[dataset]

    # utils.A_Nn is defined outside this notebook; its result is used here
    # only as the float matrix passed to new_PR_gpu.
    AA = utils.A_Nn(N, n)
    AA = torch.tensor(AA).float().to(device)

    print('processing %s'%dataset)

    for model in models:

        print('processing %s'%model)

        save_path = '../save_PR/' + 'fix_sample_%d/'%n + dataset + '/MN_MES/' + model + '/'

        # exist_ok=True avoids the check-then-create race of
        # os.path.exists followed by os.makedirs.
        os.makedirs(save_path, exist_ok=True)

        # Repeat 0 is loaded here only to obtain H = len(ru).
        ru, Ru = fix_load_model(model, dataset, n, 0)
        H = len(ru)

        # Average the saved 'MES' estimates over the repeats and place the
        # result on the diagonal of D.
        PR_MES = get_estimate(model, dataset, 'MES', n).mean(0)
        em_PR = torch.tensor(PR_MES).float()

        D = torch.diag(em_PR).to(device)

        W = new_PR_gpu(AA, D, H)
        W = W.cpu().numpy()

        for re in tqdm(range(repeats)):

            ru, Ru = fix_load_model(model, dataset, n, re)
            # Select the rows of W indexed by ru and average them.
            # NOTE(review): this materializes a len(ru) x W.shape[1] array on
            # every repeat; if ru is always a 1-D non-negative integer array, a
            # count-weighted sum (np.bincount) would avoid it -- confirm first.
            WW = W[ru]
            PR = WW.mean(0)

            np.savez(save_path+ '%d.npz'%re, R = PR)

processing ml-20m
processing EASE


100%|██████████| 100/100 [06:49<00:00,  4.09s/it]


processing MultiVAE


100%|██████████| 100/100 [06:21<00:00,  3.81s/it]


processing NeuMF


100%|██████████| 100/100 [06:17<00:00,  3.78s/it]


processing itemKNN


100%|██████████| 100/100 [06:31<00:00,  3.91s/it]


processing ALS


100%|██████████| 100/100 [06:18<00:00,  3.78s/it]
  0%|          | 0/100 [00:00<?, ?it/s]

processing pinterest-20
processing EASE


100%|██████████| 100/100 [01:11<00:00,  1.40it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

processing MultiVAE


100%|██████████| 100/100 [01:11<00:00,  1.39it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

processing NeuMF


100%|██████████| 100/100 [01:12<00:00,  1.38it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

processing itemKNN


100%|██████████| 100/100 [01:11<00:00,  1.40it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

processing ALS


100%|██████████| 100/100 [01:12<00:00,  1.38it/s]


processing yelp
processing EASE


100%|██████████| 100/100 [01:31<00:00,  1.09it/s]


processing MultiVAE


100%|██████████| 100/100 [01:31<00:00,  1.10it/s]


processing NeuMF


100%|██████████| 100/100 [01:29<00:00,  1.11it/s]


processing itemKNN


100%|██████████| 100/100 [01:28<00:00,  1.13it/s]


processing ALS


100%|██████████| 100/100 [01:28<00:00,  1.13it/s]
