In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
from tqdm import tqdm

import timeit
import time
import math
from sklearn.model_selection import train_test_split
# ./indexer
from indexer import AppendIndexer
import ALS

# Annoy
from annoy import AnnoyIndex

#SKLearn 
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import average_precision_score
from sklearn.metrics import pairwise_distances

# scipy
from scipy.spatial import distance
from scipy.sparse import lil_matrix
import scipy
import scipy.sparse
import itertools
import random

In [2]:
class ViewMatrix:
    def __init__(self, path):
        self.path = path
        self.original = True
        self.item_indexer = AppendIndexer.load('./entekhab.ir/view_matrix/item_indexer.indexer')
        self.user_indexer = AppendIndexer.load('./entekhab.ir/view_matrix/user_indexer.indexer')
        
    def load_matrix(path):
        
        matrix = ViewMatrix(path)

        try:
            matrix.view_matrix = ViewMatrix \
                .load_sparse_lil(path)
        except:
            print('Error: loading', path)

        return matrix

    def load_sparse_lil(filename):
        loader = np.load(filename, allow_pickle=True)
        result = lil_matrix(tuple(loader["shape"]), dtype=str(loader["dtype"]))
        result.data = loader["data"]
        result.rows = loader["rows"]
        return result
    
    def make_dense(self, user_min_view, item_min_view):
        self.original = False
        while True:
            removed_rows_cnt = self.trim_users_with_few_views(user_min_view)
            removed_columns_cnt = self.trim_columns_with_few_views(item_min_view)
            if not removed_columns_cnt and not removed_rows_cnt:
                break

    def trim_users_with_few_views(self, user_min_view):
        removing_row_indices = list(np.where(self.view_matrix.getnnz(1) < user_min_view)[0])
        print('Number of users which should be deleted:', len(removing_row_indices))
        self.trim_user_indices(to_remove_indices=removing_row_indices)
        return len(removing_row_indices)

    def trim_columns_with_few_views(self, column_min_view):
        removing_column_indices = list(np.where(self.view_matrix.getnnz(0) < column_min_view)[0])
        print('Number products which should be deleted:', len(removing_column_indices))
        self.trim_column_indices(to_remove_indices=removing_column_indices)
        return len(removing_column_indices)
    
    def trim_user_indices(self, to_remove_indices):
        self.user_indexer.remove_indexes(to_remove_indices)
        self.view_matrix = ViewMatrix.delete_row_lil(self.view_matrix, to_remove_indices)
    
    def trim_column_indices(self, to_remove_indices):
        self.item_indexer.remove_indexes(to_remove_indices)
        self.view_matrix = ViewMatrix.delete_column_lil(self.view_matrix, to_remove_indices)
    
    def delete_column_lil(mat: lil_matrix, *i) -> lil_matrix:
        mat = mat.transpose()
        mat = ViewMatrix.delete_row_lil(mat, *i)
        return mat.transpose()
    
    def delete_row_lil(mat: lil_matrix, *i) -> lil_matrix:
        if not isinstance(mat, lil_matrix):
            raise ValueError("works only for LIL format -- use .tolil() first")
        mat = mat.copy()
        mat.rows = np.delete(mat.rows, i)
        mat.data = np.delete(mat.data, i)
        mat._shape = (mat.rows.shape[0], mat._shape[1])
        return mat
    def to_csr(self):
        train_data = self.view_matrix.astype(np.float64)
        train_data = train_data.tocoo()
        train_data.data = np.log10(train_data.data) + 1
        train_data = train_data.tocsr()
        return train_data

In [3]:
def ALSReady(path: str, l = 2):
    now = time.time()
    matrix = ViewMatrix.load_matrix(path)
    print('View matrix loaded in', time.time() - now, 'seconds.')

    now = time.time()
    sparce_matrix = matrix.to_csr()
    matrix.make_dense(user_min_view = l, 
                      item_min_view = l)
    implicit_matrix = matrix.to_csr()
    print('matrix has been made dense in', time.time() - now, 'seconds.')
    return matrix, sparce_matrix, implicit_matrix

def CFTrain(matrix, implicit_matrix, _alpha = 15, _facs = 20, _itr = 15, save = False):
    
    now = time.time()
    als_model = ALS.Als(num_factors = _facs,
                        iterations = _itr,
                        num_threads = 10,
                        alpha = _alpha)

    
    als_model.fit(implicit_matrix)
    alsTime = time.time() - now
    print('ALS model is fitted in', alsTime, 'seconds.')
    if save:
        print('Saving Data ...')
        matrix.item_indexer.dump('./entekhab.ir/alisResult/ALS/ali_item_indexer_factorized.indexer')
        matrix.user_indexer.dump('./entekhab.ir/alisResult/ALS/ali_user_indexer_factorized.indexer')
        np.save('./entekhab.ir/alisResult/ALS/ali_items_vectors.npy', als_model.item_vectors)
        np.save('./entekhab.ir/alisResult/ALS/ali_users_vectors.npy', als_model.user_vectors)

    return als_model.item_vectors, als_model.user_vectors, alsTime

In [4]:
def get_annoy_results(pages_vector, number_of_trees = 50, number_of_neighbours = 11):
    
    start = time.time()
    annoy_model = annoy_model = AnnoyIndex(pages_vector.shape[1], 'angular')
    
    for index, vector in enumerate(pages_vector):
        annoy_model.add_item(index, vector)
    
    annoy_model.build(number_of_trees)
    
    annoy_indices = []
    annoy_distances = []

    for i in pages_vector:
        indices, distances = annoy_model. \
            get_nns_by_vector(i, number_of_neighbours, include_distances=True)
        annoy_indices.append(indices)
        annoy_distances.append(distances)
    
    duration = time.time() - start
    print('annoy time:', duration)
    return annoy_indices, annoy_distances, duration

implicit matrix ro migire va ye bakhshisho baramun test o train mikone ke ye bakhshi az cell ha sefr shodan:

In [5]:
def generate_test_set_precision_recall(implicit_matrix, _test_size = 0.05, test_cells = 0.2):
    train, test = train_test_split(implicit_matrix, shuffle=False, test_size = _test_size)
    print('test_shape', test.shape, 'train_shape (which we cant process bc of RAM)', train.shape)
    rows,cols = test.nonzero()
    delete_index = [(rows[i], cols[i]) for i in random.sample(range(1, len(rows)), int(len(rows)*test_cells))]
    print("total cells", len(rows), "number of deleted cells", len(delete_index))
    
    x_train = scipy.sparse.csr_matrix(test.shape)
    for i, j in tqdm(zip(rows, cols)):
        if (i, j) in delete_index:
            continue
        x_train[i, j] = test[i, j]
    return x_train, test, delete_index

def prepare_data_for_AP(implicit_matrix, _test_size = 0.05):
    train, test = train_test_split(implicit_matrix, shuffle=False, test_size = _test_size)
    print('test_shape', test.shape, 'train_shape (which we cant process bc of RAM)', train.shape)
    rows, cols = test.nonzero()
    unique, counts = np.unique(rows, return_counts=True)
    commulative_count = [0]
    for i in counts:
        commulative_count.append(commulative_count[-1] + i)
    delete_row = []
    for i in range(1, len(commulative_count)):
        delete_row.append(np.random.randint(commulative_count[i-1], commulative_count[i]))
    delete_index = [(rows[i], cols[i]) for i in delete_row]
    # print(delete_row[:10], delete_index[:10], rows[:10], cols[:10], random.randrange(commulative_count[0], 10, 2) )
    print("total cells", len(rows), "number of deleted cells", len(delete_index))
    
    x_train = scipy.sparse.csr_matrix(test.shape)
    for i, j in zip(rows, cols):
        if (i, j) in delete_index:
            continue
        x_train[i, j] = test[i, j]
    return x_train, test, delete_index

def prepare_data_for_MAE(implicit_matrix, set_size = 0.05):
    train, test = train_test_split(implicit_matrix, shuffle=False, test_size = set_size)
    return test

In [6]:
def hit_rate_at_k(deleted, x_train, test_approx, k = 10):
    found = []
    for i in tqdm(range(len(test_approx))):
        ta = list(n_argmax(test_approx[i], 30))
        nonz = np.nonzero(x_train[i])[0]
        same_old = []
        for l in range(len(ta)):
            if ta[l] in nonz:
                same_old.append(l)

        for l in same_old[::-1]:
            ta.pop(l)
        ta = ta[:k]        
        for j in ta:
            found.append((i, j))
    same = 0
    for i in tqdm(deleted):
        if i in found:
            same += 1
    print(same, len(deleted), same/len(deleted))
    return same/len(deleted)

def visited_at_k(x_train, test_approx, k = 10):
    same_old = []
    for i in tqdm(range(len(test_approx))):
        ta = list(n_argmax(test_approx[i], k))
        nonz = np.nonzero(x_train[i])[0]
        for l in range(len(ta)):
            if ta[l] in nonz:
                same_old.append(l)
    rows,cols = x_train.nonzero()
    return len(same_old)/len(rows)

def n_argmax(a, n):
    ranked = np.argsort(a)
    largest_indices = ranked[::-1][:n]
    return largest_indices

In [1]:
#random search
itrs1 = [15, 15, 15, 15]
alphas1 = [5, 5, 5, 5]
factors1 = [5, 10, 20, 40]

itrs2 = [15, 15, 15, 15]
alphas2 = [10, 10, 10, 10]
factors2 = [5, 10, 20, 40]

itrs3 = [15, 15, 15, 15]
alphas3 = [50, 50, 50, 50]
factors3 = [5, 10, 20, 40]

itrs4 = [15, 15, 15, 15]
alphas4 = [100, 100, 100, 100]
factors4 = [5, 10, 20, 40]

### metrics

accuracy metrics:
- recall@k, hit_rate@k
- visited_rate@k
- MAE on train set per iteration

time metrics:
- training time
- query time

--------------------------------------------------------------------------
hit_rate@k, visited@k

In [8]:
# matrix, sparce_matrix, implicit_matrix = ALSReady('./chetor.com/view_matrix/lil_matrix.npz', l=2)
# print(implicit_matrix.shape)

In [9]:
# #to test
# x_train, test, deleted = generate_test_set_precision_recall(implicit_matrix, _test_size = 0.07)
# print('train set shape:', x_train.shape, '\ntest set shape:', test.shape, '\ndeleted cells:', len(deleted), type(test), type(x_train))
# #training ALS model
# item_vectors, user_vectors, alsTime = \
# CFTrain(None, x_train, _alpha = 10, _facs = 20, _itr = 20)
# print(item_vectors.shape, user_vectors.shape)
# test_approx = np.matmul(user_vectors, item_vectors.T)
# print(test_approx.shape, test.shape)

In [10]:
# hit_rate_at_k(deleted, x_train.toarray(), test_approx, k = 6)
# hit_rate_at_k(deleted, x_train.toarray(), test_approx, k = 3)

In [11]:
# visited_at_k(x_train.toarray(), test_approx, k = 6)
# visited_at_k(x_train.toarray(), test_approx, k = 3)

In [12]:
def calc_visited_hitRate(alphas, itrs, factors, _l = 3, data_set_size = 0.072, log = False):
    visited3s = []
    hitRate3s = []
    visited6s = []
    hitRate6s = []
    AP3s = []
    AP6s = []
    trainingTime = []
    qTime = []
    matrix, sparce_matrix, implicit_matrix = ALSReady('./entekhab.ir/view_matrix/lil_matrix.npz', l=_l)
    x_train_AP, test_AP, delete_index_AP = prepare_data_for_AP(implicit_matrix, _test_size = data_set_size)
    x_train, test, deleted = generate_test_set_precision_recall(implicit_matrix, _test_size = data_set_size)
    for a,i,f in zip(alphas, itrs, factors):
        if log: print('------alphas, itrs, factors------', a,i,f)
        item_vectors, user_vectors, alsTime = \
        CFTrain(None, x_train, _alpha = a, _facs = f, _itr = i)
        a1, a2, duration = get_annoy_results(item_vectors)
        qTime.append(duration)
        trainingTime.append(alsTime)
        if log: print('training and querry time:', trainingTime[-1], qTime[-1])
        test_approx = np.matmul(user_vectors, item_vectors.T)
        visited3s.append(visited_at_k(x_train.toarray(), test_approx, k = 3))
        if log: print('visited3s', visited3s[-1])
        visited6s.append(visited_at_k(x_train.toarray(), test_approx, k = 6))
        if log: print('visited6s', visited6s[-1])
        hitRate3s.append(hit_rate_at_k(deleted, x_train.toarray(), test_approx, k = 3))
        if log: print('hitRate3s', hitRate3s[-1])
        hitRate6s.append(hit_rate_at_k(deleted, x_train.toarray(), test_approx, k = 6))
        if log: print('hitRate6s', hitRate6s[-1])
        item_vectors_AP, user_vectors_AP, alsTime_AP = \
        CFTrain(None, x_train_AP, _alpha = a, _facs = f, _itr = i)
        test_approx_AP = np.matmul(user_vectors_AP, item_vectors_AP.T)
        
        AP3s.append(hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 3))
        if log: print('AP3s', AP3s[-1])
        AP6s.append(hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 6))
        if log: print('AP6s', AP6s[-1])
    return visited3s, visited6s, hitRate3s, hitRate6s, AP3s, AP6s, trainingTime, qTime

-------------------------------------------------------------------------------------
Average precision

In [13]:
# matrix, sparce_matrix, implicit_matrix_AP = ALSReady('./chetor.com/view_matrix/lil_matrix.npz', l=3)

In [14]:
# #testing
# x_train_AP, test_AP, delete_index_AP = prepare_data_for_AP(implicit_matrix_AP, _test_size = 0.2)
# #training ALS model
# item_vectors_AP, user_vectors_AP, alsTime_AP = \
# CFTrain(None, x_train_AP, _alpha = 10, _facs = 20, _itr = 20)
# print(item_vectors_AP.shape, user_vectors_AP.shape)
# test_approx_AP = np.matmul(user_vectors_AP, item_vectors_AP.T)
# print(test_approx_AP.shape, test_AP.shape)

In [15]:
# print("average precision k = 3", hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 3))
# print("average precision k = 6", hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 6))

In [16]:
def calc_AP(alphas, itrs, factors, _l = 3, data_set_size = 0.2):
    AP3s = []
    AP6s = []
    time
    matrix, sparce_matrix, implicit_matrix_AP = ALSReady('./chetor.com/view_matrix/lil_matrix.npz', l=_l)
    x_train_AP, test_AP, delete_index_AP = prepare_data_for_AP(implicit_matrix_AP, _test_size = data_set_size)
    for a,i,f in zip(alphas, itrs, factors):
        item_vectors_AP, user_vectors_AP, alsTime_AP = \
        CFTrain(None, x_train_AP, _alpha = a, _facs = f, _itr = i)
        test_approx_AP = np.matmul(user_vectors_AP, item_vectors_AP.T)
        AP3s.append(hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 3))
        AP6s.append(hit_rate_at_k(delete_index_AP, x_train_AP, test_approx_AP, k = 6))
    return AP3s, AP6s

-------------------------------------------------------------------------------------
MAE per iteration

In [17]:
# matrix, sparce_matrix, implicit_matrix = ALSReady('./chetor.com/view_matrix/lil_matrix.npz', l=2)
# train = prepare_data_for_MAE(implicit_matrix, set_size = 0.05)

In [18]:
def MAE_calc(train, itr = 10, facs = 20):
    item_vectors, user_vectors, alsTime = \
    CFTrain(None, train, _alpha = 10, _facs = 20, _itr = itr)
    approximated_matrix = np.matmul(user_vectors, item_vectors.T)
    absolute_error = np.absolute(train - approximated_matrix)
    return np.mean(absolute_error)

In [19]:
# MAEs = []
# for i in [2,5,7,10,15,20,30,40,80,100]:
#     MAEs.append(MAE_calc(train, itr = i))
#     print(MAEs[-1])

In [20]:
# plt.plot(range(19), MAEs[1:])

## Test,  data_set_size = 1 (on all data)

In [None]:
visited3s1, visited6s1, hitRate3s1, hitRate6s1, AP3s1, AP6s1, trainingTime1, qTime1 = \
calc_visited_hitRate(alphas1, itrs1, factors1, _l = 2, data_set_size = 0.03, log = True)

View matrix loaded in 9.8618745803833 seconds.
Number of users which should be deleted: 1784866
Number products which should be deleted: 26556
Number of users which should be deleted: 2255
Number products which should be deleted: 155
Number of users which should be deleted: 77
Number products which should be deleted: 4
Number of users which should be deleted: 2
Number products which should be deleted: 0
Number of users which should be deleted: 0
Number products which should be deleted: 0
matrix has been made dense in 31.21712899208069 seconds.
test_shape (9299, 6051) train_shape (which we cant process bc of RAM) (300651, 6051)
total cells 26831 number of deleted cells 9299


  self._set_intXint(row, col, x.flat[0])
223it [00:00, 2226.19it/s]

test_shape (9299, 6051) train_shape (which we cant process bc of RAM) (300651, 6051)
total cells 26831 number of deleted cells 5366


26831it [00:17, 1575.99it/s]


------alphas, itrs, factors------ 5 15 5
ALS model is fitted in 0.28662824630737305 seconds.
annoy time: 0.6946148872375488
training and querry time: 0.28662824630737305 0.6946148872375488


100%|██████████| 9299/9299 [00:02<00:00, 3915.15it/s]
  4%|▎         | 346/9299 [00:00<00:02, 3451.95it/s]

visited3s 0.4810156068017703


100%|██████████| 9299/9299 [00:02<00:00, 3482.42it/s]
  2%|▏         | 190/9299 [00:00<00:04, 1894.33it/s]

visited6s 0.5683205217796413


100%|██████████| 9299/9299 [00:03<00:00, 2567.28it/s]
100%|██████████| 5366/5366 [03:24<00:00, 26.23it/s]
  2%|▏         | 214/9299 [00:00<00:04, 2134.46it/s]

1361 5366 0.2536339918002236
hitRate3s 0.2536339918002236


100%|██████████| 9299/9299 [00:04<00:00, 2220.62it/s]
100%|██████████| 5366/5366 [07:08<00:00, 12.52it/s]


1764 5366 0.3287364890048453
hitRate6s 0.3287364890048453
ALS model is fitted in 0.28129100799560547 seconds.


100%|██████████| 9299/9299 [00:05<00:00, 1746.87it/s]
100%|██████████| 9299/9299 [06:34<00:00, 23.54it/s]
  2%|▏         | 195/9299 [00:00<00:04, 1947.03it/s]

2014 9299 0.21658242821808796
AP3s 0.21658242821808796


100%|██████████| 9299/9299 [00:04<00:00, 1891.82it/s]
100%|██████████| 9299/9299 [12:08<00:00, 12.76it/s]


2568 9299 0.2761587267448113
AP6s 0.2761587267448113
------alphas, itrs, factors------ 5 15 10
ALS model is fitted in 0.40508008003234863 seconds.
annoy time: 0.6868317127227783
training and querry time: 0.40508008003234863 0.6868317127227783


100%|██████████| 9299/9299 [00:02<00:00, 3800.49it/s]
  4%|▍         | 412/9299 [00:00<00:02, 4113.38it/s]

visited3s 0.5864430468204053


100%|██████████| 9299/9299 [00:02<00:00, 3672.54it/s]
  3%|▎         | 291/9299 [00:00<00:03, 2902.39it/s]

visited6s 0.6733286745865362


100%|██████████| 9299/9299 [00:03<00:00, 2518.14it/s]
100%|██████████| 5366/5366 [03:38<00:00, 24.59it/s]
  2%|▏         | 219/9299 [00:00<00:04, 2187.91it/s]

1110 5366 0.20685799478196049
hitRate3s 0.20685799478196049


100%|██████████| 9299/9299 [00:03<00:00, 2684.30it/s]
100%|██████████| 5366/5366 [06:53<00:00, 12.99it/s]


1543 5366 0.28755124860231085
hitRate6s 0.28755124860231085
ALS model is fitted in 0.4465057849884033 seconds.


100%|██████████| 9299/9299 [00:05<00:00, 1783.19it/s]
100%|██████████| 9299/9299 [06:47<00:00, 22.84it/s]
  2%|▏         | 166/9299 [00:00<00:05, 1651.97it/s]

1052 9299 0.11313044413377782
AP3s 0.11313044413377782


100%|██████████| 9299/9299 [00:05<00:00, 1842.11it/s]
100%|██████████| 9299/9299 [12:49<00:00, 12.08it/s]


1744 9299 0.18754704806968492
AP6s 0.18754704806968492
------alphas, itrs, factors------ 5 15 20
ALS model is fitted in 0.7360048294067383 seconds.
annoy time: 0.6211917400360107
training and querry time: 0.7360048294067383 0.6211917400360107


100%|██████████| 9299/9299 [00:02<00:00, 3882.62it/s]
  4%|▍         | 418/9299 [00:00<00:02, 4173.58it/s]

visited3s 0.6938737479617982


100%|██████████| 9299/9299 [00:02<00:00, 3895.26it/s]
  3%|▎         | 308/9299 [00:00<00:02, 3075.13it/s]

visited6s 0.7873747961798276


100%|██████████| 9299/9299 [00:03<00:00, 2565.01it/s]
100%|██████████| 5366/5366 [03:50<00:00, 23.31it/s]
  2%|▏         | 220/9299 [00:00<00:04, 2198.42it/s]

822 5366 0.15318673127096533
hitRate3s 0.15318673127096533


100%|██████████| 9299/9299 [00:03<00:00, 2396.19it/s]
100%|██████████| 5366/5366 [07:19<00:00, 12.20it/s]


1136 5366 0.21170331718225865
hitRate6s 0.21170331718225865
ALS model is fitted in 0.9109225273132324 seconds.


100%|██████████| 9299/9299 [00:05<00:00, 1736.25it/s]
100%|██████████| 9299/9299 [07:02<00:00, 22.01it/s]
  2%|▏         | 228/9299 [00:00<00:03, 2272.74it/s]

549 9299 0.05903860630175288
AP3s 0.05903860630175288


100%|██████████| 9299/9299 [00:04<00:00, 1950.33it/s]
100%|██████████| 9299/9299 [13:12<00:00, 11.74it/s]


1096 9299 0.11786213571351759
AP6s 0.11786213571351759
------alphas, itrs, factors------ 5 15 40
ALS model is fitted in 5.090430021286011 seconds.
annoy time: 0.6099114418029785
training and querry time: 5.090430021286011 0.6099114418029785


100%|██████████| 9299/9299 [00:02<00:00, 3582.80it/s]
  5%|▍         | 456/9299 [00:00<00:01, 4553.00it/s]

visited3s 0.7761938038667598


100%|██████████| 9299/9299 [00:02<00:00, 3383.00it/s]
  3%|▎         | 307/9299 [00:00<00:02, 3065.27it/s]

visited6s 0.8684835779175402


100%|██████████| 9299/9299 [00:03<00:00, 2453.53it/s]
100%|██████████| 5366/5366 [03:53<00:00, 23.01it/s]
  3%|▎         | 318/9299 [00:00<00:02, 3173.04it/s]

477 5366 0.08889303019008572
hitRate3s 0.08889303019008572


100%|██████████| 9299/9299 [00:03<00:00, 2810.47it/s]
100%|██████████| 5366/5366 [07:44<00:00, 11.55it/s]


682 5366 0.12709653373089824
hitRate6s 0.12709653373089824
ALS model is fitted in 5.540105581283569 seconds.


100%|██████████| 9299/9299 [00:04<00:00, 1898.24it/s]
100%|██████████| 9299/9299 [07:07<00:00, 21.73it/s]
  2%|▏         | 141/9299 [00:00<00:06, 1401.74it/s]

404 9299 0.04344553177761049
AP3s 0.04344553177761049


100%|██████████| 9299/9299 [00:05<00:00, 1774.29it/s]
100%|██████████| 9299/9299 [13:39<00:00, 11.35it/s]


736 9299 0.07914829551564684
AP6s 0.07914829551564684
------alphas, itrs, factors------ 10 15 5
ALS model is fitted in 0.2748854160308838 seconds.
annoy time: 0.7899103164672852
training and querry time: 0.2748854160308838 0.7899103164672852


100%|██████████| 9299/9299 [00:02<00:00, 3703.71it/s]
  3%|▎         | 290/9299 [00:00<00:03, 2894.28it/s]

visited3s 0.49168413696715585


100%|██████████| 9299/9299 [00:02<00:00, 3596.73it/s]
  3%|▎         | 314/9299 [00:00<00:02, 3138.03it/s]

visited6s 0.5924528301886792


100%|██████████| 9299/9299 [00:03<00:00, 2494.06it/s]
100%|██████████| 5366/5366 [03:28<00:00, 25.70it/s]
  3%|▎         | 311/9299 [00:00<00:02, 3106.65it/s]

1638 5366 0.30525531121878496
hitRate3s 0.30525531121878496


100%|██████████| 9299/9299 [00:03<00:00, 2480.09it/s]
100%|██████████| 5366/5366 [06:23<00:00, 13.98it/s]


2106 5366 0.39247111442415206
hitRate6s 0.39247111442415206
ALS model is fitted in 0.2815239429473877 seconds.


100%|██████████| 9299/9299 [00:05<00:00, 1613.98it/s]
100%|██████████| 9299/9299 [06:31<00:00, 23.77it/s]
  3%|▎         | 236/9299 [00:00<00:03, 2351.57it/s]

1846 9299 0.19851596945908162
AP3s 0.19851596945908162


100%|██████████| 9299/9299 [00:05<00:00, 1696.33it/s]
100%|██████████| 9299/9299 [11:56<00:00, 12.98it/s]  


2389 9299 0.25690934509087
AP6s 0.25690934509087
------alphas, itrs, factors------ 10 15 10
ALS model is fitted in 0.3713047504425049 seconds.
annoy time: 0.7086730003356934
training and querry time: 0.3713047504425049 0.7086730003356934


100%|██████████| 9299/9299 [00:02<00:00, 3690.38it/s]
  3%|▎         | 298/9299 [00:00<00:03, 2979.42it/s]

visited3s 0.59958071278826


100%|██████████| 9299/9299 [00:02<00:00, 4048.77it/s]
  3%|▎         | 308/9299 [00:00<00:02, 3074.10it/s]

visited6s 0.7157232704402515


100%|██████████| 9299/9299 [00:03<00:00, 2788.27it/s]
100%|██████████| 5366/5366 [03:26<00:00, 26.02it/s]
  2%|▏         | 207/9299 [00:00<00:04, 2063.27it/s]

1343 5366 0.25027953783078644
hitRate3s 0.25027953783078644


100%|██████████| 9299/9299 [00:03<00:00, 2682.12it/s]
100%|██████████| 5366/5366 [06:16<00:00, 14.26it/s]


1805 5366 0.33637718971300784
hitRate6s 0.33637718971300784


  0%|          | 0/9299 [00:00<?, ?it/s]

ALS model is fitted in 0.3566892147064209 seconds.


100%|██████████| 9299/9299 [00:05<00:00, 1764.57it/s]
100%|██████████| 9299/9299 [15:39:57<00:00,  6.06s/it]        
  1%|          | 72/9299 [00:00<00:13, 706.04it/s]

1177 9299 0.1265727497580385
AP3s 0.1265727497580385


100%|██████████| 9299/9299 [00:08<00:00, 1077.19it/s]
100%|██████████| 9299/9299 [17:54<00:00,  8.66it/s]


1756 9299 0.18883750940961394
AP6s 0.18883750940961394
------alphas, itrs, factors------ 10 15 20
ALS model is fitted in 1.3228659629821777 seconds.
annoy time: 0.7575104236602783
training and querry time: 1.3228659629821777 0.7575104236602783


100%|██████████| 9299/9299 [00:03<00:00, 2472.02it/s]
  3%|▎         | 258/9299 [00:00<00:03, 2568.68it/s]

visited3s 0.7258793384579548


100%|██████████| 9299/9299 [00:03<00:00, 2524.20it/s]
  2%|▏         | 151/9299 [00:00<00:06, 1507.50it/s]

visited6s 0.8275797810389005


100%|██████████| 9299/9299 [00:05<00:00, 1712.34it/s]
100%|██████████| 5366/5366 [05:16<00:00, 16.94it/s]
  2%|▏         | 185/9299 [00:00<00:04, 1833.84it/s]

1068 5366 0.19903093551994036
hitRate3s 0.19903093551994036


100%|██████████| 9299/9299 [00:04<00:00, 1863.71it/s]
100%|██████████| 5366/5366 [10:24<00:00,  8.60it/s]


1481 5366 0.27599701826313827
hitRate6s 0.27599701826313827
ALS model is fitted in 1.0070199966430664 seconds.


100%|██████████| 9299/9299 [00:08<00:00, 1155.82it/s]
 72%|███████▏  | 6712/9299 [07:54<03:23, 12.73it/s]

In [None]:
visited3s2, visited6s2, hitRate3s2, hitRate6s2, AP3s2, AP6s2, trainingTime2, qTime2 = \
calc_visited_hitRate(alphas2, itrs2, factors2, _l = 2, data_set_size = 0.03, log = True)

In [None]:
visited3s3, visited6s3, hitRate3s3, hitRate6s3, AP3s3, AP6s3, trainingTime3, qTime3 = \
calc_visited_hitRate(alphas3, itrs3, factors3, _l = 2, data_set_size = 0.03, log = True)

In [None]:
visited3s4, visited6s4, hitRate3s4, hitRate6s4, AP3s4, AP6s4, trainingTime4, qTime4 = \
calc_visited_hitRate(alphas4, itrs4, factors4, _l = 2, data_set_size = 0.03, log = True)