In [30]:
import pandas as pd
import networkx as nx
import numpy as np
import os
import random
from scipy import sparse
from Dataset.RS_Data_Loader import RS_Data_Loader
from page_rank import pageRank
import importlib

In [28]:
def compute_PageRank(G, beta=0.85, epsilon=10**-4):
    '''
    Efficient computation of the PageRank values using a sparse adjacency 
    matrix and the iterative power method.
    
    Parameters
    ----------
    G : boolean adjacency matrix. np.bool8
        If the element j,i is True, means that there is a link from i to j.
    beta: 1-teleportation probability.
    epsilon: stop condition. Minimum allowed amount of change in the PageRanks
        between iterations.

    Returns
    -------
    output : tuple
        PageRank array normalized top one.
        Number of iterations.

    '''    
    #Test adjacency matrix is OK
    n,_ = G.shape
    assert(G.shape==(n,n))
    #Constants Speed-UP
    deg_out_beta = G.sum(axis=0).T/beta #vector
    #Initialize
    ranks = np.ones((n,1))/n #vector
    time = 0
    flag = True
    while flag:        
        time +=1
        with np.errstate(divide='ignore'): # Ignore division by 0 on ranks/deg_out_beta
            new_ranks = G.dot((ranks/deg_out_beta)) #vector
        #Leaked PageRank
        new_ranks += (1-new_ranks.sum())/n
        #Stop condition
        if np.linalg.norm(ranks-new_ranks,ord=1)<=epsilon:
            flag = False        
        ranks = new_ranks
    return ranks / ranks.max()

In [69]:
importlib.reload(page_rank.py)

NameError: name 'page_rank' is not defined

In [2]:
dataReader = RS_Data_Loader()

URM_train = dataReader.get_URM_train()
URM_validation = dataReader.get_URM_validation()
URM_test = dataReader.get_URM_test()
#ICM = dataReader.get_ICM()

RS_Data_Loader: loading data...
RS_Data_Loader: loading complete


In [76]:
relation_mat = URM_train.transpose().dot(URM_train).tocsc()

In [77]:
r0 = relation_mat[0].A
r0

array([[19,  0,  0, ...,  0,  0,  0]])

In [78]:
relation_mat

<20635x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 10460699 stored elements in Compressed Sparse Column format>

In [79]:
user_id = 0
songs_in_playlist = URM_train.indices[URM_train.indptr[user_id]:URM_train.indptr[user_id + 1]]

In [80]:
l = range(URM_train.shape[1])
s_all = set(l)

In [81]:
songs_in_playlist

array([ 1220,  8360, 12844, 14301], dtype=int32)

In [82]:
s_0 = s_all - set(songs_in_playlist)

In [83]:
for i in s_0:
    relation_mat.data[relation_mat.indptr[i]:relation_mat.indptr[i+1]].fill(0)
relation_mat.eliminate_zeros()

In [111]:
%%timeit
a = compute_PageRank(relation_mat.transpose())[0]

47.5 ms ± 677 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [86]:
a[1220]

matrix([[0.02241312]])

In [118]:
b = (a / a.max()).A1

In [120]:
b

matrix([[0.0001372],
        [0.0001372],
        [0.0001372],
        ...,
        [0.0001372],
        [0.0001372],
        [0.0001372]])

In [89]:
print(b[1220], b[8360], b[12844])

[[0.11455208]] [[0.95930704]] [[0.20659116]]


In [91]:
URM_train[0] * b

matrix([[2.28045027]])

In [94]:
URM_train[0] * b

array([2.28045027])

In [108]:
URM_train = URM_train.astype(float)

In [109]:
URM_train

<50446x20635 sparse matrix of type '<class 'numpy.float64'>'
	with 949248 stored elements in Compressed Sparse Row format>

In [105]:
b[songs_in_playlist]

array([0.11455208, 0.95930704, 0.20659116, 1.        ])

In [110]:
URM_train[0].data *= b[songs_in_playlist]

In [99]:
URM_train[0].data

array([1, 1, 1, 1])

In [117]:
%%timeit
URM_train[0].data *= b[songs_in_playlist]

58.7 µs ± 4.2 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
