Recreate the basic variables defined in the paper.

In [1]:
import numpy as np

# Dimensions of the rating matrix: one row per user, one column per movie.
m = 6040 # users
n = 3952 # movies
# Observed ratings. Entries start at 0 and the helpers in this notebook use
# np.nonzero on R to find rated entries, so 0 acts as "not rated".
R = np.zeros((m,n), dtype = np.int8) # Rating matrix
# Predicted ratings are real-valued (e.g. 4.75...), so an integer dtype would
# silently truncate them on assignment; float32 keeps the fractional part
# while staying reasonably small for an m x n matrix.
R_hat = np.zeros((m,n), dtype = np.float32) # Predicted rating matrix

In [10]:
import io
# Read the data into the rating matrix.
# Only the first three '::'-separated fields of each line are used:
# user id, movie id and rating. Both ids are decremented by one to obtain
# the row/column index into R.
with open('ml-1m/ratings.dat', 'r') as fp:
    for line in fp:  # a text file object iterates over its own lines
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline at EOF)
        fields = line.split('::')
        R[int(fields[0])-1,int(fields[1])-1] = int(fields[2])
        
def O_u(u_i): # item set of user u_i
    """Return the indices of the non-zero entries in row ``u_i`` of ``R``.

    The value is the tuple produced by NumPy's ``nonzero`` on a 1-D row,
    i.e. a 1-tuple holding a single index array, not a bare array.
    """
    user_row = R[u_i, :]
    return user_row.nonzero()
def U_o(o_j): # user set of item o_j
    """Return the indices of the non-zero entries in column ``o_j`` of ``R``.

    The value is the tuple produced by NumPy's ``nonzero`` on a 1-D column,
    i.e. a 1-tuple holding a single index array, not a bare array.
    """
    item_column = R[:, o_j]
    return item_column.nonzero()

# Sanity check: item set of user 0, then user set of movie 0, one per line.
print(O_u(0), U_o(0), sep='\n')

(array([   0,   47,  149,  259,  526,  530,  587,  593,  594,  607,  660,
        719,  744,  782,  913,  918,  937, 1021, 1027, 1028, 1034, 1096,
       1192, 1196, 1206, 1245, 1269, 1286, 1544, 1565, 1720, 1835, 1906,
       1960, 1961, 2017, 2027, 2293, 2320, 2339, 2354, 2397, 2686, 2691,
       2761, 2790, 2796, 2803, 2917, 3104, 3113, 3185, 3407]),)
(array([   0,    5,    7, ..., 6031, 6034, 6039]),)


In [11]:
def r_bar_u(u_i): # mean ratings of user u_i
    """Return the mean of the non-zero ratings in row ``u_i`` of ``R``.

    The entries are selected with ``O_u(u_i)``. If that selection is empty,
    NumPy's mean of an empty array yields ``nan`` (with a warning).
    """
    rated_items = O_u(u_i)
    user_ratings = R[u_i, rated_items]
    return np.mean(user_ratings)

# Global mean over every non-zero entry of R (zeros are excluded so that
# unfilled cells do not drag the average down).
r_bar = R[R != 0].mean() # mean rating
print('Average rating:', r_bar)
print('Average rating for user 0:', r_bar_u(u_i=0))

Average rating: 3.58156445303
Average rating for user 0: 4.18867924528


In [18]:
def calc_r_hat(t, o_j, c_t):
    """
    Estimate the rating of user t on movie o_j.

    t -> target user index
    o_j -> movie index
    c_t -> similarity vector of t to all users

    The estimate is (mean rating of t) - (global mean rating) plus the
    c_t-weighted average of the ratings given to o_j by the users in U_o(o_j).

    NOTE(review): U_o(o_j) is used as-is, so t itself contributes when t has
    rated o_j. If the selected weights sum to zero (or nobody rated o_j) the
    weighted average is 0/0 and the result is nan.
    """
    raters = U_o(o_j)
    weights = c_t[raters]
    weighted_total = np.sum(weights * R[raters, o_j])
    weighted_mean = weighted_total / np.sum(weights)
    user_offset = r_bar_u(t) - r_bar
    return user_offset + weighted_mean

# Compare the stored rating with the estimate obtained when every user is
# given the same similarity weight (a vector of ones).
print('Rating of user 0 on movie 0:', R[0, 0])
print('Estimated rating (using uniform user similarity):',
      calc_r_hat(t=0, o_j=0, c_t=np.ones(m)))

Rating of user 0 on movie 0: 5
Estimated rating (using uniform user similarity): 4.75396120535


Starting the random walk.

In [91]:
def P_uo(u_i, o_j):
    """Type 1 walk, user to movie.

    Returns 1 divided by the number of positive entries in row ``u_i`` of
    ``R``. The ``o_j`` argument is accepted but does not influence the result.
    """
    n_rated = (R[u_i, :] > 0).sum()
    return 1. / n_rated
# Sanity check: user-to-movie step probability for user 0.
print(P_uo(u_i=0, o_j=0))

# Upper bound used by sim(); presumably the top of the rating scale -- TODO confirm.
MAXSCORE = 5
def sim(r_ij, r_kj):
    """Similarity of two ratings: MAXSCORE minus their absolute difference.

    Works element-wise when either argument is a NumPy array.
    """
    rating_gap = np.abs(r_ij - r_kj)
    return MAXSCORE - rating_gap
def P_ou(u_k, o_j, u_i): # Type 2 walk, movie to user, given the previous user
    """Probability of stepping from movie ``o_j`` to user ``u_k``.

    The weight of a candidate user is the similarity between their rating of
    ``o_j`` and the rating given by the previous user ``u_i``. It is
    normalised by the summed similarity over every user in ``U_o(o_j)``.
    """
    previous_rating = R[u_i, o_j]
    # U_o returns the 1-tuple from nonzero; unpack it to get the index array
    # and evaluate sim() over all raters in one vectorised call.
    (raters,) = U_o(o_j)
    numer = sim(previous_rating, R[u_k, o_j])
    denom = np.sum(sim(previous_rating, R[raters, o_j]))
    return numer / denom

# Sanity check: movie-to-user step probability back to user 0 via movie 0.
print(P_ou(u_k=0, o_j=0, u_i=0))

def p_trans_movie(u_k, u_i, o_j): # Transition probability from u_k to u_i using o_j
    """Two-step transition probability between two users through movie ``o_j``.

    Product of the user-to-movie step ``P_uo(u_i, o_j)`` and the
    movie-to-user step ``P_ou(u_k, o_j, u_i)``.

    NOTE(review): the header comment says "from u_k to u_i", but the factors
    start the walk at u_i and end at u_k -- confirm the intended direction.
    """
    to_movie = P_uo(u_i, o_j)
    to_user = P_ou(u_k, o_j, u_i)
    return to_movie * to_user

# Sanity check: user 0 back to user 0 through movie 0.
print(p_trans_movie(u_k=0, u_i=0, o_j=0))

def p_trans(u_k, u_i):# Total transition probability from u_k to u_i
    """Sum ``p_trans_movie`` over every movie rated by both users.

    The shared movies are the intersection of ``O_u(u_i)`` and ``O_u(u_k)``.
    With no shared movie the sum is over an empty list and evaluates to 0.0.
    """
    shared_movies = np.intersect1d(O_u(u_i), O_u(u_k))
    per_movie = []
    for o_j in shared_movies:
        per_movie.append(p_trans_movie(u_k, u_i, o_j))
    return np.sum(per_movie)

# Sanity check: total transition probability between users 0 and 3.
print(p_trans(u_k=0, u_i=3))

0.0188679245283
0.000580517821897
1.09531664509e-05
0.000101412334978
