In [1]:
import numpy
import numpy.ma as ma
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Toy user x paper rating matrix: 5 rows, 8 columns.
# `high` is exclusive, so every entry is drawn from {1, 2}.
# NOTE(review): no random seed is set, so the values differ on every run.
rating_matrix = numpy.random.randint(low=1, high=3, size=(5, 8))
rating_matrix

array([[2, 1, 1, 1, 1, 1, 2, 1],
       [1, 2, 2, 1, 1, 1, 1, 2],
       [1, 1, 1, 2, 2, 1, 2, 1],
       [2, 1, 1, 1, 1, 2, 1, 1],
       [2, 1, 2, 2, 2, 1, 2, 1]])

In [3]:
# Boolean matrix of the same 5 x 8 shape, initially all False.
test_mask_matrix = numpy.full((5, 8), False, dtype=bool)
test_mask_matrix

array([[False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False]], dtype=bool)

In [4]:
# Scratch check: the integers 0 through 4 as a plain list.
list(range(5))

[0, 1, 2, 3, 4]

In [5]:
# Flag exactly one entry per row: row i is paired with the i-th listed column,
# i.e. (0, 0), (1, 1), (2, 2), (3, 3) and (4, 7) become True.
row_indices = numpy.arange(5)
column_indices = [0, 1, 2, 3, 7]
test_mask_matrix[row_indices, column_indices] = True
test_mask_matrix

array([[ True, False, False, False, False, False, False, False],
       [False,  True, False, False, False, False, False, False],
       [False, False,  True, False, False, False, False, False],
       [False, False, False,  True, False, False, False, False],
       [False, False, False, False, False, False, False,  True]], dtype=bool)

In [6]:
# Wrap the ratings in a masked array; entries where the mask is True are hidden.
train_data = ma.masked_array(data=rating_matrix, mask=test_mask_matrix)
train_data

masked_array(data =
 [[-- 1 1 1 1 1 2 1]
 [1 -- 2 1 1 1 1 2]
 [1 1 -- 2 2 1 2 1]
 [2 1 1 -- 1 2 1 1]
 [2 1 2 2 2 1 2 --]],
             mask =
 [[ True False False False False False False False]
 [False  True False False False False False False]
 [False False  True False False False False False]
 [False False False  True False False False False]
 [False False False False False False False  True]],
       fill_value = 999999)

In [7]:
# 8 x 10 matrix of standard-normal samples (one row per column of the rating matrix).
# NOTE(review): no random seed is set, so the values differ on every run.
theta = numpy.random.standard_normal(size=(8, 10))
theta

array([[-1.64278642, -0.30503899,  0.96092519,  0.80215673, -0.13893253,
        -0.59373953, -0.54728162,  0.59892328,  1.63107594, -1.1486659 ],
       [ 1.5527677 ,  0.42016853,  0.25988474, -0.22758284,  1.54384123,
         0.21447287, -0.04116339,  0.83602026,  0.8903968 ,  0.30848773],
       [ 1.32513858, -0.28254052, -1.02287235, -0.71416967, -1.23278907,
        -1.31351763,  1.26828482,  0.69532066,  0.62420521,  0.48953499],
       [ 1.10521244, -1.86420767, -0.54187631, -2.81770807,  0.94492279,
        -0.5854354 , -1.11761518,  0.75901973, -0.43631619,  0.15000033],
       [-1.35369545, -0.24906603, -1.89529735, -0.74880695, -0.15854168,
        -0.7093734 , -0.34666854, -1.52798312, -1.47210269,  0.38866936],
       [-1.11514544,  1.17409119, -1.41688307, -0.33085702, -0.29614505,
         1.13619472,  0.28788334, -1.13764896, -0.68258383, -1.41599824],
       [ 0.78964331,  1.79646916,  0.54206318, -0.54459046,  0.16900273,
         1.23751385, -0.37929578,  0.32401519

In [8]:
# Cosine similarity between every pair of theta rows, followed by a per-row
# argsort of those similarities.
# NOTE(review): argsort is ascending, so the leftmost columns hold the LEAST
# similar documents and each document appears in its own row (a row's
# self-similarity is the maximum) - confirm this is the intended ordering.
cosine_documents_similarity = cosine_similarity(theta)
sorted_documents_similarity = cosine_documents_similarity.argsort()
sorted_documents_similarity

array([[4, 3, 2, 1, 5, 6, 7, 0],
       [4, 5, 7, 0, 2, 3, 6, 1],
       [5, 0, 6, 4, 1, 3, 7, 2],
       [5, 0, 6, 7, 4, 2, 1, 3],
       [1, 6, 0, 7, 2, 3, 5, 4],
       [1, 3, 7, 2, 0, 6, 4, 5],
       [4, 7, 2, 3, 0, 5, 1, 6],
       [1, 6, 5, 3, 4, 2, 0, 7]], dtype=int64)

In [9]:
# Rows 0, 2 and 3 of the argsort result, restricted to their first three columns.
sorted_documents_similarity[[0,2,3],:3]

array([[4, 3, 2],
       [5, 0, 6],
       [5, 0, 6]], dtype=int64)

In [10]:
# For every user, build an integer matrix with one row per paper the user
# rated 1: column 0 holds the rated paper's index and the remaining
# `peers_per_paper` columns hold that paper's peer indices.
peers_per_paper = 3  # loop-invariant, so it is set once instead of per iteration
rated_and_peerpaper_matrix_list = []
# Iterate over however many users the rating matrix has (was a hard-coded 5).
for user in range(rating_matrix.shape[0]):
    # NOTE(review): this reads rating_matrix, not the masked train_data, so
    # held-out (masked) entries can also contribute rows - confirm intended.
    ones_indices = numpy.flatnonzero(rating_matrix[user] == 1)
    # NOTE(review): the argsort is ascending, so the first columns are the
    # lowest-similarity documents - confirm that is what "peers" should be.
    peer_indices = sorted_documents_similarity[ones_indices, :peers_per_paper]
    # Stack the index columns directly as integers; no float intermediate needed.
    user_rated_peer_matrix = numpy.column_stack((ones_indices, peer_indices)).astype(int)
    rated_and_peerpaper_matrix_list.append(user_rated_peer_matrix)

rated_and_peerpaper_matrix_list

[array([[1, 4, 5, 7],
        [2, 5, 0, 6],
        [3, 5, 0, 6],
        [4, 1, 6, 0],
        [5, 1, 3, 7],
        [7, 1, 6, 5]]), array([[0, 4, 3, 2],
        [3, 5, 0, 6],
        [4, 1, 6, 0],
        [5, 1, 3, 7],
        [6, 4, 7, 2]]), array([[0, 4, 3, 2],
        [1, 4, 5, 7],
        [2, 5, 0, 6],
        [5, 1, 3, 7],
        [7, 1, 6, 5]]), array([[1, 4, 5, 7],
        [2, 5, 0, 6],
        [3, 5, 0, 6],
        [4, 1, 6, 0],
        [6, 4, 7, 2],
        [7, 1, 6, 5]]), array([[1, 4, 5, 7],
        [5, 1, 3, 7],
        [7, 1, 6, 5]])]

In [11]:
# Pull out the first entry of the per-user list (index 0) for closer inspection.
user_0_rated_peer_matrix = rated_and_peerpaper_matrix_list[0]
user_0_rated_peer_matrix

array([[1, 4, 5, 7],
       [2, 5, 0, 6],
       [3, 5, 0, 6],
       [4, 1, 6, 0],
       [5, 1, 3, 7],
       [7, 1, 6, 5]])

In [12]:
# Split user 0's matrix column-wise: the first column becomes `ones`
# (a 1-D array), every remaining column becomes `zeros` (a 2-D array).
ones, zeros = user_0_rated_peer_matrix[:, 0], user_0_rated_peer_matrix[:, 1:]
print(ones)
zeros

[1 2 3 4 5 7]


array([[4, 5, 7],
       [5, 0, 6],
       [5, 0, 6],
       [1, 6, 0],
       [1, 3, 7],
       [1, 6, 5]])

In [13]:
# Index theta with the `ones` array: one theta row per entry of `ones`.
theta[ones]

array([[ 1.5527677 ,  0.42016853,  0.25988474, -0.22758284,  1.54384123,
         0.21447287, -0.04116339,  0.83602026,  0.8903968 ,  0.30848773],
       [ 1.32513858, -0.28254052, -1.02287235, -0.71416967, -1.23278907,
        -1.31351763,  1.26828482,  0.69532066,  0.62420521,  0.48953499],
       [ 1.10521244, -1.86420767, -0.54187631, -2.81770807,  0.94492279,
        -0.5854354 , -1.11761518,  0.75901973, -0.43631619,  0.15000033],
       [-1.35369545, -0.24906603, -1.89529735, -0.74880695, -0.15854168,
        -0.7093734 , -0.34666854, -1.52798312, -1.47210269,  0.38866936],
       [-1.11514544,  1.17409119, -1.41688307, -0.33085702, -0.29614505,
         1.13619472,  0.28788334, -1.13764896, -0.68258383, -1.41599824],
       [-1.27668355, -0.38114215, -0.11810443,  0.55307629, -1.83644552,
        -1.30609569, -0.63825337,  1.59639907,  0.84271933,  0.34933201]])

In [14]:
# Test: vectorized pair building on a small hand-made example (works).
# Each selected "rated" row is subtracted against its own group of
# "non-rated" rows in one broadcasted operation.
x = numpy.array([[1, 2, 3],
                 [4, 5, 6],
                 [2, 3, 4],
                 [3, 23, 1],
                 [2, 3, 5]])
# Rows 0, 1 and 4, with a length-1 middle axis inserted so that each row
# broadcasts against its group below: shape (3, 1, 3).
rated_xs = numpy.expand_dims(x[[0, 1, 4]], axis=1)
# One pair of row indices per rated row; gathering them yields shape (3, 2, 3).
non_rated_indices = numpy.array([[2, 1], [3, 4], [2, 0]])
non_rated_xs = numpy.take(x, non_rated_indices, axis=0)
# Broadcasted difference: (3, 1, 3) - (3, 2, 3) -> (3, 2, 3).
y = rated_xs - non_rated_xs

print(x)
print(rated_xs)
print(non_rated_xs)
print(y)
print(y.shape)

[[ 1  2  3]
 [ 4  5  6]
 [ 2  3  4]
 [ 3 23  1]
 [ 2  3  5]]
[[[1 2 3]]

 [[4 5 6]]

 [[2 3 5]]]
[[[ 2  3  4]
  [ 4  5  6]]

 [[ 3 23  1]
  [ 2  3  5]]

 [[ 2  3  4]
  [ 1  2  3]]]
[[[ -1  -1  -1]
  [ -3  -3  -3]]

 [[  1 -18   5]
  [  2   2   1]]

 [[  0   0   1]
  [  1   1   2]]]
(3, 2, 3)


In [15]:
# Flatten the stacked differences into 6 rows of 3 values each.
y = y.reshape((6, 3))
print(y)
print(y.shape)

[[ -1  -1  -1]
 [ -3  -3  -3]
 [  1 -18   5]
 [  2   2   1]
 [  0   0   1]
 [  1   1   2]]
(6, 3)


In [17]:
# Opposite-sign differences: non-rated minus rated (broadcast over the middle axis).
z = non_rated_xs - rated_xs
print(z)
print(z.shape)

[[[ 1  1  1]
  [ 3  3  3]]

 [[-1 18 -5]
  [-2 -2 -1]]

 [[ 0  0 -1]
  [-1 -1 -2]]]
(3, 2, 3)


In [19]:
# Flatten the opposite-sign differences into 6 rows of 3 values each.
z = z.reshape((6, 3))
print(z)
print(z.shape)

[[ 1  1  1]
 [ 3  3  3]
 [-1 18 -5]
 [-2 -2 -1]
 [ 0  0 -1]
 [-1 -1 -2]]
(6, 3)


In [25]:
# Stack the rows of y on top of the rows of z into a single 2-D array.
final = numpy.vstack((y, z))
print(final)
final.shape

[[ -1  -1  -1]
 [ -3  -3  -3]
 [  1 -18   5]
 [  2   2   1]
 [  0   0   1]
 [  1   1   2]
 [  1   1   1]
 [  3   3   3]
 [ -1  18  -5]
 [ -2  -2  -1]
 [  0   0  -1]
 [ -1  -1  -2]]


(12, 3)