In [1]:
import pandas as pd
import numpy as np

In [2]:
table = pd.DataFrame(
    columns=['m1', 'm2', 'm3', 'm4', 'm5', 'm6'],
    index=['u1', 'u2', 'u3', 'u4', 'u5', 'u6'],
    data=np.array([
        [3,1,2,2,0,2],
        [4,2,3,3,4,2],
        [4,1,3,3,2,5],
        [0,3,4,4,5,0],
        [2,5,5,0,3,3],
        [1,4,0,5,0,0]
    ])
)
table

Unnamed: 0,m1,m2,m3,m4,m5,m6
u1,3,1,2,2,0,2
u2,4,2,3,3,4,2
u3,4,1,3,3,2,5
u4,0,3,4,4,5,0
u5,2,5,5,0,3,3
u6,1,4,0,5,0,0


In [3]:
# a)
def mean_rating(X):
    """Return the mean of the strictly positive entries of ``X``.

    Entries that are not greater than 0 are masked to NaN and dropped before
    averaging, so a 0 counts as "no rating" instead of lowering the mean.
    """
    rated_only = X.where(X > 0).dropna()
    return rated_only.mean()

# Report the mean of the non-zero ratings for every user, one line per user.
for u in table.index:
    user_ratings = table.loc[u]
    print("Mean rating for {} equals {}".format(u, mean_rating(user_ratings)))

Mean rating for u1 equals 2.0
Mean rating for u2 equals 3.0
Mean rating for u3 equals 3.0
Mean rating for u4 equals 4.0
Mean rating for u5 equals 3.6
Mean rating for u6 equals 3.3333333333333335


In [8]:
# b)
# Pairwise result matrices, one row and one column per user in `table`:
#   M[i, j] - Pearson similarity between user i and user j (float)
#   C[i, j] - number of co-rated items behind that similarity (int)
# They are sized from the number of users rather than copied from the shape of
# `table` (users x movies), so they stay correct when the table is not square.
# The builtin `float` / `int` replace the `np.float` / `np.int` aliases, which
# were deprecated in NumPy 1.20 and removed in NumPy 1.24.
n_users = len(table.index)
M = np.zeros((n_users, n_users), dtype=float)
C = np.zeros((n_users, n_users), dtype=int)

def pearson(data):
    """Pearson correlation between the first two columns of ``data``.

    Only co-rated rows are used: a row is kept when both columns hold a value
    greater than 0, and every other row is discarded before anything is
    computed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first two columns (selected by position) are the rating
        vectors to compare. Values that are not greater than 0 are treated
        as missing.

    Returns
    -------
    tuple
        ``(n_common, r)`` where ``n_common`` is the number of co-rated rows
        and ``r`` is the correlation rounded to 3 decimals. ``r`` is NaN when
        the correlation is undefined (no co-rated rows, or one of the columns
        is constant over the co-rated rows).
    """
    # drop rows that are not co-rated
    co_rated = data.where(lambda v: v > 0).dropna(how='any', axis=0)
    n_common = co_rated.shape[0]

    # Every remaining value is > 0, so the plain mean already is the mean
    # rating over the co-rated rows; no further filtering is needed.
    x = co_rated.iloc[:, 0]
    x_norm = (x - x.mean()).values

    y = co_rated.iloc[:, 1]
    y_norm = (y - y.mean()).values

    num = np.sum(x_norm * y_norm)
    denom = np.sqrt(np.sum(x_norm**2)) * np.sqrt(np.sum(y_norm**2))

    if denom == 0:
        # Undefined correlation: return NaN explicitly instead of evaluating
        # 0 / 0, which yields the same NaN but emits a RuntimeWarning.
        return n_common, np.nan

    return n_common, np.round(num / denom, 3)
    
    
# Fill both matrices with one pearson() call per ordered pair of users
# (the diagonal, i.e. each user compared with itself, is included).
ratings_by_movie = table.T  # movies as rows, one column per user
user_labels = list(table.index)
for i, first_user in enumerate(user_labels):
    for j, second_user in enumerate(user_labels):
        n_common, similarity = pearson(ratings_by_movie[[first_user, second_user]])
        C[i, j] = n_common
        M[i, j] = similarity

# Similarity matrix:
M

array([[ 1.   ,  0.845,  0.715,  1.   , -0.816, -0.721],
       [ 0.845,  1.   ,  0.   ,  1.   , -0.559, -0.721],
       [ 0.715,  0.   ,  1.   ,  0.426, -0.589, -0.577],
       [ 1.   ,  1.   ,  0.426,  1.   , -0.866,  1.   ],
       [-0.816, -0.559, -0.589, -0.866,  1.   ,  1.   ],
       [-0.721, -0.721, -0.577,  1.   ,  1.   ,  1.   ]])

In [9]:
# Common ratings matrix: C[i, j] is the number of items that user i and
# user j have both rated (> 0), i.e. the sample size behind M[i, j].
C

array([[5, 5, 5, 3, 4, 3],
       [5, 6, 6, 4, 5, 3],
       [5, 6, 6, 4, 5, 3],
       [3, 4, 4, 4, 3, 2],
       [4, 5, 5, 3, 5, 2],
       [3, 3, 3, 2, 2, 3]])

In [10]:
# c)

In [10]:
# u1, m5
# Hand-computed prediction: u1's mean rating (2) plus the similarity-weighted
# average of the neighbours' mean-centred m5 ratings.
# Neighbours: u4 (similarity 1, m5 = 5, mean 4) and u2 (similarity 0.845, m5 = 4, mean 3).
pred = (1 * (5-4) + 0.845 * (4-3)) / (1 + 0.845) + 2
print(pred)

3.0


In [13]:
# u4, m1
# (Originally labelled "m5", but u4 has already rated m5; the ratings used
# below are the neighbours' m1 ratings, and m1 is unrated by u4.)
# u1, u2 and u6 all have similarity 1 with u4, so two neighbour pairs are tried.
# u4's mean rating is 4.
# u1: m1 = 3, mean 2; u2: m1 = 4, mean 3.
pred = (1 * (3 - 2) + 1 * (4 - 3)) / (1 + 1) + 4 # with u1 and u2
print(pred)
# u6: m1 = 1, mean 3.33... (rounded to 3.3 here).
pred = (1 * (3 - 2) + 1 * (1 - 3.3)) / (1 + 1) + 4 # with u1 and u6
print(pred)

5.0
3.35


In [15]:
# u4, m6
# Neighbours with similarity 1 that have rated m6: u1 (m6 = 2, mean 2) and
# u2 (m6 = 2, mean 3). u6 also has similarity 1 but has no m6 rating (0).
# u4's mean rating is 4.
pred = (1 * (2 - 2) + 1 * (2 - 3)) / (1 + 1) + 4
print(pred)

3.5


In [16]:
# u5, m4
# u6 is the only other user with a positive similarity to u5 (similarity 1).
# u6: m4 = 5, mean 3.33... (rounded to 3.3 here); u5's mean rating is 3.6.
# NOTE: the result is above 5, the highest rating that appears in the table.
pred = (1 * (5 - 3.3)) / (1) + 3.6
print(pred)

5.300000000000001


In [19]:
# u6, m3
# Neighbours: u4 (similarity 1, m3 = 4, mean 4.0) and u5 (similarity 1, m3 = 5, mean 3.6).
# u6's mean rating 3.33... is rounded to 3.3 here.
pred = (1 * (4 - 4.0) + 1 * (5 - 3.6)) / (1 + 1) + 3.3
print(pred)

4.0


In [20]:
# u6, m5
# Neighbours: u4 (similarity 1, m5 = 5, mean 4.0) and u5 (similarity 1, m5 = 3, mean 3.6).
# u6's mean rating 3.33... is rounded to 3.3 here.
pred = (1 * (5 - 4.0) + 1 * (3 - 3.6)) / (1 + 1) + 3.3
print(pred)

3.5


In [21]:
# u6, m6
# (Originally labelled "m5" a second time; the rating used below is u5's m6
# rating, and m6 is the remaining unrated item of u6.)
# Only u5 (similarity 1, m6 = 3, mean 3.6) is used: u4 also has similarity 1
# with u6 but has no m6 rating (0).
# u6's mean rating 3.33... is rounded to 3.3 here.
pred = (1 * (3 - 3.6)) / (1) + 3.3
print(pred)

2.6999999999999997
