In [89]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import spsolve
from scipy import sparse

In [90]:
artists = pd.read_csv("hw2-bundle/hw2-bundle/implicit_feedback/data/artists.txt", sep="\t")
user_artists = pd.read_csv("hw2-bundle/hw2-bundle/implicit_feedback/data/user_artists.txt", sep="\t")
user_artists_small = pd.read_csv("hw2-bundle/hw2-bundle/implicit_feedback/data/user_artists_small.txt", sep="\t")

In [91]:
artists.head(3)

Unnamed: 0,artistID,name
0,0,MALICE MIZER
1,1,Diary of Dreams
2,2,Carpathian Forest


In [92]:
user_artists.head(3)

Unnamed: 0,userID,artistID,interactions
0,0,45,13883
1,0,46,11690
2,0,47,11351


In [93]:
user_artists_small

Unnamed: 0,userID,artistID,interactions
0,0,45,13883
1,0,46,11690
2,0,47,11351
3,0,48,10300
4,0,49,8983
...,...,...,...
260,94,75,73
261,94,83,45
262,95,59,194
263,96,7,1513


In [104]:
# m = 1882
# n = 3000
m = 100
n = 100
f = 3
lamda = 0.01

X = sparse.csr_matrix(np.zeros((m, f)) + 0.5)  
Y = sparse.csr_matrix(np.zeros((n, f))) 

# construct the preference matrix and confidence matrix
# YOUR CODE HERE
data = user_artists_small
# data = user_artists
alpha = np.sum(data["interactions"] > 0) / (m * n - np.sum(data["interactions"] > 0))
P = sparse.csr_matrix(((data["interactions"] > 0).astype(int), (data["userID"], data["artistID"])), 
                      shape=(m, n))
C = sparse.csr_matrix((1 + alpha * data["interactions"], (data["userID"], data["artistID"])), 
                      shape=(m, n))    

In [103]:
for i in range(len(user_artists_small)):
    if (int(user_artists_small["interactions"][i] > 0) != P[user_artists_small["userID"][i], user_artists_small["artistID"][i]]):
        print("STOP")

In [105]:
def ALS(X, Y, P, C, m, n, f, lamda, MAX_ITER = 100):
    """"
    X: initial user matrix
    Y: initial item matrix
    P: preference matrix
    C: confidence matrix
    """
    # objective function
    def objective(X, Y, C, P, lam):
        return (C.multiply((P - X @ Y.T).power(2))).sum() + lam * ((X.power(2)).sum() + (Y.power(2)).sum())

    # construct lambda * I
    # YOUR CODE HERE
    lamda_eye = lamda * sparse.eye(f, f)
    objectives = np.zeros(MAX_ITER + 1)
    objectives[0] = objective(X, Y, C, P, lamda)
    print(f"At iteration 0, the objective is {objectives[0]}.")

    for t in range(MAX_ITER):
        # update item matrix
        # YOUR CODE HERE
        xTx = X.T.dot(X)
        for item in range(n):
            p_i = P[:, item]
            # C_i_I = sparse.diags(C[:, item].toarray()) - sparse.eye(m, m)  
            C_i_I = sparse.diags_array([C[j,item] for j in range(C.shape[0])]) - sparse.eye(m, m)       # the sparse matrix: C_i - I
            left = xTx + X.T.dot(C_i_I).dot(X) + lamda_eye	# please use the trick in spec 
            right = X.T.dot(C_i_I).dot(p_i) + X.T.dot(p_i)	   # please use the trick in spec 
            y_i = spsolve(left, right)
            Y[item] = y_i
        
        # update user matrix   
        # YOUR CODE HERE     	
        yTy = Y.T.dot(Y)
        for user in range(m):
            p_u = P[user, :].T
            C_u_I = sparse.diags_array([C[user,j] for j in range(C.shape[1])]) - sparse.eye(n, n)      # the sparse matrix: C_u - I
            left = yTy + Y.T.dot(C_u_I).dot(Y) + lamda_eye	   # please use the trick in spec 
            right =  Y.T.dot(C_u_I).dot(p_u) + Y.T.dot(p_u)      # please use the trick in spec 
            x_u = spsolve(left, right)
            X[user] = x_u
        
        objectives[t + 1] = objective(X, Y, C, P, lamda)
        print(f"At iteration {t + 1}, the objective is {objectives[t + 1]}.")
    return X, Y, objectives

In [108]:
result = ALS(X, Y, P, C, m, n, f, lamda)

At iteration 0, the objective is 1.5081458831779613.


In [66]:
np.argsort((result[0] @ result[1].T)[0,:].toarray())

array([[36, 43, 24, 44, 22, 21, 20, 19, 18, 34, 16, 15, 14, 28, 40, 11,
        10,  9,  8, 38, 32, 37,  4,  3,  2,  1, 26, 42, 99, 98, 96, 97,
        95, 41, 12, 13,  0, 39, 33, 29, 30,  5, 23, 25, 31, 27, 17,  7,
         6, 90, 89, 88, 87, 86, 84, 81, 80, 92, 77, 85, 73, 76, 78, 72,
        66, 93, 68, 67, 59, 94, 69, 75, 56, 50, 62, 82, 70, 54, 64, 49,
        91, 65, 79, 60, 45, 57, 61, 35, 55, 51, 71, 63, 74, 52, 46, 53,
        47, 48, 83, 58]], dtype=int64)