In [186]:
import pandas as pd
import numpy as np
import scipy as sp
import scipy.sparse
import scipy.sparse.linalg

# Read the LastFM datasets
artistsNames = ["id", "name", "url", "pictureURL"]
artists = pd.read_table("/Users/daniel/Documents/Blog/artists.dat", header=True, sep="\t", names=artistsNames)
userArtistNames = ["userID", "artistID", "weight"]
userArtists = pd.read_table("/Users/daniel/Documents/Blog/user_artists.dat", header=True, sep="\t", names=userArtistNames)


In [187]:
f = 20
iters = 10
regLambda = 0.1
alpha = 40
n = max(artists.id)
m = pd.Series.unique(userArtists.userID).size


In [188]:
# Create a dummy entry for each movie
temp = np.zeros((n-1, 3))
for i in range(1, n):
    temp[i-1,] = [m+1, i, 0]
    
userArtists = userArtists.append(pd.DataFrame(temp, columns = userArtistNames))

ratingsMatrix = userArtists.pivot_table(columns=['artistID'], index =['userID'], values='weight', dropna = False)

ratingsMatrix = ratingsMatrix.fillna(0).as_matrix()

# Drop the dummy movie
ratingsMatrix = sp.sparse.csr_matrix(ratingsMatrix[0:m,0:n])

In [189]:
def normaliseRow(x):
    return x / sum(x)

def initialiseMatrix(n, f):
    A = abs(np.random.randn(n, f))
    return sp.sparse.csr_matrix(np.apply_along_axis(normaliseRow, 1, A))

def ratingsPred(X, Y):
    return X.dot(Y.T)

def calculateMSE(X, Y, ratingsMatrix):
    ratingsPrediction = ratingsPred(X, Y)
    ratingsDiff = ratingsPrediction - ratingsMatrix
    return (ratingsDiff.multiply(ratingsDiff)).mean()

def calculateWeightedMSE(X, Y, P, ratingsMatrix, alpha):
    ratingsPrediction = ratingsPred(X, Y)
    C =  ratingsMatrix.multiply(1 + alpha)
    weightedDiff = C.multiply(P - ratingsPrediction)
    return weightedDiff.multiply(weightedDiff).mean()

In [190]:
# Initialise Y matrix, n x f
Y = initialiseMatrix(n, f)
# Initialise X matrix, m x f
X = initialiseMatrix(m, f)

reg = regLambda * np.eye(f,f)

P = ratingsMatrix
P[ratingsMatrix > 0] = 1

print(calculateWeightedMSE(X, Y, P, ratingsMatrix, alpha))

3.97106814192


In [191]:
userWeights = range(m)
userDiag = sp.sparse.diags(np.repeat(1, n), 0)
denseRatings = ratingsMatrix.toarray()
for i in range(0, m-1):
    userWeights[i+1] = sp.sparse.diags(denseRatings[i,:], 0)


In [192]:
itemWeights = range(n)
itemDiag =  sp.sparse.diags(np.repeat(1, m), 0)
for i in range(0, n-1):
    itemWeights[i+1] = sp.sparse.diags(denseRatings[:,i], 0)

In [193]:
for k in range(1, iters):
    Yt = Y.T
    YtY = Yt.dot(Y)
    
    for i in range(1, m):    
        YTCuY = YtY + Yt.dot(userWeights[i] - userDiag).dot(Y)
        updateX = sp.sparse.linalg.spsolve(YTCuY + reg, Yt.dot(userWeights[i]).dot(P.getrow(i).T))                    
        X[i,] = updateX
        
    Xt = X.T
    XtX = Xt.dot(X)
    for j in range(1, n):
        XTCuX = XtX + Xt.dot(itemWeights[j] - itemDiag).dot(X)
        updateY = sp.sparse.linalg.spsolve(XTCuX + reg, Xt.dot(itemWeights[j]).dot(P.getcol(j)))                                                   
        Y[j,] = updateY
        
    wMse = calculateWeightedMSE(X, Y, P, ratingsMatrix, alpha)
    print("Weighted MSE: " + str(wMse))
        
print("Done")

Weighted MSE: 3.86750556942
Weighted MSE: 3.87483479259
Weighted MSE: 3.87117675874
Weighted MSE: 3.88324130693
Weighted MSE: 3.8658264083
Weighted MSE: 3.85207893295
Weighted MSE: 3.85665103746
Weighted MSE: 3.85891818292
Weighted MSE: 3.87284096409
Done
