64 changes: 57 additions & 7 deletions implicit/als.py
@@ -1,9 +1,11 @@
""" Implicit Alternating Least Squares """
import heapq
import itertools
import logging
import time

import numpy as np
import scipy

from . import _als
from .recommender_base import RecommenderBase
@@ -58,16 +60,16 @@ def fit(self, item_users):
"""
Ciu, Cui = item_users.tocsr(), item_users.T.tocsr()
items, users = Ciu.shape
self._YtY = None

# Initialize the variables randomly if they haven't already been set
if self.user_factors is None:
self.user_factors = np.random.rand(users, self.factors).astype(self.dtype) * 0.01
if self.item_factors is None:
self.item_factors = np.random.rand(items, self.factors).astype(self.dtype) * 0.01

# invalidate cached norms
# invalidate cached norms and squared factors
self._item_norms = None
self._YtY = None

solver = self.solver

@@ -105,12 +107,46 @@ def recommend(self, userid, user_items, N=10, filter_items=None, recalculate_use
def _user_factor(self, userid, user_items, recalculate_user=False):
if not recalculate_user:
return self.user_factors[userid]
Y = self.item_factors
if self._YtY is None:
self._YtY = Y.T.dot(Y)
return user_factor(Y, self._YtY, user_items, userid,
return user_factor(self.item_factors, self.YtY,
user_items.tocsr(), userid,
self.regularization, self.factors)

def explain(self, userid, user_items, itemid, user_weights=None, N=10):
""" Returns the predicted rating for an user x item pair,
the explanation (the contribution from the top N items the user liked),
and a user latent factor weight that can be cached if you want to
get more than one explanation for the same user.
"""
# user_weights = Cholesky decomposition of Wu^-1
# from section 5 of the paper "Collaborative Filtering for Implicit Feedback Datasets"
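# In that paper's notation, the quantities computed below are, roughly:
#   W^u = (Yt Cu Y + regularization * I)^-1
#   s_u^ij = y_i^t W^u y_j
#   p_ui = sum_j c_uj * s_u^ij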
user_items = user_items.tocsr()
if user_weights is None:
A, _ = user_linear_equation(self.item_factors, self.YtY,
user_items, userid,
self.regularization, self.factors)
user_weights = scipy.linalg.cho_factor(A)
seed_item = self.item_factors[itemid]

# weighted_item = y_i^t W_u
weighted_item = scipy.linalg.cho_solve(user_weights, seed_item)

total_score = 0.0
h = []
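# keep a size-N min-heap of the largest contributions while summing the total predicted score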
for i, (itemid, confidence) in enumerate(nonzeros(user_items, userid)):
factor = self.item_factors[itemid]
# s_u^ij = (y_i^t W^u) y_j
score = weighted_item.dot(factor) * confidence
total_score += score
contribution = (score, itemid)
if i < N:
heapq.heappush(h, contribution)
else:
heapq.heappushpop(h, contribution)

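# the heap pops its smallest element first, so reverse to list contributions in descending score order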
items = (heapq.heappop(h) for i in range(len(h)))
top_contributions = list((i, s) for s, i in items)[::-1]
return total_score, top_contributions, user_weights

def similar_items(self, itemid, N=10):
""" Return the top N similar items for itemid. """
scores = self.item_factors.dot(self.item_factors[itemid]) / self.item_norms
@@ -129,6 +165,13 @@ def solver(self):
return _als.least_squares_cg if self.use_native else least_squares_cg
return _als.least_squares if self.use_native else least_squares

@property
def YtY(self):
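# lazily compute and cache YtY; fit() resets the cache whenever the item factors change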
if self._YtY is None:
Y = self.item_factors
self._YtY = Y.T.dot(Y)
return self._YtY


def alternating_least_squares(Ciu, factors, **kwargs):
""" factorizes the matrix Cui using an implicit alternating least squares
@@ -158,7 +201,10 @@ def least_squares(Cui, X, Y, regularization, num_threads=0):
X[u] = user_factor(Y, YtY, Cui, u, regularization, n_factors)


def user_factor(Y, YtY, Cui, u, regularization, n_factors):
def user_linear_equation(Y, YtY, Cui, u, regularization, n_factors):
# Xu = (YtCuY + regularization * I)^-1 (YtCuPu)
# YtCuY + regularization * I = YtY + regularization * I + Yt(Cu-I)Y

# accumulate YtCuY + regularization*I in A
A = YtY + regularization * np.eye(n_factors)

@@ -169,8 +215,12 @@ def user_factor(Y, YtY, Cui, u, regularization, n_factors):
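# (Cu - I) is zero for items the user has no interactions with, so only the
# nonzero entries of the user's row contribute to the loop below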
factor = Y[i]
A += (confidence - 1) * np.outer(factor, factor)
b += confidence * factor
return A, b


def user_factor(Y, YtY, Cui, u, regularization, n_factors):
# Xu = (YtCuY + regularization * I)^-1 (YtCuPu)
A, b = user_linear_equation(Y, YtY, Cui, u, regularization, n_factors)
return np.linalg.solve(A, b)


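For context, a minimal usage sketch of the new explain method (the toy data and parameter values below are illustrative only): fit a model, take the top recommendation for a user, explain it, and reuse the returned user_weights to explain a second item for the same user.

import numpy as np
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

# toy user x item matrix (rows = users, columns = items); values are confidences
user_items = csr_matrix([[1, 1, 0, 1, 0, 0],
                         [0, 1, 1, 1, 0, 0],
                         [1, 4, 1, 0, 7, 0],
                         [1, 1, 0, 0, 0, 0],
                         [9, 0, 4, 1, 0, 1]], dtype=np.float64)

model = AlternatingLeastSquares(factors=4, regularization=20, iterations=50)
model.fit(user_items.T)  # fit expects an item x user matrix

userid = 0
recs = model.recommend(userid, user_items, N=3)

# explain the top recommendation; W holds the cached Cholesky factor for this user
top_item, top_score = recs[0]
score, contributions, W = model.explain(userid, user_items, itemid=top_item)

# reuse W to explain a second recommendation for the same user more cheaply
second_item, _ = recs[1]
score2, contributions2, _ = model.explain(userid, user_items, itemid=second_item, user_weights=W, N=2)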
51 changes: 50 additions & 1 deletion tests/als_test.py
@@ -50,6 +50,7 @@ def test_factorize(self):
[0, 0, 1, 1, 0, 1],
[0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 1, 1]], dtype=np.float64)
user_items = counts * 2

# try all 8 variants of native/python, cg/cholesky, and
# 64 vs 32 bit factors
@@ -63,7 +64,7 @@
use_native=use_native,
use_cg=use_cg)
np.random.seed(23)
model.fit(counts * 2)
model.fit(user_items)
rows, cols = model.item_factors, model.user_factors

except Exception as e:
@@ -81,6 +82,54 @@ def test_factorize(self):
% (i, j, reconstructed[i, j], dtype, use_cg,
use_native))

def test_explain(self):
counts = csr_matrix([[1, 1, 0, 1, 0, 0],
[0, 1, 1, 1, 0, 0],
[1, 4, 1, 0, 7, 0],
[1, 1, 0, 0, 0, 0],
[9, 0, 4, 1, 0, 1],
[0, 1, 0, 0, 0, 1],
[0, 0, 2, 0, 1, 1]], dtype=np.float64)
user_items = counts * 2
item_users = user_items.T

model = AlternatingLeastSquares(factors=4,
regularization=20,
use_native=False,
use_cg=False,
iterations=100)
np.random.seed(23)
model.fit(user_items)

userid = 0

# Assert recommendations are the same if we recompute user vectors
recs = model.recommend(userid, item_users, N=10)
recalculated_recs = model.recommend(userid, item_users, N=10, recalculate_user=True)
for (item1, score1), (item2, score2) in zip(recs, recalculated_recs):
self.assertEqual(item1, item2)
self.assertAlmostEqual(score1, score2, 4)

# Assert explanation makes sense
top_rec, score = recalculated_recs[0]
score_explained, contributions, W = model.explain(userid, item_users, itemid=top_rec)
scores = [s for _, s in contributions]
items = [i for i, _ in contributions]
self.assertAlmostEqual(score, score_explained, 4)
self.assertAlmostEqual(score, sum(scores), 4)
self.assertEqual(scores, sorted(scores, reverse=True), "Scores not in order")
self.assertEqual([0, 2, 3, 4], sorted(items), "Items not seen by user")

# Assert explanation with precomputed user weights is correct
top_score_explained, top_contributions, W = model.explain(
userid, item_users, itemid=top_rec, user_weights=W, N=2)
top_scores = [s for _, s in top_contributions]
top_items = [i for i, _ in top_contributions]
self.assertEqual(2, len(top_contributions))
self.assertAlmostEqual(score, top_score_explained, 4)
self.assertEqual(scores[:2], top_scores)
self.assertEqual(items[:2], top_items)


if __name__ == "__main__":
unittest.main()