## Recommender Systems with Spotlight (on PyTorch)

### Installation

In [1]:
# Install a conda package in the current Jupyter kernel.
# `--prefix {sys.prefix}` points conda at the prefix of the interpreter running
# this kernel, so the package lands in the environment the notebook imports from.
import sys
# Clear conda's index cache before installing.
!conda clean --index-cache
# Install the pinned Spotlight release (0.1.3) using the maciejkula and pytorch
# channels; --yes answers the confirmation prompt non-interactively.
!conda install --yes --prefix {sys.prefix} -c maciejkula -c pytorch spotlight=0.1.3

Solving environment: done

# All requested packages already installed.



In [2]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.implicit import ImplicitFactorizationModel


### Wide matrix vs. narrow matrix.

#### Using dataset MovieLens 10M

In [3]:
import numpy as np

# Seed for the train/test shuffle. Without an explicit random_state the split
# differs on every run, so the metrics reported below could not be reproduced.
SPLIT_SEED = 42

# Load the MovieLens '10M' variant and split it into train and test interactions.
d1 = get_movielens_dataset(variant='10M')
train, test = random_train_test_split(d1, random_state=np.random.RandomState(SPLIT_SEED))

In [4]:
import time

# Wall-clock timing of fitting the implicit factorization model
# (loss='bpr', n_iter=3) on the training split.
start = time.time()

model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
model.fit(train)

end = time.time()
# print() with one pre-formatted argument emits identical text on Python 2 and
# Python 3; the former `print "..."` statement is a SyntaxError on Python 3.
print("Training finished on %ss" % (end - start))

Training finished on 12789.3924692s


### Evaluation metrics

In [5]:
# Based on precision_recall_score
from spotlight.evaluation import precision_recall_score
import numpy as np

def f1_score(model, test):
    """
    Compute an F1 value for every entry returned by precision_recall_score.

    Parameters
    ----------

    model: fitted instance of a recommender model
        The model to evaluate; passed straight to precision_recall_score.
    test: test interactions
        Passed straight to precision_recall_score.

    Returns
    -------

    f1: list
        One value per precision entry, computed as
        2 * precision * recall / (0.0001 + precision + recall).
    """
    scores = precision_recall_score(model, test)
    per_user_precision = scores[0]
    per_user_recall = scores[1]

    f1_values = []
    for idx in range(len(per_user_precision)):
        p = per_user_precision[idx]
        r = per_user_recall[idx]
        # The 0.0001 term keeps the denominator non-zero when both precision
        # and recall are 0 (it also shrinks every score very slightly).
        f1_values.append(2*p*r/(0.0001+p+r))
    return f1_values

In [6]:
import scipy.stats as st
def map_score(model, test, train=None):
    """
    Compute mean average precision (MAP) scores.
    Calculates the average precision for each user's recommendation vector;
    the caller takes the mean over users to obtain MAP.

    Parameters
    ----------

    model: fitted instance of a recommender model
        The model to evaluate. Must expose ``predict(user_id)`` returning a
        numpy array with one score per item (higher means more relevant).
    test: :class:`spotlight.interactions.Interactions`
        Test interactions.
    train: :class:`spotlight.interactions.Interactions`, optional
        Train interactions. If supplied, items the user interacted with in
        training are pushed to the bottom of the ranking so they do not
        affect the average precision.

    Returns
    -------

    ap: numpy array of shape (num_users_with_test_interactions,)
        Average precision for each user that has at least one test
        interaction, in increasing user id order. Users without test
        interactions are skipped, so the array can be shorter than the
        number of users.
    """

    # Scores are negated before ranking (rank 1 = best), so the largest
    # representable value sends an item to the very end of the ranking.
    # float32 max is used so the sentinel fits whether predict() returns
    # float32 or float64 scores. The original referenced an undefined
    # FLOAT_MAX here, which raised NameError whenever `train` was supplied.
    float_max = np.finfo(np.float32).max

    test = test.tocsr()
    if train is not None:
        train = train.tocsr()

    ap = []

    for user_id, row in enumerate(test):
        # Average precision is undefined for a user with no held-out items.
        if not len(row.indices):
            continue

        predictions = -model.predict(user_id)
        if train is not None:
            predictions[train[user_id].indices] = float_max

        # Ranks (1 = best) of this user's test items, best first.
        ranking = np.sort(st.rankdata(predictions)[row.indices])
        # precision@k at the k-th retrieved test item is k / rank_k.
        hits = np.arange(1, len(ranking) + 1)
        ap.append(np.mean(hits / ranking))

    return np.array(ap)

In [7]:
# Report the mean of the per-user scores for the first model.
# print() with one pre-formatted argument emits identical text on Python 2 and
# Python 3; the former `print "..."` statements are a SyntaxError on Python 3.
my_map_score = map_score(model, test)
print("MAP is %s" % np.mean(my_map_score))

my_f1_score = f1_score(model, test)
print("f1_score is %s" % np.mean(my_f1_score))

MAP is 0.0763492406134
f1_score is 0.0586242915903


#### Using dataset 2: MovieLens 10M transposed

In [8]:
from spotlight import interactions
# Transposed view of d1: items take the role of users and users the role of
# items. Keyword arguments make the swap explicit instead of relying on the
# positional order of seven arguments.
d2 = interactions.Interactions(
    user_ids=d1.item_ids,
    item_ids=d1.user_ids,
    ratings=d1.ratings,
    timestamps=d1.timestamps,
    weights=d1.weights,
    num_users=d1.num_items,
    num_items=d1.num_users,
)
# Seeded shuffle so the train/test membership is reproducible across runs.
train2, test2 = random_train_test_split(d2, random_state=np.random.RandomState(42))

In [9]:
import time

# Wall-clock timing of fitting the same model configuration
# (loss='bpr', n_iter=3) on the transposed training split.
start = time.time()

model2 = ImplicitFactorizationModel(n_iter=3, loss='bpr')
model2.fit(train2)

end = time.time()
# print() with one pre-formatted argument emits identical text on Python 2 and
# Python 3; the former `print "..."` statement is a SyntaxError on Python 3.
print("Training finished on %ss" % (end - start))

Training finished on 9581.53764796s


In [10]:
# Report the mean of the per-user scores for the model trained on the
# transposed dataset.
# print() with one pre-formatted argument emits identical text on Python 2 and
# Python 3; the former `print "..."` statements are a SyntaxError on Python 3.
my_map_score = map_score(model2, test2)
print("MAP is %s" % np.mean(my_map_score))

my_f1_score = f1_score(model2, test2)
print("f1_score is %s" % np.mean(my_f1_score))

MAP is 0.0403900462079
f1_score is 0.0246218219745
