## Recommender Systems with Spotlight (on PyTorch)

### Installation

In [1]:
# Install a conda package in the current Jupyter kernel.
# IPython interpolates `{sys.prefix}` into the shell command, so conda installs
# into the environment the running kernel uses (passed via `--prefix`).
import sys
# Clear conda's cached channel index before installing.
!conda clean --index-cache
# Pin spotlight to 0.1.3, searching the `maciejkula` and `pytorch` channels;
# `--yes` skips the interactive confirmation, which a notebook cell cannot answer.
!conda install --yes --prefix {sys.prefix} -c maciejkula -c pytorch spotlight=0.1.3

Solving environment: done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs: 
    - spotlight=0.1.3


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2017.11.5          |   py27h71e7faf_0         196 KB
    spotlight-0.1.3            |   py27h7f24ad5_0          42 KB  maciejkula
    openssl-1.0.2n             |       hb7f436b_0         3.4 MB
    pytorch-0.3.0              |py27_cuda8.0.61_cudnn7.0.3hf383a3f_4       416.5 MB  pytorch
    cudatoolkit-8.0            |                3       322.4 MB
    ------------------------------------------------------------
                                           Total:       742.6 MB

The following NEW packages will be INSTALLED:

    cudatoolkit: 8.0-3                                                
    pytorch:     0.3.0-py27_cuda8.0.61_cudnn7.0.3hf383a3f_4 pytorch   
    spotlight:   0.1.3-py27h7f24ad5_0 

In [2]:
import numpy as np

from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.implicit import ImplicitFactorizationModel


### Wide matrix vs. narrow matrix.

#### Using the MovieLens dataset

### Configuration

In [3]:
# Forwarded as `use_cuda` to every ImplicitFactorizationModel in this notebook.
USE_GPU = False
# MovieLens variant name handed to get_movielens_dataset(variant=...).
DATASET = '1M'

In [4]:
# Load the MovieLens variant selected in the configuration cell.
d1 = get_movielens_dataset(variant=DATASET)
# Fixed seed: without it every Restart & Run All produces a different
# train/test split, so the reported metrics are not reproducible.
train, test = random_train_test_split(d1, random_state=np.random.RandomState(42))

In [5]:
import time
start = time.time()

# Seed the model so repeated runs start from the same random state;
# otherwise differences between this model and the transposed one cannot be
# separated from run-to-run noise.
# NOTE(review): bit-for-bit repeatability on GPU is not guaranteed - confirm if USE_GPU is enabled.
model = ImplicitFactorizationModel(n_iter=3, loss='bpr', use_cuda=USE_GPU,
                                   random_state=np.random.RandomState(42))
model.fit(train)

end = time.time()
print("Training finished on {}s".format(end - start))

Training finished on 30.8242278099s


### Evaluation metrics

In [6]:
# Based on precision_recall_score
from spotlight.evaluation import precision_recall_score
import numpy as np

def f1_score(model, test):
    """
    Compute smoothed F1 values from ``precision_recall_score``.

    Parameters
    ----------

    model: fitted instance of a recommender model
        Passed straight through to ``precision_recall_score``.
    test: test interactions
        Passed straight through to ``precision_recall_score``.

    Returns
    -------

    f1: list
        One entry per element of the precision result, computed as
        ``2 * p * r / (0.0001 + p + r)``. The 0.0001 term keeps the
        denominator non-zero when precision and recall are both 0.
    """
    scores = precision_recall_score(model, test)
    precision, recall = scores[0], scores[1]

    f1_values = []
    for idx, p in enumerate(precision):
        r = recall[idx]
        f1_values.append(2*p*r/(0.0001+p+r))
    return f1_values

In [7]:
import scipy.stats as st
def map_score(model, test, train=None):
    """
    Compute per-user average precision (AP) scores.

    Calculates the average precision of each user's recommendation
    ranking; taking the mean of the returned array gives the mean
    average precision (MAP).

    Parameters
    ----------

    model: fitted instance of a recommender model
        The model to evaluate. Must expose ``predict(user_id)`` returning
        one score per item (higher means more relevant).
    test: :class:`spotlight.interactions.Interactions`
        Test interactions.
    train: :class:`spotlight.interactions.Interactions`, optional
        Train interactions. If supplied, known interactions are pushed
        to the bottom of the ranking and so do not affect the MAP.

    Returns
    -------

    map score: numpy array of shape (num_evaluated_users,)
        Average precision for each user that has at least one test
        interaction, in increasing user-id order. Users without test
        interactions are skipped, so the array can be shorter than the
        number of users.
    """

    # Largest finite float32. Scores are negated before ranking, so writing
    # this value onto a training item gives it the worst possible rank.
    # (The original referenced an undefined FLOAT_MAX and raised NameError
    # whenever `train` was supplied.)
    float_max = np.finfo(np.float32).max

    test = test.tocsr()
    if train is not None:
        train = train.tocsr()

    ap = []

    for user_id, row in enumerate(test):
        # Average precision is undefined for a user with no test items.
        if not len(row.indices):
            continue

        # Negate so that rankdata assigns rank 1 to the highest-scored item.
        predictions = -model.predict(user_id)
        if train is not None:
            predictions[train[user_id].indices] = float_max

        # Ascending ranks of this user's test items among all items.
        ranking = np.sort(st.rankdata(predictions)[row.indices])
        # The (index + 1)-th test item sits at rank `value`, so precision at
        # that cut-off is (index + 1) / value; rankdata returns floats, so
        # this is true division on Python 2 as well.
        prec = [(index + 1) / value for index, value in enumerate(ranking)]
        ap.append(sum(prec) / len(ranking))

    return np.array(ap)

In [8]:
# Time both metrics together; the elapsed figure printed below covers MAP and F1.
start = time.time()
# `train` is not passed to map_score, so items the user already interacted
# with during training are NOT masked out of the ranking.
my_map_score = map_score(model, test)
# map_score returns one average precision per evaluated user; the mean is MAP.
print("MAP is {}".format(np.mean(my_map_score)))

# f1_score returns a list of F1 values; report their mean.
my_f1_score = f1_score(model, test)
print("f1_score is {}".format(np.mean(my_f1_score)))

end = time.time()
print("Evaluation finished on {}s".format(end - start))

MAP is 0.0762944616631
f1_score is 0.0540406269007
Evaluation finished on 9.05224609375s


#### Using dataset 2: users and items transposed

In [9]:
from spotlight import interactions
# Transpose d1: its items become the "users" of d2 and its users become the
# "items". Keyword arguments make the swap (ids and counts) explicit rather
# than relying on the positional order of seven arguments.
d2 = interactions.Interactions(
    user_ids=d1.item_ids,
    item_ids=d1.user_ids,
    ratings=d1.ratings,
    timestamps=d1.timestamps,
    weights=d1.weights,
    num_users=d1.num_items,
    num_items=d1.num_users
)
# Fixed seed so the split, and therefore the comparison with the first
# dataset, is reproducible across runs.
train2, test2 = random_train_test_split(d2, random_state=np.random.RandomState(42))

In [10]:
import time
start = time.time()

# Same hyper-parameters as the first model, with a fixed seed so that the
# wide-vs-narrow comparison is not dominated by run-to-run randomness.
# NOTE(review): bit-for-bit repeatability on GPU is not guaranteed - confirm if USE_GPU is enabled.
model2 = ImplicitFactorizationModel(n_iter=3, loss='bpr', use_cuda=USE_GPU,
                                    random_state=np.random.RandomState(42))
model2.fit(train2)

end = time.time()
print("Training finished on {}s".format(end - start))

Training finished on 35.3453760147s


In [11]:
# Evaluate the model trained on the transposed data. This reuses the names
# my_map_score / my_f1_score, overwriting the values from the first dataset.
start = time.time()
# As before, `train2` is not passed, so training interactions are not masked.
my_map_score = map_score(model2, test2)
# Each row of test2 is an original *item*, so these averages are taken over
# items rather than over users.
print("MAP is {}".format(np.mean(my_map_score)))

my_f1_score = f1_score(model2, test2)
print("f1_score is {}".format(np.mean(my_f1_score)))

end = time.time()
print("Evaluation finished on {}s".format(end - start))

MAP is 0.0664028952701
f1_score is 0.0359944871129
Evaluation finished on 8.42612719536s
