## Recommender Systems with Spotlight (on Pytorch)

### Installation

In [2]:
# Install a conda package in the current Jupyter kernel
print(1)
import sys
!conda clean --index-cache
!conda install --yes --prefix {sys.prefix} -c maciejkula -c pytorch spotlight=0.1.3

Solving environment: | ^C
failed

CondaError: KeyboardInterrupt



In [41]:
!conda install --yes -c conda-forge tqdm

Solving environment: done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs: 
    - tqdm


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    tqdm-4.19.5                |             py_0          32 KB  conda-forge
    ca-certificates-2018.1.18  |                0         140 KB  conda-forge
    certifi-2018.1.18          |           py27_0         143 KB  conda-forge
    openssl-1.0.2n             |                0         3.5 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.8 MB

The following NEW packages will be INSTALLED:

    tqdm:            4.19.5-py_0           conda-forge

The following packages will be UPDATED:

    ca-certificates: 2017.08.26-h1d4fec5_0             --> 2018.1.18-0      conda-forge
    certifi:         2018.1.18-py27_0                  --> 

In [33]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.implicit import ImplicitFactorizationModel

### Wide matrix vs. narrow matrix.

#### Using dataset Movielens

### Configuration

### Evaluation metrics

In [37]:
# Based on precision_recall_score
from spotlight.evaluation import precision_recall_score
import numpy as np

def f1_score(model, test):
    pre_rec  = precision_recall_score(model, test)
    precision = pre_rec[0]
    recall = pre_rec[1]
    f1 = [2*precision[i]*recall[i]/(0.0001+precision[i]+recall[i]) for i in range(len(precision))]
    return f1

In [1]:
import scipy.stats as st
import pdb
def map_score(model, test, train=None):
    """
    Compute mean average precision (MAP) scores.
    then computes the resultant mean for all users.

    Parameters
    ----------

    model: fitted instance of a recommender model
        The model to evaluate.
    test: :class:`spotlight.interactions.Interactions`
        Test interactions.
    train: :class:`spotlight.interactions.Interactions`, optional
        Train interactions. If supplied, scores of known
        interactions will be set to very low values and so not
        affect the MAP.

    Returns
    -------

    map score: numpy array of shape (num_users,)
        Array of MAP scores for each user in test.
    """

    test = test.tocsr()
    if train is not None:
        train = train.tocsr()

    ap = []
    import pdb
    for user_id, row in enumerate(test):
        if not len(row.indices):
            continue
        predictions = -model.predict(user_id)
        if train is not None:
            pdb.set_trace()
            predictions[train[user_id].indices] = FLOAT_MAX
        prec = []
        #ranking = np.sort(st.rankdata(predictions)[row.indices])
        ranking = predictions[row.indices]
        num_hits = 0.0
        score = 0.0
        for index, value in enumerate(ranking):
            num_hits += 1.0
            score += num_hits / (row.indices[index]+1.0)
            #prec.append((index + 1) / value)
        prec.append(score)
        ap.append(sum(prec) / len(ranking))
    return np.array(ap)

La métrica MAP ha sido sacada de https://www.kaggle.com/c/avito-prohibited-content/discussion/9584 . Basado en la implementación de average precision de https://www.kaggle.com/hardyce/testing-mapk

In [2]:
import numpy as np
from tqdm import tqdm

def average_precision(relevant_items, recommended, recomm_length):
    """
    Calculates the average precision for the specified recommendation_length.
    This function computes the average precision at recommendation_length between two lists of
    items.
        :type relevant_items: list
    :param relevant_items: Relevant items for the user
    :type recommended: list
    :param recommended: Items recommended for the user
    :type recomm_length: int
    :param recomm_length: Length of the recommendation
    :rtype: float
    """
    if len(recommended)>recomm_length:
        recommended = recommended[:recomm_length]
    score = 0.0
    num_hits = 0.0
    for i,p in enumerate(recommended):
        if p in relevant_items and p not in recommended[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)
    if not relevant_items:
        return 0.0
    return score / min(len(relevant_items), recomm_length)

def get_relevant_items(user, threshold, actions):
    """
    Gets the
    :param user: User to obtain relevant items
    :type user: str
    :param threshold: Threshold to separate relevant from non relevant
    :type threshold: float
    :rtype: ndarray
    """
    user_actions = actions[actions['user_id'] == user]
    relevant_user_items = user_actions[user_actions['value'] >= threshold]['item_id'].values.tolist()
    return relevant_user_items

def mean_average_precision(recommendations, users, threshold, actions, at):
    """
    Calculates the mean average precision
    :type recommendations: list
    :param recommendations: List of recommendations
    :type users: list
    :param users: Users for MAP
    :type threshold: float
    :param threshold: Relevant items threshold
    :return:
    """
    average_precisions = []
    print "Average Precission progress"
    for recommend in tqdm(recommendations):
        user_relevant_items = get_relevant_items(recommend.get('user'), threshold, actions)
        ap = average_precision(user_relevant_items, recommend.get('recommendation'),at)
        average_precisions.append(ap)
    return np.mean(average_precisions)

### Aproximación 1

Para cada usuario recomendar los items con mayor ranking y posteriormente calcular la precisión de estas recomendaciones

In [34]:
USE_GPU = False
DATASET = '1M'

In [35]:
d1 = get_movielens_dataset(variant=DATASET)
train, test = random_train_test_split(d1)

In [36]:
import time
start = time.time()

model = ImplicitFactorizationModel(n_iter=3, loss='bpr', use_cuda=USE_GPU)
model.fit(train)

end = time.time()
print("Training finished on {}s".format(end - start))

Training finished on 84.8973610401s


#### Métrica Ramiro

In [40]:
start = time.time()
my_map_score = map_score(model, test)
print("MAP is {}".format(np.mean(my_map_score)))

my_f1_score = f1_score(model, test)
print("f1_score is {}".format(np.mean(my_f1_score)))

end = time.time()
print("Evaluation finished on {}s".format(end - start))

MAP is 0.0330083207166
f1_score is 0.0558900347804
Evaluation finished on 24.4335558414s


#### Métrica Labs

In [41]:
from tqdm import tqdm
prediction_dict = {}
for item_id in tqdm(np.unique(test.user_ids)):
    prediction = model.predict(item_id)
    score_list = []
    for index, rating in enumerate(prediction.tolist()):
        score_list.append({'rating': rating, 'user_id': index})
    prediction_dict[item_id] = list(map(lambda x:x['user_id'],sorted(score_list, key=lambda x:x['rating'], reverse=True)))

100%|██████████| 6036/6036 [01:04<00:00, 94.13it/s] 


In [42]:
test_dict = {}
import pdb
for item_id in tqdm(np.unique(test.user_ids)):
    ratings_list = []
    for index, user_id in enumerate(test.item_ids[test.user_ids == item_id]):
        ratings_list.append({'user_id': user_id, 'rating': test.ratings[test.user_ids == item_id][index]})
    test_dict[user_id] = list(map(lambda x:x['user_id'], sorted(ratings_list, key=lambda x: x['rating'], reverse=True)))

100%|██████████| 6036/6036 [00:18<00:00, 335.16it/s]


In [43]:
average_precisions = []
import pdb
k= 6040
for item_id, users_list in tqdm(prediction_dict.items()):
    if test_dict.get(item_id):
        average_precisions.append(average_precision(test_dict[item_id],prediction_dict[item_id],k))
print("MAP@{} is {}".format(k,np.mean(average_precisions)))


100%|██████████| 6036/6036 [04:53<00:00, 20.59it/s]

MAP@6040 is 0.0462438723141





### Aproximación 2

Transponer la matriz de usuarios-items, para entrenar el sistema y para cada item recomendar los usuarios que mejor han valorado ese item. Posteriormente calcular la precisión de esta aproximación

In [44]:
from spotlight import interactions
d2 = interactions.Interactions(d1.item_ids, d1.user_ids, d1.ratings,d1.timestamps,d1.weights,d1.num_items,d1.num_users)
train2, test2 = random_train_test_split(d2)

In [45]:
import time
start = time.time()

model2 = ImplicitFactorizationModel(n_iter=3, loss='bpr', use_cuda=USE_GPU)
model2.fit(train2)

end = time.time()
print("Training finished on {}s".format(end - start))

Training finished on 85.9191660881s


#### Métrica Ramiro

In [46]:
start = time.time()
my_map_score = map_score(model2, test2)
print("MAP is {}".format(np.mean(my_map_score)))

my_f1_score = f1_score(model2, test2)
print("f1_score is {}".format(np.mean(my_f1_score)))

end = time.time()
print("Evaluation finished on {}s".format(end - start))

MAP is 0.0103377607946
f1_score is 0.0362460288795
Evaluation finished on 18.3569450378s


#### Métrica Labs

In [47]:
from tqdm import tqdm
prediction_dict2 = {}
for item_id in tqdm(np.unique(test2.user_ids)):
    prediction = model2.predict(item_id)
    score_list = []
    for index, rating in enumerate(prediction.tolist()):
        score_list.append({'rating': rating, 'user_id': index})
    prediction_dict2[item_id] = list(map(lambda x:x['user_id'],sorted(score_list, key=lambda x:x['rating'], reverse=True)))

100%|██████████| 3468/3468 [00:37<00:00, 92.79it/s]


In [48]:
test_dict2 = {}
import pdb
for item_id in tqdm(np.unique(test2.user_ids)):
    ratings_list = []
    for index, user_id in enumerate(test2.item_ids[test2.user_ids == item_id]):
        ratings_list.append({'user_id': user_id, 'rating': test2.ratings[test2.user_ids == item_id][index]})
    test_dict2[user_id] = list(map(lambda x:x['user_id'], sorted(ratings_list, key=lambda x: x['rating'], reverse=True)))

100%|██████████| 3468/3468 [00:18<00:00, 191.59it/s]


In [49]:
average_precisions = []
import pdb
k= 6040
for item_id, users_list in tqdm(prediction_dict2.items()):
    if test_dict2.get(item_id):
        average_precisions.append(average_precision(test_dict2[item_id],prediction_dict2[item_id],k))
print("MAP@{} is {}".format(k,np.mean(average_precisions)))


100%|██████████| 3468/3468 [06:00<00:00,  9.61it/s]

MAP@6040 is 0.0317547924938





### Aproximación 3

Generar recomendaciones de los items para usuarios, agrupar posteriormente para cada item los usuarios que lo han votado y calcular su precisión

In [3]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.factorization.implicit import ImplicitFactorizationModel

In [4]:
USE_GPU = False
DATASET = '1M'

In [5]:
d3 = get_movielens_dataset(variant=DATASET)
train3, test3 = random_train_test_split(d3)

In [6]:
import time
start = time.time()

model3 = ImplicitFactorizationModel(n_iter=3, loss='bpr', use_cuda=USE_GPU)
model3.fit(train3)

end = time.time()
print("Training finished on {}s".format(end - start))

Training finished on 70.0328481197s


#### Métrica Labs

In [8]:
import pdb
from tqdm import tqdm
predict_dict_users3 = {}
predict_dict_items3 = {}
for user_id in tqdm(np.unique(test3.user_ids)):
    try:
        prediction = model3.predict(user_id)
        predict_dict_users3[str(user_id)] = prediction.tolist()
        prediction_dict3 = {}
        for item_id, score in enumerate(prediction):
            if predict_dict_items3.get(str(item_id)) is None:
                #predict_dict_items[str(item_id)] = []
                predict_dict_items3[str(item_id)] = np.empty(6040,)
            #predict_dict_items[str(item_id)].append({'r':score, 'u': user_id})
            np.put(predict_dict_items3[str(item_id)], user_id, score)
    except Exception as e:
        print e
        print item_id
        print user_id

100%|██████████| 6035/6035 [01:03<00:00, 95.30it/s] 

index 6040 is out of bounds for axis 0 with size 6040
0
6040





In [9]:
import pdb
prediction_dict = {}
for item_id, values in tqdm(predict_dict_items3.items()):
    score_list = []
    for index, rating in enumerate(values.tolist()):
        score_list.append({'rating': rating, 'user':index})
    prediction_dict3[item_id] = list(map(lambda x:x['user'],sorted(score_list, key=lambda x:x['rating'], reverse=True)))

100%|██████████| 3707/3707 [00:28<00:00, 130.91it/s]


In [10]:
test_dict3 = {}
import pdb
for item_id in tqdm(np.unique(test3.item_ids)):
    ratings_list = []
    for index, user_id in enumerate(test3.user_ids[test3.item_ids == item_id]):
        ratings_list.append({'user': user_id, 'rating': test3.ratings[test3.item_ids == item_id][index]})
    test_dict3[item_id] = list(map(lambda x:x['user'], sorted(ratings_list, key=lambda x: x['rating'], reverse=True)))


100%|██████████| 3487/3487 [00:18<00:00, 193.17it/s]


In [12]:
average_precisions = []
k = 6040
for items, users in test_dict3.items():
    average_precisions.append(average_precision(test_dict3[items],prediction_dict3[str(items)],k))
print("MAP@{} is {}".format(k,np.mean(average_precisions)))


MAP@6040 is 0.0371521970202
