In [2]:
import numpy as np #computing
from lightfm.datasets import fetch_movielens #dataset
from lightfm import LightFM # model
from lightfm.evaluation import precision_at_k #predictions
from lightfm.evaluation import auc_score  #predictions

In [3]:
# Load the MovieLens interactions, keeping only ratings of at least 4.0,
# then show what the returned dictionary contains.
data = fetch_movielens(min_rating=4.0)
print(data)

{'train': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>, 'test': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>, 'item_features': <1682x1682 sparse matrix of type '<class 'numpy.float32'>'
	with 1682 stored elements in Compressed Sparse Row format>, 'item_feature_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
       'Sliding Doors (1998)', 'You So Crazy (1994)',
       'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object), 'item_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
       'Sliding Doors (1998)', 'You So Crazy (1994)',
       'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object)}


In [4]:
# Summary (shape, dtype, stored entries) of the training interaction matrix.
train_matrix = data['train']
print(repr(train_matrix))

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>


In [5]:
# Summary (shape, dtype, stored entries) of the held-out test interaction matrix.
test_matrix = data['test']
print(repr(test_matrix))

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


### The Weighted Approximate-Rank Pairwise model
 - WARP is an implicit feedback model: all interactions in the training matrix are treated as positive signals, and products that users did not interact with are assumed to be ones they implicitly do not like. The goal of the model is to score these implicit positives highly while assigning low scores to implicit negatives.
 - For every pass through the data — an epoch — the model learns to fit the data more and more closely. The model fit method will be executed on 2 threads.

In [6]:
#create model
model = LightFM(loss='warp') # loss measures the difference between our models prediciton and the desired outcome

#train model
%time model.fit(data['train'], epochs= 30, num_threads=2)


Wall time: 4.94 s


<lightfm.lightfm.LightFM at 0x1dfa28c50b8>

### Measuring ranking quality in 2 ways

 #1. Precision@K method, which measures the proportion of positive items among the K highest-ranked items. This means that it doesn't matter how good or bad the rest of the ranking is, as long as the first K items are mostly positive. This is an appropriate metric when users are only shown the very top of the list.

In [7]:
# Report mean precision@5 over all users, first on the training
# interactions and then on the held-out test interactions.
print("Using precision@K:")
for split_label, split_key in (("Train", 'train'), ("Test", 'test')):
    mean_precision = precision_at_k(model, data[split_key], k=5).mean()
    print("%s precision: %.2f" % (split_label, mean_precision))

Using precision@K:
Train precision: 0.56
Test precision: 0.09


#2. AUC method, which measures the quality of the overall ranking. In the binary case, it can be interpreted as the probability that a randomly chosen positive item is ranked higher than a randomly chosen negative item. An AUC close to 1.0 suggests that the overall ordering is correct, and this can be true even if none of the first K items are positives. This metric is more appropriate when high quality throughout the whole ranking is required.

In [8]:
# Report mean AUC over all users for the train and test interactions.
# The values are AUC scores, not precision, so they are labelled as such.
print("Using AUC score:")
print("Train AUC: %.2f" % auc_score(model, data['train']).mean())
print("Test AUC: %.2f" % auc_score(model, data['test']).mean())

Using AUC score:
Train precision: 0.96
Test precision: 0.92


Result interpretation: The model fits the train set better than the test set, as is to be expected.

In [9]:
# (number of users, number of items) in the test interaction matrix.
test_shape = data['test'].shape
print(test_shape)

(943, 1682)


  To make predictions for a given user, pass the id of that user and the ids of all products for which predictions are requested into the predict method of the model.
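  Note: because the data was fetched with `min_rating=4.0`, only ratings of 4 or higher are kept as positive interactions; ratings below 4 are left out of the interaction matrices and are therefore treated like any other item the user did not interact with (an implicit negative).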

In [10]:
def get_recommendation(model, data, user_ids, top_k=5):
    """Print known positives and top-ranked recommendations for each user.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_id, item_ids)`` and returning
        one score per requested item.
    data : dict
        Must contain ``'train'`` (sparse user x item interaction matrix),
        ``'test'`` (matrix whose shape gives the number of items) and
        ``'item_labels'`` (array of item names indexed by item id).
    user_ids : iterable of int
        Row indices of the users to report on.
    top_k : int, optional
        How many known positives and how many recommendations to print per
        user. Defaults to 5.

    Returns
    -------
    None
        Results are written to standard output.
    """
    # number of items (movies), taken from the interaction matrix shape
    n_items = data['test'].shape[1]
    item_labels = data['item_labels']

    # Convert the training interactions to compressed sparse row format once,
    # outside the loop, so that a user's row can be sliced cheaply.
    # Duplicate entries in the matrix are summed together by tocsr().
    train_csr = data['train'].tocsr()

    # Every item id is scored for every user, so build the id array once.
    item_ids = np.arange(n_items)

    # generate recommendations for each user
    for user_id in user_ids:

        # movies the user already likes: the column indices stored in the
        # user's training row
        known_positives = item_labels[train_csr[user_id].indices]

        # scores the model assigns to every movie for this user
        scores = model.predict(user_id, item_ids)

        # rank movies from highest to lowest score
        top_items = item_labels[np.argsort(-scores)]

        # show results
        print("User %s" % user_id)
        print("        Known positives:")

        for x in known_positives[:top_k]:
            print("           %s" % x)

        print("        Recommended:")

        for x in top_items[:top_k]:
            print("           %s" % x)

To get recommendations for one or more users, pass the fitted model, the data set, and the ids of those users to the get_recommendation function.

In [11]:

# Show known positives and recommendations for a few example users.
sample_user_ids = [145, 23, 45]
get_recommendation(model, data, sample_user_ids)


User 145
        Known positives:
           Devil's Own, The (1997)
           Contact (1997)
           Full Monty, The (1997)
           Good Will Hunting (1997)
           Wings of the Dove, The (1997)
        Recommended:
           English Patient, The (1996)
           L.A. Confidential (1997)
           Full Monty, The (1997)
           Boogie Nights (1997)
           Good Will Hunting (1997)
User 23
        Known positives:
           Twelve Monkeys (1995)
           Babe (1995)
           Dead Man Walking (1995)
           Seven (Se7en) (1995)
           Usual Suspects, The (1995)
        Recommended:
           Star Wars (1977)
           Return of the Jedi (1983)
           Fargo (1996)
           Godfather, The (1972)
           Twelve Monkeys (1995)
User 45
        Known positives:
           Twelve Monkeys (1995)
           Star Wars (1977)
           Welcome to the Dollhouse (1995)
           Fargo (1996)
           Phenomenon (1996)
        Recommended:
           Engl