In [1]:
import numpy as np

from lightfm.datasets import fetch_movielens

# Load the MovieLens data through lightfm's helper, passing min_rating=5.0.
# NOTE(review): presumably min_rating=5.0 keeps only 5-star ratings as
# interactions — confirm against the fetch_movielens documentation.
data = fetch_movielens(min_rating=5.0)
# Bare expression: the notebook displays the returned dictionary.
data


{'item_feature_labels': array([u'Toy Story (1995)', u'GoldenEye (1995)', u'Four Rooms (1995)', ...,
        u'Sliding Doors (1998)', u'You So Crazy (1994)',
        u'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'item_features': <1682x1682 sparse matrix of type '<type 'numpy.float32'>'
 	with 1682 stored elements in Compressed Sparse Row format>,
 'item_labels': array([u'Toy Story (1995)', u'GoldenEye (1995)', u'Four Rooms (1995)', ...,
        u'Sliding Doors (1998)', u'You So Crazy (1994)',
        u'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'test': <943x1682 sparse matrix of type '<type 'numpy.int32'>'
 	with 2153 stored elements in COOrdinate format>,
 'train': <943x1682 sparse matrix of type '<type 'numpy.int32'>'
 	with 19048 stored elements in COOrdinate format>}

In [2]:
# Show the repr of each interaction split, 'train' first and then 'test'.
for split_name in ('train', 'test'):
    print(repr(data[split_name]))

<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 19048 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<type 'numpy.int32'>'
	with 2153 stored elements in COOrdinate format>


In [3]:
from lightfm import LightFM

In [4]:
# Build a LightFM model with loss='warp'; every other constructor argument is
# left at its library default.
# NOTE(review): no random seed is passed here, so results may vary between
# runs — consider fixing one if the installed lightfm version supports it
# (TODO confirm).
model = LightFM(loss='warp')
# Fit on the training interactions for 30 epochs with 2 threads; the %time
# line magic reports how long the fit call takes.
%time model.fit(data['train'], epochs=30, num_threads=2)


CPU times: user 1.11 s, sys: 0 ns, total: 1.11 s
Wall time: 1.57 s


<lightfm.lightfm.LightFM at 0x7f674c48b190>

In [5]:
from lightfm.evaluation import precision_at_k

In [6]:
# Mean of the precision@5 values returned for the training interactions.
train_precision = precision_at_k(model, data['train'], k=5).mean()
print("Train precision: %.2f" % train_precision)

# Same metric, computed on the test interactions.
test_precision = precision_at_k(model, data['test'], k=5).mean()
print("Test precision: %.2f" % test_precision)

Train precision: 0.39
Test precision: 0.06


In [7]:
def sample_recommendation(model, data, user_ids):
    """Print up to three known positives and the three top-scored items per user.

    Parameters
    ----------
    model
        Object exposing ``predict(user_id, item_ids)`` that returns one score
        per requested item id; items with higher scores are listed first.
    data : dict
        Must contain ``'train'`` (a sparse user-by-item matrix that supports
        ``.tocsr()``) and ``'item_labels'`` (an array of item names that can
        be indexed with an array of item indices).
    user_ids : iterable of int
        Row indices (into ``data['train']``) of the users to report on.

    Returns
    -------
    None
        The report is written to stdout.
    """
    # The CSR conversion, the item id range and the label array do not depend
    # on the user, so they are computed once instead of once per iteration.
    train_csr = data['train'].tocsr()
    item_labels = data['item_labels']
    item_ids = np.arange(train_csr.shape[1])

    for user_id in user_ids:
        # Column indices stored in this user's row are the items they
        # interacted with in the training data.
        known_positives = item_labels[train_csr[user_id].indices]

        scores = model.predict(user_id, item_ids)
        # argsort is ascending, so negate the scores to rank best-first.
        top_items = item_labels[np.argsort(-scores)]

        print("User %s" % user_id)
        print("     Known positives:")

        for x in known_positives[:3]:
            print("        %s" % x)

        print("     Recommended:")

        for x in top_items[:3]:
            print("        %s" % x)
        
# Print the report for three example users (row indices 3, 25 and 450).
sample_recommendation(model, data, [3, 25, 450]) 

User 3
     Known positives:
        Contact (1997)
        Air Force One (1997)
        In & Out (1997)
     Recommended:
        L.A. Confidential (1997)
        G.I. Jane (1997)
        Mrs. Brown (Her Majesty, Mrs. Brown) (1997)
User 25
     Known positives:
        Fargo (1996)
        Godfather, The (1972)
        L.A. Confidential (1997)
     Recommended:
        L.A. Confidential (1997)
        Godfather, The (1972)
        Fargo (1996)
User 450
     Known positives:
        Event Horizon (1997)
        Scream (1996)
        Conspiracy Theory (1997)
     Recommended:
        Scream (1996)
        Game, The (1997)
        G.I. Jane (1997)


In [8]:
# Bare expression: display the repr of the training interaction matrix.
data['train']

<943x1682 sparse matrix of type '<type 'numpy.float32'>'
	with 19048 stored elements in COOrdinate format>