In [1]:
from lightfm.datasets import fetch_movielens

data = fetch_movielens(min_rating=4.0)
data



{'train': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 49906 stored elements in COOrdinate format>,
 'test': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 5469 stored elements in COOrdinate format>,
 'item_features': <1682x1682 sparse matrix of type '<class 'numpy.float32'>'
 	with 1682 stored elements in Compressed Sparse Row format>,
 'item_feature_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'item_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object)}

In [2]:
type(data)

dict

In [3]:
for key, value in data.items():
    print(key, type(key), value.shape)

train <class 'str'> (943, 1682)
test <class 'str'> (943, 1682)
item_features <class 'str'> (1682, 1682)
item_feature_labels <class 'str'> (1682,)
item_labels <class 'str'> (1682,)


In [4]:
m1 = data['train'].tocsr() # Compressed Sparse Row
print(m1[0, 0])
print(m1[0, 1])
# Rows are users, columns are items

5
0


In [5]:
from lightfm import LightFM

train = data['train']
test = data['test']

model = LightFM(loss='warp')
model.fit(train, epochs=10)

<lightfm.lightfm.LightFM at 0x174ea90c2c8>

In [6]:
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10).mean()

train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.48, test 0.08.
AUC: train 0.94, test 0.91.


In [7]:
import numpy as np

def recommend(model, data, users):
    n_users, n_items = data['train'].shape
    for user_id in users:
        known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices]
        scores = model.predict(user_id, np.arange(n_items))
        top_items = data['item_labels'][np.argsort(-scores)]
        print("User %s" % user_id)
        print("Known positives:")
        for x in known_positives[:3]:
            print("%s" % x)        
        print("Recommended:")
        for x in top_items[:3]:
            print("%s" % x)
        print("\n")
        
recommend(model, data, [6, 125, 336])

User 6
Known positives:
Get Shorty (1995)
Twelve Monkeys (1995)
Babe (1995)
Recommended:
Raiders of the Lost Ark (1981)
Silence of the Lambs, The (1991)
Empire Strikes Back, The (1980)


User 125
Known positives:
Jungle2Jungle (1997)
Kull the Conqueror (1997)
Scream (1996)
Recommended:
Air Force One (1997)
L.A. Confidential (1997)
Titanic (1997)


User 336
Known positives:
Mr. Holland's Opus (1995)
Star Wars (1977)
Ace Ventura: Pet Detective (1994)
Recommended:
Return of the Jedi (1983)
Independence Day (ID4) (1996)
Star Wars (1977)


