# LightFM

* Hybrid model (content based and collaborative filtering)

## Installation
`conda install -c conda-forge lightfm`

In [246]:
from lightfm import LightFM
import numpy as np
from lightfm.datasets import fetch_movielens

print("Libraries imported")

Libraries imported


In [247]:
# Fetch LightFM dataset movielens
data = fetch_movielens(min_rating=4.0)

for i in data:
    print(i)

train
test
item_features
item_feature_labels
item_labels


In [248]:
# Example of item_feature_labels
count = 0;
for i in data['item_feature_labels']:
    if(count <5):
        print(f'{count+1}: {i}')
        count+=1

1: Toy Story (1995)
2: GoldenEye (1995)
3: Four Rooms (1995)
4: Get Shorty (1995)
5: Copycat (1995)


In [249]:
print('TRAIN: ', repr(data['train']), '\n\nTEST: ', repr(data['test']),
     '\n\nTYPE: ', type(data['train']))

TRAIN:  <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format> 

TEST:  <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format> 

TYPE:  <class 'scipy.sparse.coo.coo_matrix'>


In [250]:
train = data['train']
test = data['test']

## Traditional collaborative filtering model 

In [251]:
NUM_THREADS = 2
EPOCHS = 50

# Create model
model = LightFM(loss='warp')

# Start training
%time model = model.fit(train, epochs=EPOCHS, num_threads=NUM_THREADS)

Wall time: 5.72 s


In [252]:
# Import the evaluation routines
from lightfm.evaluation import auc_score

# Compute and print the AUC score, train set
train_auc = auc_score(model, train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering train AUC: %s' % train_auc)

# Compute and print the AUC score, test set
test_auc = auc_score(model, test, train_interactions=train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering test AUC: %s' % test_auc)

Collaborative filtering train AUC: 0.9615401
Collaborative filtering test AUC: 0.9325669


## Hybrid model

In [253]:
# Item features
item_features = data['item_features']

# Define model 
hybrid_model = LightFM(loss='warp')

# Fit the hybrid model. Note that this time, we pass
# in the item features matrix.
hybrid_model = hybrid_model.fit(train,
                item_features=item_features,
                epochs=EPOCHS,
                num_threads=NUM_THREADS)

In [254]:
train_auc = auc_score(hybrid_model,
                      train,
                      item_features=item_features,
                      num_threads=NUM_THREADS).mean()
print('Hybrid training set AUC: %s' % train_auc)

Hybrid training set AUC: 0.9609723


In [255]:
test_auc = auc_score(hybrid_model,
                    test,
                    train_interactions=train,
                    item_features=item_features,
                    num_threads=NUM_THREADS).mean()
print('Hybrid test set AUC: %s' % test_auc)

Hybrid test set AUC: 0.93419915


## User recommendation

In [263]:
# Users, items
n_users, n_items = data['train'].shape

In [264]:
# User 351
liked_movies = data['item_labels'][data['train'].tocsr()[200].indices]

In [273]:
# User 351
scores = hybrid_model.predict(200, np.arange(n_items))
scores

array([-5.24551296, -6.95766449, -6.47659969, ..., -8.8270359 ,
       -8.99055672, -8.73103142])

In [274]:
top_items = data['item_labels'][np.argsort(-scores)]
for i in liked_movies[:5]:
    print(f'Previous liked movies: {i}') 

print('\n') 

for x in top_items[:5]:
    print(f'Recommended movies: {x}')

Previous liked movies: Get Shorty (1995)
Previous liked movies: Seven (Se7en) (1995)
Previous liked movies: Usual Suspects, The (1995)
Previous liked movies: Taxi Driver (1976)
Previous liked movies: Brothers McMullen, The (1995)


Recommended movies: Fargo (1996)
Recommended movies: Pulp Fiction (1994)
Recommended movies: Godfather, The (1972)
Recommended movies: Usual Suspects, The (1995)
Recommended movies: GoodFellas (1990)
