# LightFM

* Hybrid recommendation model (combines content-based and collaborative filtering)

## Installation
`conda install -c conda-forge lightfm`

In [14]:
# Third-party numerical library first, then the LightFM pieces used below
import numpy as np

from lightfm import LightFM
from lightfm.datasets import fetch_movielens

# Confirmation that every import above succeeded
print("Libraries imported")

Libraries imported


In [71]:
# Load the MovieLens dataset through LightFM's helper, requesting min_rating=4.0
data = fetch_movielens(min_rating=4.0)

# Show which entries the returned container provides
for key in data:
    print(key)

train
test
item_features
item_feature_labels
item_labels


In [106]:
# Example of item_feature_labels: print the first five entries, numbered from 1.
# Slicing stops the loop after five labels instead of walking the whole
# collection with a manual counter.
for position, label in enumerate(data['item_feature_labels'][:5], start=1):
    print(f'{position}: {label}')

1: Toy Story (1995)
2: GoldenEye (1995)
3: Four Rooms (1995)
4: Get Shorty (1995)
5: Copycat (1995)


In [114]:
# Summarise both interaction matrices and show the type of the train matrix
print(
    'TRAIN: ', repr(data['train']),
    '\n\nTEST: ', repr(data['test']),
    '\n\nTYPE: ', type(data['train']),
)

TRAIN:  <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format> 

TEST:  <943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format> 

TYPE:  <class 'scipy.sparse.coo.coo_matrix'>


In [209]:
# Short aliases for the two interaction matrices used by the cells below
train, test = data['train'], data['test']

## Traditional collaborative filtering model 

In [221]:
NUM_THREADS = 2
EPOCHS = 50

# Create model
model = LightFM(loss='warp')

# Start training
%time model = model.fit(train, epochs=EPOCHS, num_threads=NUM_THREADS)

Wall time: 6.1 s


In [222]:
# Evaluation helper from LightFM
from lightfm.evaluation import auc_score

# AUC on the training interactions, averaged into a single number
train_auc = auc_score(
    model,
    train,
    num_threads=NUM_THREADS,
).mean()
print('Collaborative filtering train AUC: %s' % train_auc)

# AUC on the test interactions; the training matrix is handed over as
# train_interactions alongside the test matrix
test_auc = auc_score(
    model,
    test,
    train_interactions=train,
    num_threads=NUM_THREADS,
).mean()
print('Collaborative filtering test AUC: %s' % test_auc)

Collaborative filtering train AUC: 0.9610077
Collaborative filtering test AUC: 0.9338917


## Hybrid model

In [224]:
# Item features
item_features = data['item_features']

# Define model. random_state=42 pins the model's pseudo random number
# generator so repeated runs start from the same random state.
# NOTE(review): multi-threaded fitting may presumably still vary slightly.
hybrid_model = LightFM(loss='warp', random_state=42)

# Fit the hybrid model. Note that this time, we pass
# in the item features matrix.
hybrid_model = hybrid_model.fit(train,
                item_features=item_features,
                epochs=EPOCHS,
                num_threads=NUM_THREADS)

In [226]:
# AUC of the hybrid model on the training interactions; the same item
# features used for fitting are supplied again for scoring
train_auc = auc_score(
    hybrid_model,
    train,
    item_features=item_features,
    num_threads=NUM_THREADS,
).mean()
print('Hybrid training set AUC: %s' % train_auc)

Hybrid training set AUC: 0.96085256


In [227]:
# AUC of the hybrid model on the test interactions, with the training
# matrix passed as train_interactions and the item features supplied
test_auc = auc_score(
    hybrid_model,
    test,
    train_interactions=train,
    item_features=item_features,
    num_threads=NUM_THREADS,
).mean()
print('Hybrid test set AUC: %s' % test_auc)

Hybrid test set AUC: 0.9325168


## User recommendation

In [202]:
# Dimensions of the training matrix: first axis users, second axis items
n_users = data['train'].shape[0]
n_items = data['train'].shape[1]

In [203]:
# Labels of the items stored in row 351 of the training matrix
train_csr = data['train'].tocsr()            # CSR form allows row indexing
liked_item_ids = train_csr[351].indices      # column positions stored in that row
liked_movies = data['item_labels'][liked_item_ids]

In [204]:
# Ask the collaborative filtering model for user 351's score on every item index
all_item_ids = np.arange(n_items)
scores = model.predict(351, all_item_ids)
scores

array([-0.21502215, -1.68964744, -1.68243408, ..., -4.63933802,
       -4.71670341, -4.63237476])

In [205]:
# Order item labels from highest to lowest predicted score.
# NOTE: the ranking covers every item, so titles that also appear in
# liked_movies are not filtered out of the recommendations.
ranking = np.argsort(-scores)
top_items = data['item_labels'][ranking]

# First five titles the user already liked
for title in liked_movies[:5]:
    print(f'Previous liked movies: {title}')

print('\n')

# Five best-scoring titles
for title in top_items[:5]:
    print(f'Recommended movies: {title}')

Previous liked movies: Usual Suspects, The (1995)
Previous liked movies: Strange Days (1995)
Previous liked movies: Star Wars (1977)
Previous liked movies: Pulp Fiction (1994)
Previous liked movies: Fugitive, The (1993)


Recommended movies: Pulp Fiction (1994)
Recommended movies: Silence of the Lambs, The (1991)
Recommended movies: Star Wars (1977)
Recommended movies: Raiders of the Lost Ark (1981)
Recommended movies: Alien (1979)
