In [None]:
!pip install lightfm

In [None]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

In [None]:
# load the MovieLens dataset, keeping ratings of 4.0 and above
# (min_rating is presumably an inclusive lower bound — confirm in the LightFM docs)
data = fetch_movielens(min_rating=4.0)

In [None]:
# show the repr of the training interaction matrix
print(f"{data['train']!r}")

In [None]:
# create a model trained with the WARP loss; a fixed random_state seeds the
# model's random number generator so reruns start from the same state
model = LightFM(loss="warp", random_state=42)
# train the model
# NOTE(review): with num_threads > 1 results may still vary slightly between runs — confirm
model.fit(data["train"], epochs=30, num_threads=2)

In [None]:
def sample_recommendation(model, data, user_ids, n_shown=3):
    """Print known favourites and top-ranked recommendations for each user.

    Args:
        model: fitted recommender exposing ``predict(user_id, item_ids)`` that
            returns one score per requested item id.
        data: mapping with a ``'train'`` sparse user-by-item interaction
            matrix and an ``'item_labels'`` array indexable by item index.
        user_ids: iterable of integer row indices into ``data['train']``.
        n_shown: how many known positives and how many recommendations to
            print per user. Defaults to 3, the previously hard-coded value.

    Returns:
        None. The report is written to stdout.
    """
    # number of movies in the training data
    n_items = data['train'].shape[1]
    all_item_ids = np.arange(n_items)

    # Convert to CSR once, outside the loop: the conversion does not depend on
    # the user, and CSR rows expose the column indices of their stored entries.
    # CSR --> https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)
    interactions = data['train'].tocsr()

    # generate recommendations for each user we input
    for user_id in user_ids:

        # movies they already like
        known_positives = data['item_labels'][interactions[user_id].indices]

        # movies our model predicts they will like
        scores = model.predict(user_id, all_item_ids)
        # rank them in order of most liked to least (negate for descending order)
        top_items = data['item_labels'][np.argsort(-scores)]

        # print out the results
        print("User %s" % user_id)
        print("     Known positives:")

        for x in known_positives[:n_shown]:
            print("        %s" % x)

        print("     Recommended:")

        for x in top_items[:n_shown]:
            print("        %s" % x)
            

In [None]:
# print sample recommendations for three users
sample_recommendation(model=model, data=data, user_ids=[3, 5, 24])

# Challenge
### New dataset: comparing three different models

In [None]:
import pandas as pd

In [None]:
# Jester Online Joke Recommender System
# the first column is named "#Jokes"; the remaining 100 columns are labelled 0..99
jokes = list(range(100))
names = ["#Jokes", *jokes]
df = pd.read_excel("jester.xls", names=names, header=None)

In [None]:
# Recode every rating to -1 (negative), 0 (missing, stored as 99.0) or +1 (positive).
# Only the joke columns are recoded: "#Jokes" is not a rating (presumably a
# per-user count — confirm against the dataset description), so it is left intact
# instead of being overwritten by the whole-frame masks.
rating_cols = df.columns.drop("#Jokes")
ratings = df[rating_cols]
# zero out the 99.0 placeholder first so the sign step cannot turn it into +1
df[rating_cols] = np.sign(ratings.where(ratings != 99.0, 0))

In [None]:
# display the first 5 users' ratings
df.head()

In [None]:
from scipy.sparse import csr_matrix
# sparse matrix of the first 500 rows, without the "#Jokes" column
train = csr_matrix(df.iloc[:500].drop(columns="#Jokes"))

In [None]:
# show the repr of the CSR training matrix
train

In [None]:
train_set = pd.SparseDataFrame(train)

In [None]:
train_set = train_set.to_coo()

In [None]:
# show the repr of the converted training set
train_set

In [None]:
# one LightFM model per loss function, keyed by the label printed in the comparison
# ("Warp" fixes the earlier "Wrap" typo); the shared random_state seeds each model
# identically so the losses are compared from the same starting state
models = {
    "Warp": LightFM(loss="warp", random_state=42),
    "Logistic": LightFM(loss="logistic", random_state=42),
    "Bpr": LightFM(loss="bpr", random_state=42),
}

In [None]:
# fit every candidate model on the same training interactions
for recommender in models.values():
    recommender.fit(train_set, epochs=30, num_threads=2)

In [None]:
# for user x the top jokes will be
x = 300
# score every column of the training matrix instead of hard-coding 100
joke_ids = np.arange(train_set.shape[1])
scores = {}
for model_name, recommender in models.items():
    # pass the user id as a plain integer, as sample_recommendation does:
    # a length-1 list does not match the length of joke_ids
    scores[model_name] = recommender.predict(x, joke_ids)
    # negate the scores so argsort yields the highest-scored jokes first
    top3 = np.argsort(-scores[model_name])[:3]
    print("Model: {:10s} Top-3 Jokes:{}".format(model_name,top3))