In [66]:
!pip install LightFM



In [67]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, auc_score

data = fetch_movielens(genre_features=True)
train = data['train']
test = data['test']

model = LightFM(learning_rate=0.05,loss="warp")
model.fit(train, epochs=30, num_threads=2)

train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10).mean()
train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.64, test 0.11.
AUC: train 0.95, test 0.91.


In [68]:
import pandas as pd
score = model.predict(1, [47,50, 223, 235,349]) #takes in user and item id, optionally other features such as item and user features.
ratings = pd.read_csv("/content/ratings.csv")
score

array([-2.4813833, -3.4487445, -1.7370099, -1.4486222, -2.4142652],
      dtype=float32)

In [69]:
sample = pd.DataFrame({
    "userId" : 1, "itemId" : [47, 50,223,235,349]
})
sample["scores"] = score

In [70]:
sample.sort_values(by='scores' ,ascending=False)

Unnamed: 0,userId,itemId,scores
3,1,235,-1.448622
2,1,223,-1.73701
4,1,349,-2.414265
0,1,47,-2.481383
1,1,50,-3.448745


In [71]:
movies = pd.read_csv("/content/movies.csv")
def get_recs(df):
  recs = [movies["title"].loc[movies["movieId"] == i] for i in sample["itemId"]]
  hold = []
  for i in range(len(recs)):
    hold.append(recs[i].tolist()[0].strip())
  return hold

In [72]:
get_recs(movies)

['Seven (a.k.a. Se7en) (1995)',
 'Usual Suspects, The (1995)',
 'Clerks (1994)',
 'Ed Wood (1994)',
 'Clear and Present Danger (1994)']

In [73]:
movie_labels = data["item_feature_labels"]
movie_titles = movies['title'].copy()

In [74]:
movie_titles

Unnamed: 0,title
0,Toy Story (1995)
1,Jumanji (1995)
2,Grumpier Old Men (1995)
3,Waiting to Exhale (1995)
4,Father of the Bride Part II (1995)
...,...
9737,Black Butler: Book of the Atlantic (2017)
9738,No Game No Life: Zero (2017)
9739,Flint (2017)
9740,Bungo Stray Dogs: Dead Apple (2018)


In [84]:
import numpy as np
def get_similar_movies(model, movieId):
  movie_embed = (model.item_embeddings.T
                 / np.linalg.norm(model.item_embeddings, axis=1)).T

  query_embed = movie_embed[movieId]
  similarity = np.dot(movie_embed, query_embed)
  most_similar = np.argsort(-similarity)[1:6]

  return most_similar

movie = "Jumanji"
movieId = movie_titles.index[movie_titles.str.contains(movie)].tolist()
movieId = movieId[0]
print(f"Most similar movies for {movie_titles[movieId]}:\n{movie_titles[get_similar_movies(model, movieId)]}")

Most similar movies for Jumanji (1995):
577    Truth About Cats & Dogs, The (1996)
230                     Major Payne (1995)
232                     Love Affair (1994)
549                     Dragonheart (1996)
567                            Solo (1996)
Name: title, dtype: object
