In [11]:
!pip install LightFM



In [12]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, auc_score

data = fetch_movielens(genre_features=True)
train = data['train']
test = data['test']

model = LightFM(learning_rate=0.05,loss="warp")
model.fit(train, epochs=30, num_threads=2)

train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10).mean()
train_auc = auc_score(model, train).mean()
test_auc = auc_score(model, test).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.64, test 0.11.
AUC: train 0.95, test 0.91.


In [13]:
train

<943x1682 sparse matrix of type '<class 'numpy.float32'>'
	with 90570 stored elements in COOrdinate format>

In [14]:
import pandas as pd
score = model.predict(1, [47,50, 223, 235,349]) #takes in user and item id, optionally other features such as item and user features.
ratings = pd.read_csv("/content/ratings.csv")
score

array([-2.6604555, -3.9915073, -2.2457302, -2.0042567, -2.9303498],
      dtype=float32)

In [15]:
sample = pd.DataFrame({
    "userId" : 1, "itemId" : [47, 50,223,235,349]
})
sample["scores"] = score

In [16]:
sample.sort_values(by='scores' ,ascending=False)

Unnamed: 0,userId,itemId,scores
3,1,235,-2.004257
2,1,223,-2.24573
0,1,47,-2.660455
4,1,349,-2.93035
1,1,50,-3.991507


In [17]:
movies = pd.read_csv("/content/movies.csv")
recs = [movies["title"].loc[movies["movieId"] == i] for i in sample["itemId"]]
def get_recs(result):
  hold = []
  for i in range(len(result)):
    hold.append(result[i].tolist()[0].strip())
  return hold

In [18]:
get_recs(recs)

['Seven (a.k.a. Se7en) (1995)',
 'Usual Suspects, The (1995)',
 'Clerks (1994)',
 'Ed Wood (1994)',
 'Clear and Present Danger (1994)']

In [33]:
movie_labels = data["item_feature_labels"]
movie_titles= pd.DataFrame(data=movies["movieId"], columns=["movieId", "title"])
movie_titles["title"] = movies["title"].copy()
movie_titles["title"] = movie_titles["title"].str.lower()

In [34]:
movie_titles

Unnamed: 0,movieId,title
0,1,toy story (1995)
1,2,jumanji (1995)
2,3,grumpier old men (1995)
3,4,waiting to exhale (1995)
4,5,father of the bride part ii (1995)
...,...,...
9737,193581,black butler: book of the atlantic (2017)
9738,193583,no game no life: zero (2017)
9739,193585,flint (2017)
9740,193587,bungo stray dogs: dead apple (2018)


In [35]:
import numpy as np
def get_similar_movies(model, movieId):
  movie_embed = (model.item_embeddings.T
                 / np.linalg.norm(model.item_embeddings, axis=1)).T

  query_embed = movie_embed[movieId]
  similarity = np.dot(movie_embed, query_embed)
  most_similar = np.argsort(-similarity)[1:6] #arranges them by index to be fed into movie[i]

  return most_similar

In [60]:
def get_movies(model, movie_name, movie_titles):
  movieId = movie_titles["movieId"].loc[movie_titles["title"].str.contains(str(movie_name).lower())].tolist()
  movieId = movieId[0]
  recs = [movie_titles["title"].loc[movie_titles["movieId"] == movie_ids] for movie_ids in get_similar_movies(model, movieId)]
  movies = []
  for i in range(len(recs)):
    movies.append(recs[i].tolist()[0])
  return movies

In [65]:
get_movies(model, "Jumanji", movie_titles)

IndexError: index 5349 is out of bounds for axis 0 with size 1682