In [175]:
import pandas as pd
import numpy as np

from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, auc_score
from sklearn.preprocessing import MultiLabelBinarizer

ratings_df = pd.read_csv("ratings.csv").drop(columns=["timestamp"])
movies_df = pd.read_csv("movies.csv")

print(ratings_df)

        userId  movieId  rating
0            1        1     4.0
1            1        3     4.0
2            1        6     4.0
3            1       47     5.0
4            1       50     5.0
...        ...      ...     ...
100831     610   166534     4.0
100832     610   168248     5.0
100833     610   168250     5.0
100834     610   168252     5.0
100835     610   170875     3.0

[100836 rows x 3 columns]


In [176]:
unique_users = sorted(ratings_df['userId'].unique())
unique_movies = sorted(ratings_df['movieId'].unique())

user_id_map = {old_id: new_id for new_id, old_id in enumerate(unique_users)}
movie_id_map = {old_id: new_id for new_id, old_id in enumerate(unique_movies)}

ratings_df['user_idx'] = ratings_df['userId'].map(user_id_map)
ratings_df['movie_idx'] = ratings_df['movieId'].map(movie_id_map)

n_users = len(unique_users)
n_movies = len(unique_movies)

print(n_users, n_movies)

610 9724


In [177]:
interactions = coo_matrix(
    (ratings_df['rating'] >= 2, (ratings_df['user_idx'], ratings_df['movie_idx'])), shape=(n_users, n_movies))

train_interactions, test_interactions = random_train_test_split(interactions, 0.2, 42)

In [178]:
movies_df['genre_list'] = movies_df['genres'].str.split('|')

mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(movies_df['genre_list'])

item_features = csr_matrix(genre_matrix)
item_feature_names = mlb.classes_
print(item_features)

item_features = csr_matrix(item_features)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 22084 stored elements and shape (9742, 20)>
  Coords	Values
  (0, 2)	1
  (0, 3)	1
  (0, 4)	1
  (0, 5)	1
  (0, 9)	1
  (1, 2)	1
  (1, 4)	1
  (1, 9)	1
  (2, 5)	1
  (2, 15)	1
  (3, 5)	1
  (3, 8)	1
  (3, 15)	1
  (4, 5)	1
  (5, 1)	1
  (5, 6)	1
  (5, 17)	1
  (6, 5)	1
  (6, 15)	1
  (7, 2)	1
  (7, 4)	1
  (8, 1)	1
  (9, 1)	1
  (9, 2)	1
  (9, 17)	1
  :	:
  (9731, 1)	1
  (9731, 2)	1
  (9731, 5)	1
  (9731, 16)	1
  (9732, 1)	1
  (9732, 3)	1
  (9732, 5)	1
  (9732, 16)	1
  (9733, 3)	1
  (9733, 8)	1
  (9734, 5)	1
  (9734, 8)	1
  (9735, 3)	1
  (9736, 7)	1
  (9737, 1)	1
  (9737, 3)	1
  (9737, 5)	1
  (9737, 9)	1
  (9738, 3)	1
  (9738, 5)	1
  (9738, 9)	1
  (9739, 8)	1
  (9740, 1)	1
  (9740, 3)	1
  (9741, 5)	1


In [179]:
model_collab = LightFM(loss='warp', no_components=30, random_state=42)
model_collab.fit(train_interactions, epochs=30, num_threads=4)

model_hybrid = LightFM(loss='warp', no_components=30, random_state=42)
model_hybrid.fit(train_interactions, item_features=item_features, epochs=50, num_threads=5)

<lightfm.lightfm.LightFM at 0x15d95f9d0>

In [180]:
auc_collab = auc_score(model_collab, test_interactions, train_interactions=train_interactions).mean()
auc_hybrid = auc_score(model_hybrid, test_interactions, train_interactions=train_interactions, item_features=item_features).mean()

train_prec = precision_at_k(model_hybrid, test_interactions, train_interactions=train_interactions, k=10, item_features=item_features).mean()

print(f"Collab AUC: {auc_collab.round(4)}")
print(f"Hybrid AUC: {auc_hybrid.round(4)}")
print(f"Train Precision: {train_prec.round(4)}")

Collab AUC: 0.9283000230789185
Hybrid AUC: 0.5619000196456909
Train Precision: 0.007899999618530273


In [181]:
user_id = 7
n_recommendations = 10

scores = model.predict(user_id, np.arange(n_movies), item_features=item_features)

top_items = np.argsort(-scores)[:n_recommendations]
recommended_titles = movies_df[movies_df['movieId'].isin(top_items)]['title'].values

print("Movies recommendations:\n")
for title in recommended_titles:
    print(f" - {title}")

Movies recommendations:

 - What's Eating Gilbert Grape (1993)
 - Thinner (1996)
 - Outlaw Josey Wales, The (1976)
 - Pee-wee's Big Adventure (1985)
 - T-Rex: Back to the Cretaceous (1998)
 - Changeling, The (1980)
 - Halloween: Resurrection (Halloween 8) (2002)
