In [1]:
import sys
from pathlib import Path

PROJECT_ROOT = Path("..").resolve()
SRC_DIR = PROJECT_ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

from data_loader import load_movielens_ratings, load_movie_titles

df = load_movielens_ratings(PROJECT_ROOT / "data" / "rating.csv")
df.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0


In [2]:
# Construct user-item matrix
user_item_matrix = df.pivot_table(
    index="user_id",
    columns="movie_id",
    values="rating"
).fillna(0.0)

user_item_matrix.shape, user_item_matrix.head()

((943, 1682),
 movie_id  1     2     3     4     5     6     7     8     9     10    ...  \
 user_id                                                               ...   
 1          5.0   3.0   4.0   3.0   3.0   5.0   4.0   1.0   5.0   3.0  ...   
 2          4.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   2.0  ...   
 3          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   
 4          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   
 5          4.0   3.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  ...   
 
 movie_id  1673  1674  1675  1676  1677  1678  1679  1680  1681  1682  
 user_id                                                               
 1          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
 2          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
 3          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
 4          0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
 5    

In [3]:
import sys
from pathlib import Path

PROJECT_ROOT = Path("..").resolve()
SRC_DIR = PROJECT_ROOT / "src"
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

from recommender import KNNRecommender

In [4]:
model = KNNRecommender().fit(df)

In [5]:
example_user = df["user_id"].iloc[0]
recs = model.recommend(example_user, top_k=5)
recs

[50, 100, 181, 174, 127]

In [6]:
movie_titles = load_movie_titles(PROJECT_ROOT / "data" / "u.item")
movie_titles[movie_titles["movie_id"].isin(recs)]

Unnamed: 0,movie_id,title
49,50,Star Wars (1977)
99,100,Fargo (1996)
126,127,"Godfather, The (1972)"
173,174,Raiders of the Lost Ark (1981)
180,181,Return of the Jedi (1983)


## Evaluation

In [7]:
from evaluation import precision_at_k

In [8]:
user_id = df["user_id"].iloc[0] # The first user will be our exemple

Now, we are extracting his liked movies in order to hide one to make a recommendation. 

In [9]:
user_ratings = df[df["user_id"] == user_id]
liked_movies = user_ratings[user_ratings["rating"] >= 4]["movie_id"].tolist()
liked_movies[:10]

[393, 381, 655, 67, 306, 238, 663, 111, 25, 286]

In [10]:
import random

test_movie = random.choice(liked_movies)
test_movie

238

In [11]:
recs = model.recommend(user_id, top_k=5)
recs

[50, 100, 181, 174, 127]

Let's see the precision - Precision@5

In [12]:
precision_at_k(recommended=recs, relevant=[test_movie], k=5)

0.0

Given the simplicity of the model (pure user-based KNN), the Precision@5 metric is expected to be low.
The goal here is not high accuracy but demonstrating the ability to build, train and evaluate a recommender system from scratch.
The following will at least provide a positive number.

In [13]:
from evaluation import global_precision_at_k

In [14]:
global_precision_at_k(model, df, k=2000, num_users=5000)

0.0005

## Second model

In [22]:
from recommender_svd import SVDRecommender

svd_model = SVDRecommender(n_components=20).fit(df)

example_user = df["user_id"].iloc[0]
svd_model.recommend(example_user, top_k=10)

[216, 100, 208, 88, 732, 14, 204, 275, 211, 137]

In [23]:
global_precision_at_k(svd_model, df, k=10, num_users=300)

0.0