In [1]:
import pandas as pd
import torch
import yaml
from torch import cosine_similarity

from model.encoder import Encoder
from model.recommender import DeepFM
from utils.data import get_feature_sizes

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Movie catalogue: the CSV header row is replaced by our own column names,
# then only the id and title columns are kept.
movies = pd.read_csv(
    "data/ml-20m/movies.csv",
    header=0,
    names=["movie_id", "movie_title", "genres"],
).loc[:, ["movie_id", "movie_title"]]

# Explicit ratings: the timestamp column is read but not kept.
ratings = pd.read_csv(
    "data/ml-20m/ratings.csv",
    header=0,
    names=["user_id", "movie_id", "rating", "timestamp"],
).loc[:, ["user_id", "movie_id", "rating"]]

# Text requests collapsed to one row per movie: the first title encountered
# and the list of all requests for that movie. `movie_id` ends up both as the
# index and as a regular column (drop=False).
requests = (
    pd.read_csv('data/ml-20m/requests.csv')
    .groupby("movie_id")
    .agg({"movie_title": "first", "request": list})
    .reset_index()
    .set_index("movie_id", drop=False)
)

In [3]:
# Load the collaborative-filtering configuration. The context manager closes
# the file handle as soon as the YAML has been parsed (a bare open() leaked it).
with open("configs/collaborative.yaml", "r") as config_file:
    args = yaml.safe_load(config_file)

# Checkpoint paths are injected into the per-model config sections, which are
# forwarded as keyword arguments to the constructors below.
args["recommender"]["weights"] = "weights/collaborative/deepfm.pt"
args["encoder"]["weights"] = "weights/collaborative/encoder.pt"

device = "cuda" if torch.cuda.is_available() else "cpu"

recommender = DeepFM(feature_dims=get_feature_sizes(ratings), **args["recommender"]).to(device)
# This notebook only runs inference, so switch off training-only behaviour
# such as dropout.
# NOTE(review): assumes DeepFM and Encoder are torch.nn.Module subclasses
# (both expose .to()) - confirm.
recommender.eval()

# Rows of the shared embedding table from offsets[1] onward - presumably the
# movie rows, given the (user, movie) feature order used at inference time;
# confirm against the embedding layout. Detached so later similarity
# computations do not track gradients back into the model weights.
movie_embeddings = recommender.embedding.embedding.weight[recommender.embedding.offsets[1]:].detach().cpu()

encoder = Encoder(**args["encoder"]).to(device)
encoder.eval()

In [5]:
# User whose recommendations are inspected in the cells that follow.
user_id = 36

# One tensor entry per distinct movie id present in `requests`.
unique_movie_ids = torch.tensor(list(requests["movie_id"].unique()))
# Tensor shaped like `unique_movie_ids` with every element set to `user_id`,
# so the two can later be paired element-wise.
uniquer_user_id = torch.full_like(unique_movie_ids, fill_value=user_id)

In [6]:
# Rating history of the selected user, joined with movie titles and ordered
# from the highest rating to the lowest.
(
    ratings.loc[ratings["user_id"].eq(user_id)]
    .merge(movies, on="movie_id")
    .sort_values(by="rating", ascending=False)
)

Unnamed: 0,user_id,movie_id,rating,movie_title
57,36,2612,5.0,Live and Let Die (1973)
47,36,2076,5.0,Romancing the Stone (1984)
25,36,1082,5.0,"Bridge on the River Kwai, The (1957)"
55,36,2570,5.0,"Dirty Dozen, The (1967)"
27,36,1097,5.0,Groundhog Day (1993)
...,...,...,...,...
49,36,2342,3.0,"South Park: Bigger, Longer and Uncut (1999)"
71,36,3502,2.0,"O Brother, Where Art Thou? (2000)"
62,36,2988,2.0,Grumpy Old Men (1993)
67,36,3193,1.0,Prizzi's Honor (1985)


In [7]:
# Content-only retrieval: rank movies purely by how close their embedding is
# to the embedding of the free-text request.
request = "I want to watch a popular romantic comedy"

# Pure inference - no_grad avoids building an autograd graph through the
# encoder and the embedding table.
with torch.no_grad():
    request_embedding = encoder(request).cpu()
    # One cosine score per row of `movie_embeddings`.
    similarities = cosine_similarity(request_embedding, movie_embeddings)

# Positions of the 10 highest-scoring embedding rows. Reading `.indices`
# avoids assigning to `_`, which IPython uses for the last cell output.
indices = torch.topk(similarities, k=10, largest=True).indices

# NOTE(review): positional lookup assumes row i of `movies` corresponds to
# row i of `movie_embeddings` - confirm. A plain list of ints is passed so
# the lookup does not depend on pandas coercing a torch tensor.
movies.iloc[indices.tolist()]

Unnamed: 0,movie_id,movie_title
552,552,Pretty Woman (1990)
334,334,Four Weddings and a Funeral (1994)
215,215,French Kiss (1995)
3494,3494,What Women Want (2000)
2366,2366,Runaway Bride (1999)
790,790,It Happened One Night (1934)
1138,1138,When Harry Met Sally... (1989)
2318,2318,Notting Hill (1999)
6023,6023,50 First Dates (2004)
267,267,Only You (1994)


In [8]:
# Personalised re-ranking: weight each candidate's request similarity by the
# rating the recommender predicts for the selected user.

# One (user_id, movie_id) row per candidate movie.
rec_features = torch.stack([uniquer_user_id, unique_movie_ids], dim=-1).to(device)

# Pure inference - no autograd graph needed.
with torch.no_grad():
    user_ratings = recommender(rec_features).cpu()

    # `user_ratings[i]` belongs to movie `unique_movie_ids[i]`, whereas
    # `similarities` has one entry per embedding row. Select the similarity of
    # each candidate explicitly so the product stays aligned even when
    # `requests` does not cover every movie.
    # NOTE(review): assumes embedding row j corresponds to movie_id j - confirm.
    rankings = user_ratings * similarities[unique_movie_ids]

# Positions (within the candidate list) of the 10 best combined scores.
indices = torch.topk(rankings, k=10, largest=True).indices

# Map candidate positions back to movie ids before the positional lookup;
# identical to indexing by `indices` when the candidates are 0..N-1 in order.
movies.iloc[unique_movie_ids[indices].tolist()]

Unnamed: 0,movie_id,movie_title
790,790,It Happened One Night (1934)
334,334,Four Weddings and a Funeral (1994)
552,552,Pretty Woman (1990)
5742,5742,Love Actually (2003)
464,464,Much Ado About Nothing (1993)
1930,1930,Say Anything... (1989)
54,54,"Postman, The (Postino, Il) (1994)"
1138,1138,When Harry Met Sally... (1989)
215,215,French Kiss (1995)
1079,1079,"Graduate, The (1967)"
