In [8]:
import pandas as pd
import torch
import yaml
from torch import cosine_similarity

from model.encoder import Encoder
from model.recommender import DeepFM
from utils.data import get_feature_sizes
from utils.misc import cosine_distance

In [2]:
# Load only the columns this notebook uses. `usecols` makes the parser skip the
# unused `genres` and `timestamp` columns instead of reading them for every row
# and discarding them afterwards.
movies = pd.read_csv(
    "data/ml-20m/movies.csv",
    header=0,  # row 0 is the file's own header; it is replaced by `names`
    names=["movie_id", "movie_title", "genres"],
    usecols=["movie_id", "movie_title"],
)

ratings = pd.read_csv(
    "data/ml-20m/ratings.csv",
    header=0,
    names=["user_id", "movie_id", "rating", "timestamp"],
    usecols=["user_id", "movie_id", "rating"],
)

In [3]:
# Read the model hyperparameters. The context manager guarantees the config
# file handle is closed once parsing finishes.
with open("configs/collaborative.yaml", "r") as config_file:
    args = yaml.safe_load(config_file)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the recommender and restore its trained weights. `.eval()` puts the
# module in inference mode (this notebook only queries the models, it never
# trains them); it returns the module itself, so chaining is safe.
recommender = DeepFM(feature_dims=get_feature_sizes(ratings), **args["recommender"]).to(device).eval()
# NOTE(review): torch.load unpickles the checkpoint, so only load files from a
# trusted source (consider `weights_only=True` if the installed PyTorch supports it).
recommender.load_state_dict(torch.load("weights/collaborative/deepfm.pt", map_location=device))

# Rows of the shared embedding table from `offsets[1]` onward.
# NOTE(review): presumably these are the per-movie embeddings (second feature
# field after users) -- confirm against the DeepFM embedding layout.
# Detached so later similarity computations do not track gradients back into
# the recommender's parameters.
movie_embeddings = recommender.embedding.embedding.weight[recommender.embedding.offsets[1]:].detach().cpu()

encoder = Encoder().to(device).eval()

# Kept as the cell's last expression so the key-matching report is displayed.
encoder.load_state_dict(torch.load("weights/collaborative/encoder.pt", map_location=device))



<All keys matched successfully>

In [10]:
request = "I want to watch an action movie."

# Pure inference: run the encoder and the similarity computation without
# autograd so no computation graph is built or kept alive for these tensors.
with torch.no_grad():
    request_embedding = encoder(request).cpu()
    similarities = cosine_similarity(request_embedding, movie_embeddings)

# Positions of the 20 highest-scoring embeddings, best match first.
indices = torch.topk(similarities, k=20, largest=True).indices

# NOTE(review): this assumes row position i of `movies` corresponds to row i of
# `movie_embeddings` -- confirm against the movie-id encoding used in training.
# `.tolist()` hands pandas plain Python ints instead of a tensor.
movies.iloc[indices.tolist()]

Unnamed: 0,movie_id,movie_title
357,380,True Lies (1994)
1330,1552,Con Air (1997)
128,145,Bad Boys (1995)
452,485,Last Action Hero (1993)
388,420,Beverly Hills Cop III (1994)
1697,2001,Lethal Weapon 2 (1989)
1616,1918,Lethal Weapon 4 (1998)
1189,1370,Die Hard 2 (1990)
148,165,Die Hard: With a Vengeance (1995)
146,163,Desperado (1995)


In [13]:
# Best cosine similarity between the request and any movie embedding.
torch.max(similarities)

tensor(0.8778, grad_fn=<MaxBackward1>)