In [1]:
import pandas as pd
import torch
import yaml
from torch import cosine_similarity

from model.encoder import Encoder
from model.recommender import DeepFM
from utils.data import get_feature_sizes

  from pandas.core.computation.check import NUMEXPR_INSTALLED
This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.
If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=
If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH
For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64

2024-10-05 04:11:17.567185: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-05 04:11:17.569386: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-05 04:11:17.574237: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:48

In [2]:
movies = pd.read_csv("data/ml-20m/movies.csv", header=0, names=["movie_id", "movie_title", "genres"])

movies = movies[["movie_id", "movie_title"]]

ratings = pd.read_csv("data/ml-20m/ratings.csv", header=0, names=["user_id", "movie_id", "rating", "timestamp"])
ratings = ratings[["user_id", "movie_id", "rating"]]

requests = pd.read_csv('data/ml-20m/requests.csv')
requests = requests.groupby("movie_id").agg({
    "movie_title": "first",
    "request": list,
}).reset_index()
requests.set_index("movie_id", inplace=True, drop=False)

user_id_to_unique_id = {user_id: i for i, user_id in enumerate(ratings["user_id"].unique())}
item_id_to_unique_id = {movie_id: i for i, movie_id in enumerate(requests["movie_id"].unique())}

In [9]:
movies["movie_id"] = movies["movie_id"].map(item_id_to_unique_id)

ratings["user_id"] = ratings["user_id"].map(user_id_to_unique_id)
ratings["movie_id"] = ratings["movie_id"].map(item_id_to_unique_id)


In [3]:
args = yaml.safe_load(open("configs/collaborative.yaml", "r"))

device = "cuda" if torch.cuda.is_available() else "cpu"

recommender = DeepFM(feature_dims=get_feature_sizes(ratings), **args["recommender"]).to(device)
recommender.load_state_dict(torch.load("weights/collaborative/deepfm.pt", map_location=device))

movie_embeddings = recommender.embedding.embedding.weight[recommender.embedding.offsets[1]:].cpu()

encoder = Encoder().to(device)
encoder.load_state_dict(torch.load("weights/collaborative/encoder.pt", map_location=device))



<All keys matched successfully>

In [4]:
user_id = 36
unique_movie_ids = torch.tensor([item_id_to_unique_id[movie_id] for movie_id in requests["movie_id"].unique()])
uniquer_user_id = torch.full_like(unique_movie_ids, user_id_to_unique_id[user_id])

In [5]:
ratings[ratings["user_id"] == user_id].merge(movies, on="movie_id").sort_values("rating", ascending=False)

Unnamed: 0,user_id,movie_id,rating,movie_title
16,36,2353,4.0,Enemy of the State (1998)
0,36,145,3.5,Bad Boys (1995)
8,36,1358,3.5,Sling Blade (1996)
13,36,2023,3.5,"Godfather: Part III, The (1990)"
9,36,1408,3.5,"Last of the Mohicans, The (1992)"
10,36,1597,3.5,Conspiracy Theory (1997)
19,36,58293,3.5,"10,000 BC (2008)"
6,36,1088,3.0,Dirty Dancing (1987)
1,36,163,3.0,Desperado (1995)
15,36,2302,3.0,My Cousin Vinny (1992)


In [6]:
request = "I want to watch a popular romantic comedy"

request_embedding = encoder(request).cpu()

similarities = cosine_similarity(request_embedding, movie_embeddings)

_, indices = torch.topk(similarities, k=10, largest=True)

movies.iloc[indices]

Unnamed: 0,movie_id,movie_title
552,597,Pretty Woman (1990)
334,357,Four Weddings and a Funeral (1994)
215,236,French Kiss (1995)
3494,4018,What Women Want (2000)
2366,2724,Runaway Bride (1999)
790,905,It Happened One Night (1934)
1138,1307,When Harry Met Sally... (1989)
2318,2671,Notting Hill (1999)
6023,7293,50 First Dates (2004)
267,289,Only You (1994)


In [7]:
rec_features = torch.stack([uniquer_user_id, unique_movie_ids], dim=-1).to(device)

user_ratings = recommender(rec_features).cpu()

rankings = user_ratings * similarities

_, indices = torch.topk(rankings, k=10, largest=True)

movies.iloc[indices]

Unnamed: 0,movie_id,movie_title
334,357,Four Weddings and a Funeral (1994)
552,597,Pretty Woman (1990)
790,905,It Happened One Night (1934)
215,236,French Kiss (1995)
2318,2671,Notting Hill (1999)
2366,2724,Runaway Bride (1999)
267,289,Only You (1994)
1138,1307,When Harry Met Sally... (1989)
6023,7293,50 First Dates (2004)
3494,4018,What Women Want (2000)
