In [120]:
import random

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader

from recommender.models import Recommender
from recommender.data_processing import get_context, pad_list, map_column, MASK, PAD

In [121]:
# Directory holding the MovieLens CSV files. Switch datasets by editing this
# single value (e.g. "../data/ml-latest-small" for quick experiments) rather
# than toggling commented-out path lines.
DATASET_DIR = "../data/ml-25m"

data_csv_path = f"{DATASET_DIR}/ratings.csv"
movies_path = f"{DATASET_DIR}/movies.csv"

# Checkpoint file whose "state_dict" entry holds the Recommender weights.
model_path = "./recommender_models/recommender.ckpt"

In [122]:
# Ratings table: this notebook reads its userId, movieId and timestamp columns.
data = pd.read_csv(filepath_or_buffer=data_csv_path)
# Movie metadata: this notebook reads its movieId and title columns.
movies = pd.read_csv(filepath_or_buffer=movies_path)

In [123]:
# Order the ratings chronologically; rebinding (instead of inplace=True) keeps
# the cell a plain expression that yields the same sorted frame.
data = data.sort_values(by="timestamp")

In [124]:
# Re-index the movieId column; `mapping` is later used to translate raw
# movieIds into model indices (see the title lookup tables further down).
data, mapping, inverse_mapping = map_column(data, col_name="movieId")

# Per-user view of the ratings.
grp_by_train = data.groupby("userId")

# Number of distinct mapped movies (the model vocabulary is built from this).
print(len(mapping))

59047


In [125]:
# Peek at ten user ids. A dedicated, seeded generator makes the displayed
# sample reproducible on Restart & Run All without touching the global
# `random` state.
random.Random(42).sample(list(grp_by_train.groups), k=10)

[130928, 90662, 20846, 39089, 110798, 21897, 133466, 72530, 86431, 111063]

In [126]:
# vocab_size adds two slots on top of the mapped movies — presumably for the
# PAD and MASK tokens; confirm against recommender.data_processing.
# NOTE(review): lr and dropout look like training-time settings; they are
# passed unchanged here so the constructor call matches the original.
model = Recommender(
    vocab_size=len(mapping) + 2,
    lr=1e-4,
    dropout=0.3,
)
# Inference only: switch off training-mode layers such as dropout.
model.eval()
# map_location="cpu" lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine; the prediction code below builds its inputs on the CPU.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
model.load_state_dict(torch.load(model_path, map_location="cpu")["state_dict"])

<All keys matched successfully>

In [127]:
# (title, model index) for every movie in movies.csv that has a mapped movieId.
title_index_pairs = [
    (title, mapping[movie_id])
    for title, movie_id in zip(movies["title"].tolist(), movies["movieId"].tolist())
    if movie_id in mapping
]

# Title -> model index, used to encode a watch history given as titles.
# When two movies share a title, the later row wins (as in a dict comprehension).
movie_to_idx = dict(title_index_pairs)

# Model index -> title, used to decode predictions. It is built straight from
# the pairs instead of inverting movie_to_idx: inverting would drop every
# index whose title is shared with another movie, so those movies could never
# be shown as recommendations.
idx_to_movie = {movie_idx: title for title, movie_idx in title_index_pairs}

In [128]:
def predict(list_movies, model, movie_to_idx, idx_to_movie, history_size=120, top_k=10):
    """Return the titles the model ranks highest as the next movie to watch.

    The watch history is encoded as ``[PAD, ..., PAD, idx_1, ..., idx_n, MASK]``
    and passed to ``model``; the scores at the final (``MASK``) position are
    ranked from highest to lowest.

    Args:
        list_movies: Titles already watched, in order. Each must be a key of
            ``movie_to_idx``.
        model: Callable that takes a ``(1, seq_len)`` long tensor and returns a
            tensor whose ``[0, -1]`` slice holds one score per vocabulary index.
        movie_to_idx: Mapping from title to model index.
        idx_to_movie: Mapping from model index to title.
        history_size: Length the encoded sequence is left-padded to (including
            the trailing ``MASK``). Longer histories are passed through
            unpadded and untruncated, as before.
        top_k: Maximum number of titles to return.

    Returns:
        Up to ``top_k`` titles, best first. Indices that appear in the input
        sequence (history, ``PAD``, ``MASK``) or that have no entry in
        ``idx_to_movie`` are skipped *before* the cut-off, so the list is only
        shorter than ``top_k`` when the vocabulary runs out.

    Raises:
        KeyError: If any title in ``list_movies`` is not in ``movie_to_idx``.
    """
    unknown_titles = [title for title in list_movies if title not in movie_to_idx]
    if unknown_titles:
        raise KeyError(f"Titles missing from movie_to_idx: {unknown_titles}")

    history_ids = [movie_to_idx[title] for title in list_movies]
    # Left-pad so the MASK token sits at the last position of the window.
    n_pad = max(0, history_size - len(history_ids) - 1)
    ids = [PAD] * n_pad + history_ids + [MASK]

    src = torch.tensor(ids, dtype=torch.long).unsqueeze(0)

    with torch.no_grad():
        prediction = model(src)

    # .detach().cpu() makes the NumPy conversion work wherever the model
    # placed its output tensor.
    masked_pred = prediction[0, -1].detach().cpu().numpy()

    # Set membership keeps the filter linear in the vocabulary size.
    excluded = set(ids)
    ranked_ids = np.argsort(masked_pred).tolist()[::-1]

    recommendations = []
    for candidate in ranked_ids:
        if len(recommendations) >= top_k:
            break
        if candidate in excluded or candidate not in idx_to_movie:
            continue
        recommendations.append(idx_to_movie[candidate])

    return recommendations

### Scenario 1: Adventure/Fantasy


In [129]:
# Watch history: the first four Harry Potter films, in release order.
# NOTE(review): the recorded outputs of all three scenario cells are almost
# identical even though the histories differ a lot — worth checking that the
# checkpoint and the movie index mapping match the ones used for training.
list_movies = [
    "Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
    "Harry Potter and the Chamber of Secrets (2002)",
    "Harry Potter and the Prisoner of Azkaban (2004)",
    "Harry Potter and the Goblet of Fire (2005)",
]

top_movie = predict(
    list_movies=list_movies,
    model=model,
    movie_to_idx=movie_to_idx,
    idx_to_movie=idx_to_movie,
)
top_movie

['Wild Strawberries (Smultronstället) (1957)',
 'Blood Simple (1984)',
 'Double Indemnity (1944)',
 'Yojimbo (1961)',
 '8 1/2 (8½) (1963)',
 'Strangers on a Train (1951)',
 'Seventh Seal, The (Sjunde inseglet, Det) (1957)',
 'Serpico (1973)',
 'M (1931)',
 'Conversation, The (1974)']

### Scenario 2: Action/Adventure


In [130]:
# Watch history: a run of superhero films.
list_movies = [
    "Black Panther (2017)",
    "Avengers, The (2012)",
    "Avengers: Infinity War - Part I (2018)",
    "Logan (2017)",
    "Spider-Man (2002)",
    "Spider-Man 3 (2007)",
    # Disabled by the original author:
    # "Spider-Man: Far from Home (2019)",
]

top_movie = predict(
    list_movies=list_movies,
    model=model,
    movie_to_idx=movie_to_idx,
    idx_to_movie=idx_to_movie,
)
top_movie

['Wild Strawberries (Smultronstället) (1957)',
 'Blood Simple (1984)',
 'Double Indemnity (1944)',
 'Strangers on a Train (1951)',
 '8 1/2 (8½) (1963)',
 'Yojimbo (1961)',
 'M (1931)',
 'Seventh Seal, The (Sjunde inseglet, Det) (1957)',
 'Notorious (1946)',
 'Key Largo (1948)']

### Scenario 3: Comedy


In [131]:
# Watch history: animated features followed by two comedies.
list_movies = [
    "Zootopia (2016)",
    "Toy Story 3 (2010)",
    # Disabled by the original author:
    # "Toy Story 4 (2019)",
    "Finding Nemo (2003)",
    "Ratatouille (2007)",
    "The Lego Movie (2014)",
    "Ghostbusters (a.k.a. Ghost Busters) (1984)",
    "Ace Ventura: When Nature Calls (1995)",
]

top_movie = predict(
    list_movies=list_movies,
    model=model,
    movie_to_idx=movie_to_idx,
    idx_to_movie=idx_to_movie,
)
top_movie

['Wild Strawberries (Smultronstället) (1957)',
 'Blood Simple (1984)',
 'Double Indemnity (1944)',
 'Yojimbo (1961)',
 '8 1/2 (8½) (1963)',
 'Seventh Seal, The (Sjunde inseglet, Det) (1957)',
 'Strangers on a Train (1951)',
 'M (1931)',
 'Serpico (1973)',
 'Persona (1966)']