In [1]:
## DO NOT ERASE THIS. IMPORTANT TO CORRECTLY IMPORT MODULES
# Puts the parent directory on the module search path so the `src` package
# imported in the next cell can be found.
import sys

# Guard the append so that re-running this cell does not stack duplicate
# "../" entries on sys.path.
if "../" not in sys.path:
    sys.path.append("../")

# Echo the interpreter path as the cell output (presumably a sanity check that
# the kernel runs inside the intended virtual environment).
sys.executable

'/Users/kristina/Desktop/University/COURSE_WORK/RecSys_thesis/recsysvenv/bin/python3.12'

In [2]:
import pandas as pd

# Let pandas render up to 102 rows before it truncates a displayed DataFrame.
pd.set_option("display.max_rows", 102)
from tqdm.auto import tqdm

import torch
from torch.utils.data import DataLoader, TensorDataset

from src.utils import (
    load_MovieLens,
    create_test_user,
    train_test_val_split,
    seed_everything,
    trainDataset,
    split_test_df,
    add_not_watched_movies,
    create_test_user_display_df,
)
from src.models import SVDPlusPlus
from src.metrics import reccomendation_report

## Constants

In [3]:
# Folder with the MovieLens-1M files, given relative to the notebook's working directory.
DATA_FOLDER = "../../data/ml-1m/"
# Single seed shared by seed_everything, train_test_val_split and the SVD++ model.
RANDOM_STATE = 7

In [4]:
# Fix the random seed before any data handling or training.
# NOTE(review): presumably seeds Python / NumPy / torch RNGs — confirm in src.utils.
seed_everything(RANDOM_STATE)

## Data

In [5]:
# Load the three MovieLens tables (users, movies, ratings) from DATA_FOLDER.
# Per the output further down, ratings carry userId / movieId / rating / timestamp
# and movies carry movieId / name / genre.
df_users, df_movies, df_ratings = load_MovieLens(DATA_FOLDER)

### Creating a test_user

In [6]:
# Movie ids that make up the synthetic test user's watch list.
TEST_USER_MOVIE_IDS = [6, 16, 3192, 1461, 827, 887, 593]

# NOTE(review): this rebinds df_users / df_ratings, so re-running the cell without
# re-running the load cell presumably registers a second test user — confirm.
df_users, df_ratings, new_user_id = create_test_user(
    df_users, df_ratings, TEST_USER_MOVIE_IDS
)

# Show the new user's ratings joined with the movie metadata.
test_user_ratings = df_ratings[df_ratings["userId"] == new_user_id]
print("Test user watch list:")
display(test_user_ratings.merge(df_movies, on="movieId"))

Test user watch list:


Unnamed: 0,userId,movieId,rating,timestamp,name,genre
0,6040,6,5,0,Sabrina (1995),Comedy|Romance
1,6040,16,5,1,Sense and Sensibility (1995),Drama|Romance
2,6040,3192,5,2,Singles (1992),Comedy|Drama|Romance
3,6040,1461,5,3,Love and Other Catastrophes (1996),Romance
4,6040,827,5,4,Emma (1996),Comedy|Drama|Romance
5,6040,887,5,5,Singin' in the Rain (1952),Musical|Romance
6,6040,593,5,6,Pretty Woman (1990),Comedy|Romance


### Train-test split
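Methodology: each user's last interaction is held out as the test item, and all of their earlier interactions form the training set. 20% of the held-out items are then set aside as a validation set; the remaining 80% make up the final test set.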

In [7]:
# Split the ratings into train / test / validation frames, seeded with RANDOM_STATE.
# NOTE(review): df_movies is presumably used to enrich test and val with extra
# candidate movies (see the "Enriching test/val" progress bars) — confirm in src.utils.
df_train, df_test, df_val = train_test_val_split(df_ratings, df_movies, RANDOM_STATE)

Enriching test:   0%|          | 0/4832 [00:00<?, ?it/s]

Enriching val:   0%|          | 0/1209 [00:00<?, ?it/s]

### Loading additional data

In [9]:
# The cosine-similarity tensor is read from a cache on disk instead of being rebuilt
# on every run. The commented-out snippet below is what (re)creates that cache; note
# that computeCosineSimilarities is not among this notebook's imports and has to be
# imported before uncommenting.
#
#     cos_dist = computeCosineSimilarities(df_train, "userId", "movieId", df_movies["movieId"].nunique())
#     with open("../../data/cos_dist.pt", "wb") as f:
#         torch.save(cos_dist, f)
#
# NOTE(review): torch.load unpickles the file, so only load a cache you produced yourself.
cos_dist = torch.load("../../data/cos_dist.pt")

# Popularity = number of training interactions per movie id, scaled by the largest
# count so that the most frequent movie gets 1.0.
n_movies = df_movies["movieId"].nunique()
train_movie_ids = torch.tensor(df_train["movieId"].values)
popularity = torch.bincount(train_movie_ids, minlength=n_movies)
popularity = popularity / popularity.max()

## SVD++

In [10]:
%%time
# Train SVD++ on the training interactions only. The recorded run logs 20 epochs
# and takes just under 3 minutes of wall time.
model = SVDPlusPlus(random_state=RANDOM_STATE)
model.fit(df_train)

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
CPU times: user 2min 49s, sys: 1.25 s, total: 2min 50s
Wall time: 2min 49s


In [11]:
%%time
# Score the test frame with the fitted model. The predictions are written into a new
# "rating_pred" column, i.e. df_test is modified in place.
df_test["rating_pred"] = model.predict(df_test)

CPU times: user 44.3 s, sys: 984 ms, total: 45.3 s
Wall time: 44.4 s


In [12]:
# Unpack the scored test frame into the three inputs that reccomendation_report takes.
# The string arguments name columns of df_test: user id, movie id, predicted score, and
# "action" (presumably the ground-truth label marking the held-out item — confirm).
pred, target, pred_items = split_test_df(
    df_test, "userId", "movieId", "rating_pred", "action"
)

In [13]:
# Evaluate the top-15 recommendations. The returned dict (shown as the cell output)
# holds Hit rate and NDCG at 15, plus Diversity (ILD) and Novelty (EPC).
reccomendation_report(pred, target, pred_items, cos_dist, popularity, k=15)

{'Hit rate @ 15': tensor(0.3601),
 'NDCG @ 15': tensor(0.1707),
 'Diversity (ILD)': tensor(0.0725),
 'Novelty (EPC)': tensor(0.8480)}

## Test user recommendations

In [14]:
# Build the candidate frame for the synthetic test user and score it with the model.
# NOTE(review): add_not_watched_movies presumably returns one row per movie the user
# has not interacted with — confirm in src.utils.
df_test_user = add_not_watched_movies(new_user_id, df_test, df_train, df_movies)
df_test_user["rating_pred"] = model.predict(df_test_user)

In [16]:
# Render the test user's recommendations with movie names and genres. In the recorded
# output this is 10 rows ordered by descending rating_pred.
display(create_test_user_display_df(df_test_user, df_movies, "rating_pred"))

Unnamed: 0,userId,movieId,name,genre,rating_pred
0,6040,315,"Shawshank Redemption, The (1994)",Drama,5.0
1,6040,49,"Usual Suspects, The (1995)",Crime|Thriller,4.995701
2,6040,1230,"Bridge on the River Kwai, The (1957)",Drama|War,4.91961
3,6040,3023,Chushingura (1962),Drama,4.863716
4,6040,663,Aparajito (1956),Drama,4.862007
5,6040,3401,Dersu Uzala (1974),Adventure|Drama,4.858487
6,6040,2728,Big (1988),Comedy|Fantasy,4.852229
7,6040,847,"Godfather, The (1972)",Action|Crime|Drama,4.850871
8,6040,3339,Erin Brockovich (2000),Drama,4.844189
9,6040,3732,Anatomy of a Murder (1959),Drama|Mystery,4.833993
