In [26]:
import sys
import warnings

# Silence FutureWarning noise before the heavier third-party imports below run.
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.constants import SEED
from recommenders.utils.timer import Timer

# surprise and cornac both export a class named `Dataset`. The bare imports keep
# their original order (the cornac import wins the bare name `Dataset`); the
# aliases give each class an unambiguous name.
from surprise import Dataset
from surprise import Dataset as SurpriseDataset
from cornac.data import Dataset
from cornac.data import Dataset as CornacDataset

### Load Data

In [19]:
# Column names of the ratings frame, in (user, item, rating) order.
COL_USER, COL_ITEM, COL_RATING = "UserId", "MovieId", "Rating"
# Name of the score column in the ranking output.
COL_PREDICTION = "prediction"

In [20]:
# Load the built-in MovieLens-1M ratings through scikit-surprise.
# The surprise alias is required here: on a fresh kernel the bare name `Dataset`
# is bound to cornac's class (imported last), which does not provide `load_builtin`.
data = SurpriseDataset.load_builtin('ml-1m')

In [21]:
# The trainset yields ratings keyed by inner ids; translate each one back to its
# raw user / item id so the frame carries the dataset's own identifiers.
trainset = data.build_full_trainset()
ratings = list(
    (trainset.to_raw_uid(inner_uid), trainset.to_raw_iid(inner_iid), score)
    for inner_uid, inner_iid, score in trainset.all_ratings()
)

# One row per rating, columns in (user, item, rating) order.
ratings_df = pd.DataFrame(ratings, columns=[COL_USER, COL_ITEM, COL_RATING])


In [22]:
# Sanity check: preview the first 10 rows of the ratings frame.
ratings_df.head(10)

Unnamed: 0,UserId,MovieId,Rating
0,1,1193,5.0
1,1,661,3.0
2,1,914,3.0
3,1,3408,4.0
4,1,2355,5.0
5,1,1197,3.0
6,1,1287,5.0
7,1,2804,5.0
8,1,594,4.0
9,1,919,4.0


In [23]:
# Random 75/25 hold-out split over individual ratings. The split is seeded so that
# Restart & Run All reproduces the same train/test partition, and therefore
# comparable metrics, on every run.
df_train, df_test = train_test_split(ratings_df, test_size=0.25, random_state=SEED)

In [27]:
# Build the cornac training set from (user, item, rating) triples.
# Selecting the columns explicitly keeps the triple order correct even if the
# frame later gains or reorders columns; the cornac alias avoids the `Dataset`
# name clash with surprise.
train_set = CornacDataset.from_uir(
    df_train[[COL_USER, COL_ITEM, COL_RATING]].itertuples(index=False),
    seed=SEED,
)

print('Number of users in trainset: {}'.format(train_set.num_users))
print('Number of items in trainset: {}'.format(train_set.num_items))

Number of users in trainset: 6040
Number of items in trainset: 3665


## Bilateral Variational Autoencoder

In [28]:
from cornac.models import BiVAECF

In [29]:
# Ranking cut-off: how many items are recommended per user.
TOP_K = 10

# BiVAECF hyper-parameters, kept as named constants so they are easy to tune.
LATENT_DIM = 50
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
NUM_EPOCHS = 500
BATCH_SIZE = 128
LEARNING_RATE = 1e-3

# Instantiate the (still untrained) model; the GPU is used only when CUDA is available.
model_recomm = BiVAECF(**{
    "k": LATENT_DIM,
    "encoder_structure": ENCODER_DIMS,
    "act_fn": ACT_FUNC,
    "likelihood": LIKELIHOOD,
    "n_epochs": NUM_EPOCHS,
    "batch_size": BATCH_SIZE,
    "learning_rate": LEARNING_RATE,
    "seed": SEED,
    "use_gpu": torch.cuda.is_available(),
    "verbose": True,
})

In [30]:
# Fit the model on the cornac training set and time the whole run.
# The recorded run below took roughly 1593 seconds for NUM_EPOCHS = 500.
with Timer() as t:
    model_recomm.fit(train_set)
print("Took {} seconds for training.".format(t))

  0%|          | 0/500 [00:00<?, ?it/s]

Took 1592.7019 seconds for training.


### Evaluation

In [31]:
# Produce the ranking scores for the users and items of the training frame,
# timing the call.
# NOTE(review): remove_seen=True presumably drops the user-item pairs already
# present in df_train so only unseen candidates remain — confirm against the
# recommenders documentation.
with Timer() as t:
    all_predictions = predict_ranking(
        model_recomm,
        df_train,
        usercol=COL_USER,
        itemcol=COL_ITEM,
        remove_seen=True,
    )
print("Took {} seconds for prediction.".format(t))

Took 40.1870 seconds for prediction.


In [35]:
# Inspect the first 20 rows of the ranking frame (user, item, predicted score).
all_predictions.head(20)


Unnamed: 0,UserId,MovieId,prediction
1,1,10,0.009665373
2,1,100,2.469421e-05
3,1,1000,3.390151e-08
4,1,1002,6.683798e-07
5,1,1003,4.590735e-06
6,1,1004,1.109255e-06
7,1,1005,0.001005209
8,1,1006,0.0002122127
9,1,1007,0.001509493
10,1,1008,0.0001739285


In [49]:
# Ranking metrics at cut-off TOP_K, computed on the held-out ratings.
# Column names come from the shared COL_* constants instead of repeated string
# literals, so renaming a column only has to happen in one place.
# Note: `map` here is the mean-average-precision metric imported from
# recommenders, which shadows the Python builtin of the same name.
ranking_kwargs = dict(
    col_user=COL_USER,
    col_item=COL_ITEM,
    col_prediction=COL_PREDICTION,
    k=TOP_K,
)

eval_map = map(df_test, all_predictions, **ranking_kwargs)
# nDCG is the only call here that is also given the rating column.
eval_ndcg = ndcg_at_k(df_test, all_predictions, col_rating=COL_RATING, **ranking_kwargs)
eval_precision = precision_at_k(df_test, all_predictions, **ranking_kwargs)
eval_recall = recall_at_k(df_test, all_predictions, **ranking_kwargs)



In [57]:
# Report each metric as "name=value" using the f-string `=` specifier (Python 3.8+).
print(f"{eval_map=}")
print(f"{eval_ndcg=}")
print(f"{eval_precision=}")
print(f"{eval_recall=}")

eval_map=0.08326726853581365
eval_ndcg=0.39960874881300773
eval_precision=0.3648344370860927
eval_recall=0.13889817425933856
