In [1]:
import pandas as pd
from rectools import Columns

In [2]:
# Load the interim ratings table and parse its timestamp column into datetimes
df = pd.read_csv('../data/interim/rating.csv')
df["datetime"] = pd.to_datetime(df["datetime"])

# Split data
Split data into train and test sets by time. The train set should contain all the data before a certain time point. The test set should contain only users and items that are in the train set.

In [4]:
# Interactions strictly before the split timestamp go to train,
# interactions at or after it go to test.
split_dt = pd.Timestamp("1998-03-01")
before_split = df["datetime"].lt(split_dt)
from_split = df["datetime"].ge(split_dt)
df_train = df.loc[before_split]
df_test = df.loc[from_split]

In [5]:
# Keep only the test interactions whose user AND item both occur in the train set
known_user = df_test[Columns.User].isin(df_train[Columns.User])
known_item = df_test[Columns.Item].isin(df_train[Columns.Item])
df_test = df_test.loc[known_user & known_item]

In [6]:
# Size of the training split as (rows, columns)
df_train.shape

(77985, 4)

In [7]:
# Size of the test split as (rows, columns) after dropping users/items unseen in train
df_test.shape

(3715, 4)

# Fit model and generate recommendations
Make recommendations for all users in the train set.

In [10]:
from rectools.dataset import Dataset
from rectools.models import PureSVDModel

# Build the dataset from the training interactions and fit PureSVD on it
dataset = Dataset.construct(df_train)
model = PureSVDModel()
model.fit(dataset)

# Request 10 recommendations for every distinct user of the training split
train_users = df_train[Columns.User].unique()
recos = model.recommend(users=train_users, dataset=dataset, k=10, filter_viewed=True)

In [13]:
# Preview the first 10 rows of the recommendations table
recos.head(10)

Unnamed: 0,user_id,item_id,score,rank
0,196,275,1.252115,1
1,196,14,1.188147,2
2,196,83,1.128285,3
3,196,137,1.026659,4
4,196,283,1.010775,5
5,196,268,0.916849,6
6,196,216,0.823668,7
7,196,88,0.814514,8
8,196,168,0.805023,9
9,196,709,0.778019,10


# Evaluate the model
We will use the following metrics to evaluate the model:
- **MAP** focuses on the accuracy and relevance of the recommended items, as well as the order in which they are presented;
- **MIUF** ensures that the recommender system is not falling into the trap of suggesting the same popular items to everyone, thus maintaining individual user preferences;
- **Serendipity** adds value to the user experience by introducing novel and unexpected recommendations, enhancing user engagement.

In [15]:
from rectools.metrics import MAP, calc_metrics, MeanInvUserFreq, Serendipity

# Source: https://github.com/MobileTeleSystems/RecTools/blob/main/examples/5_benchmark_iALS_with_features.ipynb

# Display name -> metric class
metrics_name = {
    'MAP': MAP,
    'MIUF': MeanInvUserFreq,
    'Serendipity': Serendipity
}

# Instantiate every metric at each cutoff, keyed as "<name>@<k>"
metrics = {
    f'{metric_name}@{k}': metric(k=k)
    for metric_name, metric in metrics_name.items()
    for k in (1, 5, 10)
}

In [16]:
# Every distinct item that occurs in the training interactions
catalog = df_train[Columns.Item].unique()

# Evaluate the recommendations: test split as the interactions to compare against,
# train split as the previous interactions.
calc_metrics(
    metrics=metrics,
    reco=recos,
    interactions=df_test,
    prev_interactions=df_train,
    catalog=catalog,
)

{'MAP@1': 0.012814167708482482,
 'MAP@5': 0.03506828335993217,
 'MAP@10': 0.04703166095311614,
 'MIUF@1': 1.4890965142827466,
 'MIUF@5': 1.6994769174820519,
 'MIUF@10': 1.8418333388662567,
 'Serendipity@1': 0.000912556781310837,
 'Serendipity@5': 0.0008971785096257837,
 'Serendipity@10': 0.000849438270603504}

In [24]:
# NOTE(review): recomputes `catalog` exactly as the evaluation cell already does;
# none of the cells visible below read it, so this cell looks redundant.
catalog = df_train[Columns.Item].unique()


# Make recommendations for a specific user

In [33]:
# Number of test interactions per user, most active users first
df_test["user_id"].value_counts()

94     318
864    262
393    234
416    209
64     175
      ... 
654      1
454      1
790      1
838      1
913      1
Name: user_id, Length: 120, dtype: int64

In [54]:
# User whose recommendations and test interactions are inspected in the following cells
USER_ID = 102

In [55]:
# All recommendations generated for the selected user
recos.loc[recos["user_id"] == USER_ID]

Unnamed: 0,user_id,item_id,score,rank
200,102,748,1.606712,1
201,102,333,1.568864,2
202,102,269,1.559424,3
203,102,289,1.418947,4
204,102,268,1.351251,5
205,102,323,1.34419,6
206,102,690,1.26594,7
207,102,50,1.19998,8
208,102,340,1.105534,9
209,102,678,1.101053,10


In [56]:
# Items the selected user interacted with in the test split that were also recommended to them
user_test_items = set(df_test.loc[df_test["user_id"] == USER_ID, "item_id"])
user_reco_items = set(recos.loc[recos["user_id"] == USER_ID, "item_id"])
inter = user_test_items.intersection(user_reco_items)

In [57]:
# Test interactions of the selected user, restricted to the items in `inter`.
# Both conditions are combined into one mask built on df_test itself: chaining
# df_test[...][...] applied a mask computed on the full frame to an already
# filtered frame, which makes pandas warn "Boolean Series key will be reindexed
# to match DataFrame index". The selected rows are the same.
df_test[(df_test.user_id == USER_ID) & df_test.item_id.isin(inter)]

  df_test[df_test.user_id == USER_ID][df_test.item_id.isin(inter)]


Unnamed: 0,user_id,item_id,weight,datetime
12415,102,50,4,1998-03-02 01:15:15
15297,102,748,3,1998-03-02 01:09:54
46148,102,269,2,1998-04-01 10:53:16


In [58]:
# Recommendations for the selected user, restricted to the items in `inter`.
# Both conditions are combined into one mask built on recos itself: chaining
# recos[...][...] applied a mask computed on the full frame to an already
# filtered frame, which makes pandas warn "Boolean Series key will be reindexed
# to match DataFrame index". The selected rows are the same.
recos[(recos.user_id == USER_ID) & recos.item_id.isin(inter)]

  recos[recos.user_id == USER_ID][recos.item_id.isin(inter)]


Unnamed: 0,user_id,item_id,score,rank
200,102,748,1.606712,1
202,102,269,1.559424,3
207,102,50,1.19998,8


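PROBLEM: The model can recommend items that the user goes on to interact with in the test period but rates poorly (for example, user 102 rated item 269 with a 2, yet it is recommended at rank 3). This is not a good recommendation. But according to the metrics, the prediction is good, because the test interactions are passed to the metrics without any filtering by rating.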

SOLUTION: Think about a new metric that takes the user's rating into account.