In [3]:
import pandas as pd
from rectools import Columns

In [4]:
df = pd.read_csv('../data/interim/rating.csv')

In [5]:
df.datetime = pd.to_datetime(df.datetime)

In [6]:
split_dt = pd.Timestamp("1998-03-01")
df_train = df.loc[df["datetime"] < split_dt]
df_test = df.loc[df["datetime"] >= split_dt]

In [19]:
# Remove non-intersected users and items in test set
df_test = df_test.loc[df_test[Columns.User].isin(df_train[Columns.User])]

In [20]:
df_train.shape

(77985, 4)

In [21]:
df_test.shape

(3811, 4)

In [47]:
from rectools.dataset import Dataset
from rectools.models import PopularModel

# Fit model and generate recommendations for all users
dataset = Dataset.construct(df_train)
model = PopularModel()
model.fit(dataset)
recos = model.recommend(
    users=df_train[Columns.User].unique(),
    dataset=dataset,
    k=10,
    filter_viewed=True,
)

In [48]:
recos

Unnamed: 0,user_id,item_id,score,rank
0,196,50,466.0,1
1,196,181,414.0,2
2,196,100,412.0,3
3,196,294,388.0,4
4,196,258,382.0,5
...,...,...,...,...
7355,578,1,372.0,6
7356,578,121,359.0,7
7357,578,286,359.0,8
7358,578,174,333.0,9


In [49]:
df_test[df_test.user_id == 186]

Unnamed: 0,user_id,item_id,weight,datetime
1,186,302,3,1998-04-04 19:22:22
1091,186,148,4,1998-04-04 19:56:14
5758,186,333,3,1998-04-04 19:40:20
8692,186,1253,4,1998-04-04 19:56:14
10136,186,257,4,1998-04-04 19:56:14
10792,186,327,3,1998-04-04 19:23:26
11500,186,988,4,1998-04-04 19:56:15
14183,186,546,4,1998-04-04 19:56:15
15164,186,338,3,1998-03-13 19:45:31
21493,186,306,4,1998-04-04 19:21:30


In [50]:
recos[recos.user_id == 186]

Unnamed: 0,user_id,item_id,score,rank
1890,186,50,466.0,1
1891,186,181,414.0,2
1892,186,1,372.0,3
1893,186,286,359.0,4
1894,186,174,333.0,5
1895,186,127,327.0,6
1896,186,7,323.0,7
1897,186,98,307.0,8
1898,186,222,305.0,9
1899,186,172,293.0,10


In [51]:
from rectools.metrics import (
    Precision,
    Accuracy,
    NDCG,
    IntraListDiversity,
    Serendipity,
    calc_metrics,
)

precision = Precision(k=10)
accuracy_1 = Accuracy(k=1)
accuracy_10 = Accuracy(k=10)
serendipity = Serendipity(k=10)

In [52]:
precision_value = precision.calc(reco=recos, interactions=df_test)
print(f"precision: {precision_value}")

precision_per_user = precision.calc_per_user(reco=recos, interactions=df_test)
print("\nprecision per user:")
display(precision_per_user.head())

print("Values are equal? ", precision_per_user.mean() == precision_value)

precision: 0.15083333333333335

precision per user:


user_id
1     0.1
2     0.1
13    0.2
14    0.4
23    0.0
dtype: float64

Values are equal?  True


In [43]:
catalog = df_train[Columns.Item].unique()


In [46]:
from rectools.metrics.classification import make_confusions, FN

confusion_df = make_confusions(recos, df_test, k=1)
confusion_df["TN"] = len(catalog) - 1 - confusion_df[FN]
confusion_df

Unnamed: 0_level_0,__LIKED,__TP,__FP,__FN,TN
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,11,0,1,11,1596
2,10,0,1,10,1597
13,29,0,1,29,1578
14,37,1,0,36,1571
23,1,0,1,1,1606
...,...,...,...,...,...
891,22,1,0,21,1586
894,7,0,1,7,1600
913,1,0,1,1,1606
924,6,0,1,6,1601


In [36]:
print("Accuracy@1: ", accuracy_1.calc(reco=recos, interactions=df_test, catalog=catalog))
print("Accuracy@10: ", accuracy_10.calc(reco=recos, interactions=df_test, catalog=catalog))

Accuracy@1:  0.9798248341625208
Accuracy@10:  0.9746113184079602


In [38]:
ndcg = NDCG(k=10, log_base=3)

In [39]:
print("NDCG: ", ndcg.calc(reco=recos, interactions=df_test))

NDCG:  0.0738215750949401
