In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import figure
from rectools import Columns
from rectools.dataset import Dataset
from rectools.metrics import Precision, Recall, MeanInvUserFreq, Serendipity, calc_metrics
from rectools.model_selection import TimeRangeSplitter
from rectools.models import PopularModel, PureSVDModel
from rectools.models import BayesProbMF, BayesProbMF_V2

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Seed NumPy's global RNG so the user sample drawn below is reproducible.
np.random.seed(0)
DATA_PATH = '../datasets/KION/data/'
users_df = pd.read_csv(DATA_PATH + 'users.csv')
items_df = pd.read_csv(DATA_PATH + 'items.csv')
interactions = pd.read_csv(DATA_PATH + 'interactions.csv')

# Time-based split: train on [2021-08-10, 2021-08-16), test on everything from 2021-08-16 onward.
# Alternative wider training window, kept for reference: last_watch_dt >= '2021-07-15'.
interactions_train = interactions.query("last_watch_dt >= '2021-08-10' and last_watch_dt < '2021-08-16'").copy()
interactions_test = interactions.query("last_watch_dt >= '2021-08-16'").copy()

# NOTE(review): np.random.choice draws WITH replacement by default, so the 10000 draws
# contain duplicates and fewer than 10000 distinct users survive the filter below.
# Pass replace=False if exactly 10000 distinct users are wanted (this changes all results).
sample_users = np.random.choice(interactions_train['user_id'].unique(), 10000)
# Take an explicit copy: the frame is modified below, and modifying a boolean-mask
# slice directly is what triggers pandas' SettingWithCopyWarning.
interactions_train = interactions_train[interactions_train['user_id'].isin(sample_users)].copy()

# Keep only test rows whose user AND item both occur in the sampled training data.
interactions_test = interactions_test.loc[
    interactions_test["user_id"].isin(interactions_train["user_id"])
    & interactions_test["item_id"].isin(interactions_train["item_id"])
]

# Missing watch percentages become 1; the column is then cast to int.
# Built without in-place mutation so re-running the cell is idempotent.
interactions_train = interactions_train.assign(
    watched_pct=interactions_train['watched_pct'].fillna(1).astype(int)
)

# Rename the raw columns to the rectools column names and restrict/reorder
# the frame to the columns listed in Columns.Interactions.
dataset = Dataset.construct(
    interactions_df=interactions_train
    .rename(
        columns={
            "user_id": Columns.User,
            "item_id": Columns.Item,
            "last_watch_dt": Columns.Datetime,
            "watched_pct": Columns.Weight,
        }
    )
    .reindex(columns=Columns.Interactions)
)

# Users for whom recommendations are generated and evaluated.
test_users = interactions_test["user_id"].unique()

# External <-> internal id lookups taken from the dataset's id maps.
user_ext_to_int_map = dataset.user_id_map.to_internal.to_dict()
item_int_to_ext_map = dataset.item_id_map.to_external.to_dict()

# User-item matrix; its shape is the cell's displayed output.
ui_csr = dataset.get_user_item_matrix()
ui_csr.shape

(9646, 3273)

In [8]:
# PureSVD baseline: fit on the training dataset, recommend the top 10 items
# for every test user, then score the lists against the held-out interactions.
pure_svd_model = PureSVDModel(verbose=1)
pure_svd_model.fit(dataset)

recs = pure_svd_model.recommend(
    users=test_users,
    dataset=dataset,
    k=10,
    filter_viewed=False,
)

# Both metrics are evaluated at the same cutoff as the recommendation length.
precision = Precision(k=10)
recall = Recall(k=10)
recall_value = recall.calc(reco=recs, interactions=interactions_test)
precision_value = precision.calc(reco=recs, interactions=interactions_test)

print('PureSVDModel')
print(f"precision: {precision_value}\nrecall: {recall_value}", '\n')
# Number of distinct users and distinct items present in the recommendations.
print(recs['user_id'].unique().shape, recs['item_id'].unique().shape)

(9646, 10) (3273, 10)
PureSVDModel
precision: 0.01652314316469322
recall: 0.06253695150651718 

(3716,) (132,)


In [22]:
# Shape of the array of distinct items across the PureSVD top-10 lists of the test users.
(
    pure_svd_model
    .recommend(users=test_users, dataset=dataset, k=10, filter_viewed=False)
    ['item_id']
    .unique()
    .shape
)

(132,)

In [9]:
# Hyperparameters are now passed through the constructor.
# (BayesProbMF is imported in the notebook's import cell.)
# NOTE(review): T and D look like the number of training iterations and the latent
# dimension — confirm against the BayesProbMF implementation.
T, D = 10, 10
initial_cutoff = 0
# NOTE(review): rating bounds presumably mirror the 0-100 range of watched_pct,
# which is used as the interaction weight — confirm.
lowest_rating, highest_rating = 0, 100

bpmf_model = BayesProbMF(
    verbose=1,
    T=T,
    D=D,
    initial_cutoff=initial_cutoff,
    lowest_rating=lowest_rating,
    highest_rating=highest_rating,
)
bpmf_model.fit(dataset)

# Top-10 recommendations for the test users; already-viewed items are not filtered out.
recs = bpmf_model.recommend(users=test_users, dataset=dataset, k=10, filter_viewed=False)

precision = Precision(k=10)
recall = Recall(k=10)

precision_value = precision.calc(reco=recs, interactions=interactions_test)
recall_value = recall.calc(reco=recs, interactions=interactions_test)
print(f"precision: {precision_value}\nrecall: {recall_value}")

min value: -0.0024455583416477314, max value: 0.0026533179393691216
Training RMSE at iteration  1  :    60.09
min value: -0.0039617276286774, max value: 0.004125243458777759
Training RMSE at iteration  2  :    60.09
min value: -0.004909373005951863, max value: 0.005332568799885863
Training RMSE at iteration  3  :    60.09
min value: -0.005911882246311435, max value: 0.006779283320778503
Training RMSE at iteration  4  :    60.09
min value: -0.007238215246120349, max value: 0.007864754936255986
Training RMSE at iteration  5  :    60.09
min value: -0.011080706575510458, max value: 0.012785474194250596
Training RMSE at iteration  6  :    60.09
min value: -0.017364457238910697, max value: 0.021287460823806746
Training RMSE at iteration  7  :    60.09
min value: -0.03966894039839562, max value: 0.04063581541383577
Training RMSE at iteration  8  :    60.09
min value: -0.09285482042088951, max value: 0.1660720112593094
Training RMSE at iteration  9  :    60.08
min value: -0.7942675646674952, m

In [11]:
# Value range of the fitted factor matrices, displayed as
# (user min, user max, item min, item max).
(
    bpmf_model.user_factors.min(),
    bpmf_model.user_factors.max(),
    bpmf_model.item_factors.min(),
    bpmf_model.item_factors.max(),
)

(-0.43142409615429583,
 0.4277209530235401,
 -3.512299337370797,
 3.218144013988817)

In [16]:
# Shape of the array of distinct items in the most recently computed recommendation table.
recs.loc[:, 'item_id'].unique().shape

(1170,)

In [4]:
# Second BPMF implementation, evaluated with the same protocol as the other models.
# (BayesProbMF_V2 is imported in the notebook's import cell.)
bpmf_model = BayesProbMF_V2(
    verbose=1,
    n_iters=10,
    n_feature=10,
    max_rating=100.,
    min_rating=0.,
    beta=2.,
    beta_user=2.,
    beta_item=2,
)
bpmf_model.fit(dataset)

# Top-10 recommendations for the test users; already-viewed items are not filtered out.
recs = bpmf_model.recommend(users=test_users, dataset=dataset, k=10, filter_viewed=False)

precision = Precision(k=10)
recall = Recall(k=10)

precision_value = precision.calc(reco=recs, interactions=interactions_test)
recall_value = recall.calc(reco=recs, interactions=interactions_test)
print(f"precision: {precision_value}\nrecall: {recall_value}")
# Number of distinct users and distinct items present in the recommendations.
print(recs['user_id'].unique().shape, recs['item_id'].unique().shape)

iteration: 1, train RMSE: 32.886853674875546
iteration: 2, train RMSE: 21.168039552554344
iteration: 3, train RMSE: 15.51718382166318
iteration: 4, train RMSE: 12.361803826521038
iteration: 5, train RMSE: 10.408551866928988
iteration: 6, train RMSE: 9.193664634369986
iteration: 7, train RMSE: 8.440823998333657
iteration: 8, train RMSE: 7.983437311026528
iteration: 9, train RMSE: 7.728564067336887
iteration: 10, train RMSE: 7.6100103601938915
precision: 0.004198062432723359
recall: 0.01704274251499529
(3716,) (1239,)
