In [1]:
! pip install rectools



In [2]:
import pandas as pd
import numpy as np
from copy import deepcopy
from rectools.model_selection import TimeRangeSplitter
from rectools import Columns
from rectools.dataset import Dataset, Interactions
from rectools.metrics import calc_metrics, Precision, Recall, Serendipity, MeanInvUserFreq, MAP, NDCG
from rectools.models import PopularModel, RandomModel

In [23]:
intdataset = pd.read_csv("interactions.csv")
item_dataset = pd.read_csv("items.csv")

In [4]:
names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime]
print(names)
intdataset.head()


['user_id', 'item_id', 'weight', 'datetime']


Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0


In [37]:
intdataset = intdataset.rename(columns={"total_dur":"weight","last_watch_dt":"datetime"})

In [38]:
intdataset.head()

Unnamed: 0,user_id,item_id,datetime,weight,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0


In [7]:
dataset0 = Interactions(intdataset)

In [8]:
n_splits = 3

cv = TimeRangeSplitter(
    test_size="14D",
    n_splits=n_splits,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)
models = {
    "random": RandomModel(random_state=42),
    "popular": PopularModel()
}

metrics = {
    "precision": Precision(k=10),
    "recall": Recall(k=10),
    "novelty": MeanInvUserFreq(k=10),
    "serendipity": Serendipity(k=10),
    "MAP@1": MAP(k=1),
    "MAP@5": MAP(k=5),
    "MAP@10": MAP(k=10),
    'NDCG@10': NDCG(k=10)
}

In [9]:
def crossvalidation(models,metrics,cv,qrek):
    results = []

    fold_iterator = cv.split(dataset0, collect_fold_stats=True)

    for train_ids, test_ids, fold_info in fold_iterator:
        print(f"\n==================== Fold {fold_info['i_split']}")

        df_train = dataset0.df.iloc[train_ids]
        dataset = Dataset.construct(df_train)

        df_test = dataset0.df.iloc[test_ids][Columns.UserItem]
        test_users = np.unique(df_test[Columns.User])

        # Catalog is set of items that we recommend.
        # Sometimes we recommend not all items from train.
        catalog = df_train[Columns.Item].unique()

        for model_name, model in models.items():
            model.fit(dataset)
            recos = model.recommend(
                users=test_users,
                dataset=dataset,
                k=qrek,
                filter_viewed=True,
            )
            metric_values = calc_metrics(
                metrics,
                reco=recos,
                interactions=df_test,
                prev_interactions=df_train,
                catalog=catalog,
            )
            res = {"fold": fold_info["i_split"], "model": model_name}
            res.update(metric_values)
            results.append(res)
    fincross = pd.DataFrame(results)
    crossdata = fincross.groupby("model")[list(metrics.keys())].mean()
    return crossdata

In [10]:
resultcross = crossvalidation(models,metrics,cv,10)






In [15]:
model = PopularModel()
dataset = Dataset.construct(intdataset) 
model.fit(dataset)

<rectools.models.popular.PopularModel at 0x22583aabc10>

In [40]:
def visialanaliz(model,dataset_interactions,user_ids,item_data):
    dataset = Dataset.construct(dataset_interactions)
    model_recomends = model.recommend(users = user_ids, dataset = dataset, k = 10, filter_viewed = True)
    dataset_interactions_merge = dataset_interactions.merge(item_data, on = "item_id")
    model_recomends_merge = model_recomends.merge(item_data, on = "item_id")
    for user in user_ids:
        print(user)
        print(dataset_interactions_merge[dataset_interactions_merge["user_id"] == user][["datetime","title","genres"]].sort_values(by = "datetime"))
        print(model_recomends_merge[model_recomends_merge["user_id"] == user][["title","genres"]])
        print()


In [41]:
visialanaliz(model,intdataset,[666262, 672861, 955527],item_dataset)

666262
           datetime              title                                 genres
4612573  2021-05-12   Последний викинг     боевики, историческое, приключения
4907745  2021-05-12  Робин Гуд: Начало         боевики, триллеры, приключения
1645045  2021-05-14            Томирис  боевики, драмы, историческое, военные
                           title                       genres
0                    Хрустальный          триллеры, детективы
3                Клиника счастья             драмы, мелодрамы
6              Гнев человеческий            боевики, триллеры
9                       Девятаев  драмы, военные, приключения
12        Секреты семейной жизни                      комедии
15  Прабабушка легкого поведения                      комедии
18                    Подслушано              драмы, триллеры
21                         Афера                      комедии
24                          Маша              драмы, триллеры
27                         Дуров               документальное