In [95]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import numpy as np
import pandas as pd

from tqdm.auto import tqdm

from implicit.nearest_neighbours import TFIDFRecommender, BM25Recommender
from implicit.als import AlternatingLeastSquares

from rectools import Columns
from rectools.dataset import Dataset
from rectools.metrics import Precision, Recall, MeanInvUserFreq, Serendipity, calc_metrics
from rectools.models import ImplicitItemKNNWrapperModel, RandomModel, PopularModel
from rectools.model_selection import TimeRangeSplitter, cross_validate
from rectools.visuals import MetricsApp
from implicit.nearest_neighbours import TFIDFRecommender
from rectools.models import ImplicitItemKNNWrapperModel


In [100]:
data = pd.read_csv(
    "rectools.csv",
    sep=",",
    engine="python",  
    header=None,
    names=[Columns.User, Columns.Item, Columns.Datetime, Columns.Weight],
)
data = data[1:]
dataset = Dataset.construct(data)

n_splits = 3
splitter = TimeRangeSplitter(
    test_size="14D",
    n_splits=n_splits,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)
models = {
    "random": RandomModel(random_state=42),
    "popular": PopularModel(),
    "most_rated": PopularModel(popularity="sum_weight"),
    "tfidf_k=5": ImplicitItemKNNWrapperModel(model=TFIDFRecommender(K=5)),
    "tfidf_k=10": ImplicitItemKNNWrapperModel(model=TFIDFRecommender(K=10)),
    "bm25_k=10_k1=0.05_b=0.1": ImplicitItemKNNWrapperModel(model=BM25Recommender(K=5, K1=0.05, B=0.1)),
}

# We will calculate several classic (precision@k and recall@k) and "beyond accuracy" metrics
metrics = {
    "prec@1": Precision(k=1),
    "prec@10": Precision(k=10),
    "recall@10": Recall(k=10),
    "novelty@10": MeanInvUserFreq(k=10),
    "serendipity@10": Serendipity(k=10),
}

K_RECS = 10


In [101]:
cv_results = cross_validate(
    dataset=dataset,
    splitter=splitter,
    models=models,
    metrics=metrics,
    k=K_RECS,
    filter_viewed=True,
)

In [104]:
pivot_results = (
    pd.DataFrame(cv_results["metrics"])
    .drop(columns="i_split")
    .groupby(["model"], sort=False)
    .agg(["mean"])
)
pivot_results.columns = pivot_results.columns.droplevel(1)


In [105]:
pivot_results

Unnamed: 0_level_0,prec@1,prec@10,recall@10,novelty@10,serendipity@10
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
random,0.003309,0.002886,0.000113,9.575497,1.9e-05
popular,0.040744,0.032478,0.007756,3.203251,6e-06
most_rated,0.040744,0.032478,0.007756,3.203251,6e-06
tfidf_k=5,0.054448,0.057984,0.025779,5.70954,0.000254
tfidf_k=10,0.054155,0.058543,0.027579,5.667466,0.000253
bm25_k=10_k1=0.05_b=0.1,0.043599,0.056612,0.026416,3.961774,6.4e-05
