In [1]:
from copy import deepcopy

import pandas as pd
from ipywidgets import interact, fixed

from rectools.dataset import Interactions, Dataset
from rectools.models import RandomModel, PopularModel
from rectools.metrics import Precision, Recall, MeanInvUserFreq, Serendipity, MAP, MRR
from rectools.model_selection import TimeRangeSplitter

from utils.analyze_data import count_metrics, get_films_data, get_recos_per_user, visual_analisys

### Загрузка данных

In [2]:
# Load the raw interaction log and reshape it in a single chain:
#   * weight   = total_dur * watched_pct / 100
#   * datetime = last_watch_dt parsed to a pandas datetime
# The three source columns are dropped once the derived ones exist.
interactions_df = (
    pd.read_csv("../DATA/interactions.csv")
    .assign(
        weight=lambda frame: frame['total_dur'] * frame['watched_pct'] / 100,
        datetime=lambda frame: pd.to_datetime(frame['last_watch_dt']),
    )
    .drop(columns=['last_watch_dt', 'total_dur', 'watched_pct'])
)
interactions_df.head()

Unnamed: 0,user_id,item_id,weight,datetime
0,176549,9506,3060.0,2021-05-11
1,699317,1659,8317.0,2021-05-29
2,656683,7107,0.0,2021-05-09
3,864613,7638,14483.0,2021-07-05
4,964868,9506,6725.0,2021-04-30


In [3]:
# Item metadata table (one row per item_id); preview the first five rows.
items_data = pd.read_csv(filepath_or_buffer="../DATA/items.csv")
items_data.head(n=5)

Unnamed: 0,item_id,content_type,title,title_orig,release_year,genres,countries,for_kids,age_rating,studios,directors,actors,description,keywords
0,10711,film,Поговори с ней,Hable con ella,2002.0,"драмы, зарубежные, детективы, мелодрамы",Испания,,16.0,,Педро Альмодовар,"Адольфо Фернандес, Ана Фернандес, Дарио Гранди...",Мелодрама легендарного Педро Альмодовара «Пого...,"Поговори, ней, 2002, Испания, друзья, любовь, ..."
1,2508,film,Голые перцы,Search Party,2014.0,"зарубежные, приключения, комедии",США,,16.0,,Скот Армстронг,"Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...",Уморительная современная комедия на популярную...,"Голые, перцы, 2014, США, друзья, свадьбы, прео..."
2,10716,film,Тактическая сила,Tactical Force,2011.0,"криминал, зарубежные, триллеры, боевики, комедии",Канада,,16.0,,Адам П. Калтраро,"Адриан Холмс, Даррен Шалави, Джерри Вассерман,...",Профессиональный рестлер Стив Остин («Все или ...,"Тактическая, сила, 2011, Канада, бандиты, ганг..."
3,7868,film,45 лет,45 Years,2015.0,"драмы, зарубежные, мелодрамы",Великобритания,,16.0,,Эндрю Хэй,"Александра Риддлстон-Барретт, Джеральдин Джейм...","Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...","45, лет, 2015, Великобритания, брак, жизнь, лю..."
4,16268,film,Все решает мгновение,,1978.0,"драмы, спорт, советские, мелодрамы",СССР,,12.0,Ленфильм,Виктор Садовский,"Александр Абдулов, Александр Демьяненко, Алекс...",Расчетливая чаровница из советского кинохита «...,"Все, решает, мгновение, 1978, СССР, сильные, ж..."


In [4]:
# User attribute table (one row per user_id); preview the first five rows.
users_data = pd.read_csv(filepath_or_buffer="../DATA/users.csv")
users_data.head(n=5)

Unnamed: 0,user_id,age,income,sex,kids_flg
0,973171,age_25_34,income_60_90,М,1
1,962099,age_18_24,income_20_40,М,0
2,1047345,age_45_54,income_40_60,Ж,0
3,721985,age_45_54,income_20_40,Ж,0
4,704055,age_35_44,income_60_90,Ж,0


### Инициализация метрик и всего, что нужно для обучения

In [7]:
# Evaluation constants used throughout the notebook.
K_RECOS = 10
N_FOLDS = 3
RANDOM_STATE = 32

# Models under comparison, keyed by the label used for them in this notebook.
models = dict(
    random=RandomModel(random_state=RANDOM_STATE),
    popular=PopularModel(),
)

# One metric instance per (family, cutoff) pair, stored under "<family>@<k>".
# Families are listed in the order their keys must appear in the dict;
# for each family the cutoffs 1, 5, 10 are generated in that order.
metrics = {
    f"{family}@{cutoff}": metric_cls(k=cutoff)
    for family, metric_cls in (
        ("prec", Precision),
        ("recall", Recall),
        ("map", MAP),
        ("mrr", MRR),
        ("serendipity", Serendipity),
        ("novelty", MeanInvUserFreq),
    )
    for cutoff in (1, 5, 10)
}

# Wrap the prepared interactions frame in the RecTools container.
interactions = Interactions(interactions_df)

# Time-based splitter: N_FOLDS splits, each with a "7D" test size.
splitter = TimeRangeSplitter(
    n_splits=N_FOLDS,
    test_size="7D",
    filter_cold_users=True,
    filter_cold_items=True,
    filter_already_seen=True,
)

In [12]:
# Run the project helper `count_metrics` (from utils.analyze_data) over the
# configured models, metrics, splitter and interactions with K_RECOS, and keep
# its result for display in the following cells.
# NOTE(review): presumably this performs the per-fold cross-validation — the
# recorded progress bar shows 3 iterations, matching N_FOLDS; confirm in the helper.
metrics_table = count_metrics(models, metrics, splitter, interactions, K_RECOS)

100%|██████████| 3/3 [01:01<00:00, 20.36s/it]


In [13]:
# Display the metrics table (one row per model; mean/std columns per metric in the recorded output).
metrics_table


Unnamed: 0_level_0,time,prec@1,prec@1,recall@1,recall@1,prec@5,prec@5,recall@5,recall@5,prec@10,...,novelty@5,novelty@5,novelty@10,novelty@10,serendipity@1,serendipity@1,serendipity@5,serendipity@5,serendipity@10,serendipity@10
Unnamed: 0_level_1,sum,mean,std,mean,std,mean,std,mean,std,mean,...,mean,std,mean,std,mean,std,mean,std,mean,std
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
random,7.8e-05,0.000221,3.3e-05,7.2e-05,1.9e-05,0.000202,2.8e-05,0.000365,8.4e-05,0.000193,...,15.612989,0.01957,15.613009,0.019786,6e-06,2.109823e-06,7e-06,5.111606e-07,7e-06,2.550754e-07
popular,4.770093,0.076432,0.006826,0.04272,0.004366,0.052402,0.001618,0.137413,0.005346,0.033903,...,3.066979,0.012316,3.71339,0.002076,2e-06,3.733454e-08,3e-06,1.262334e-07,2e-06,8.147679e-08


In [14]:
# Sometimes the transposed view (metrics as rows, models as columns) is easier to read.
metrics_table.transpose()

Unnamed: 0,model,random,popular
time,sum,7.820129e-05,4.770093
prec@1,mean,0.0002213983,0.07643235
prec@1,std,3.303078e-05,0.006826076
recall@1,mean,7.154415e-05,0.04272039
recall@1,std,1.865373e-05,0.004366103
prec@5,mean,0.0002024117,0.05240204
prec@5,std,2.838593e-05,0.001618343
recall@5,mean,0.0003653462,0.1374129
recall@5,std,8.396608e-05,0.005346473
prec@10,mean,0.0001929096,0.03390346


### Обучим модель на всем датасете и посмотрим на результат сами

In [9]:
# Users whose recommendations are inspected by hand in the next cell.
test_users = [666262, 672861, 955527]

# Build the training dataset from the complete interaction log (no split).
train_set = Dataset.construct(interactions_df)

# Fit a private copy of the popular model. Assigning models['popular'] directly
# would only create an alias, so .fit() would mutate the very instance stored in
# `models` that the evaluation cell also receives; copying keeps that registry
# untouched regardless of the order in which cells are executed.
pop_trained = deepcopy(models['popular'])
pop_trained.fit(train_set)

# Item lookup enriched with title/genres, then per-user recommendations and
# watch history produced by the project helpers from utils.analyze_data.
films_data = get_films_data(interactions_df, items_data, extra_columns_list=['title', 'genres'])
recos_extended_info, history_extended_info = get_recos_per_user(
    pop_trained, interactions_df, test_users, films_data, K_RECOS
)

In [11]:
# Interactive viewer: `usr_id` is given the list `test_users`, which is rendered
# as a dropdown in the recorded output; the selected value is passed to
# visual_analisys. Arguments wrapped in `fixed(...)` are forwarded as-is and get
# no widget. The trailing `;` suppresses the echo of interact's return value.
interact(
    visual_analisys,
    usr_id=test_users,
    recos=fixed(recos_extended_info),
    history=fixed(history_extended_info),
    users_data=fixed(users_data));

interactive(children=(Dropdown(description='usr_id', options=(666262, 672861, 955527), value=666262), Output()…