## DSSM Model

In [72]:
!pip install -U sentence-transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Requirement already up-to-date: sentence-transformers in /home/lis/Utkov/TEMP/reco_venv/lib/python3.8/site-packages (2.5.1)


In [78]:
import re
import json
import numpy as np
import os
import pandas as pd
import pickle
import tensorflow as tf
import tensorflow.keras.backend as K

from collections import Counter
from random import randint, random
from scipy.sparse import coo_matrix, hstack
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from tensorflow import keras
from tqdm import tqdm
tqdm.pandas()

from sentence_transformers import SentenceTransformer

Загрузим данные

In [4]:
DATA_PATH = "/home/lis/Utkov/TEMP/DATA"

interactions_df = pd.read_csv(os.path.join(DATA_PATH, 'interactions.csv'))
users_df = pd.read_csv(os.path.join(DATA_PATH, 'users.csv'))
items_df = pd.read_csv(os.path.join(DATA_PATH, 'items.csv'))

In [5]:
users_df.head()

Unnamed: 0,user_id,age,income,sex,kids_flg
0,973171,age_25_34,income_60_90,М,1
1,962099,age_18_24,income_20_40,М,0
2,1047345,age_45_54,income_40_60,Ж,0
3,721985,age_45_54,income_20_40,Ж,0
4,704055,age_35_44,income_60_90,Ж,0


In [6]:
items_df.head()

Unnamed: 0,item_id,content_type,title,title_orig,release_year,genres,countries,for_kids,age_rating,studios,directors,actors,description,keywords
0,10711,film,Поговори с ней,Hable con ella,2002.0,"драмы, зарубежные, детективы, мелодрамы",Испания,,16.0,,Педро Альмодовар,"Адольфо Фернандес, Ана Фернандес, Дарио Гранди...",Мелодрама легендарного Педро Альмодовара «Пого...,"Поговори, ней, 2002, Испания, друзья, любовь, ..."
1,2508,film,Голые перцы,Search Party,2014.0,"зарубежные, приключения, комедии",США,,16.0,,Скот Армстронг,"Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...",Уморительная современная комедия на популярную...,"Голые, перцы, 2014, США, друзья, свадьбы, прео..."
2,10716,film,Тактическая сила,Tactical Force,2011.0,"криминал, зарубежные, триллеры, боевики, комедии",Канада,,16.0,,Адам П. Калтраро,"Адриан Холмс, Даррен Шалави, Джерри Вассерман,...",Профессиональный рестлер Стив Остин («Все или ...,"Тактическая, сила, 2011, Канада, бандиты, ганг..."
3,7868,film,45 лет,45 Years,2015.0,"драмы, зарубежные, мелодрамы",Великобритания,,16.0,,Эндрю Хэй,"Александра Риддлстон-Барретт, Джеральдин Джейм...","Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...","45, лет, 2015, Великобритания, брак, жизнь, лю..."
4,16268,film,Все решает мгновение,,1978.0,"драмы, спорт, советские, мелодрамы",СССР,,12.0,Ленфильм,Виктор Садовский,"Александр Абдулов, Александр Демьяненко, Алекс...",Расчетливая чаровница из советского кинохита «...,"Все, решает, мгновение, 1978, СССР, сильные, ж..."


In [7]:
interactions_df.head()

Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct
0,176549,9506,2021-05-11,4250,72.0
1,699317,1659,2021-05-29,8317,100.0
2,656683,7107,2021-05-09,10,0.0
3,864613,7638,2021-07-05,14483,100.0
4,964868,9506,2021-04-30,6725,100.0


Закодируем категориальные фичи с помощью one-hot encoding.

In [8]:
user_cat_feats = ["age", "income", "sex", "kids_flg"]
# из исходного датафрейма оставим только item_id - этот признак нам понадобится позже
# для того, чтобы маппить айтемы из датафрейма с фильмами с айтемами
# из датафрейма с взаимодействиями
users_ohe_df = users_df.user_id
for feat in user_cat_feats:
    # получаем датафрейм с one-hot encoding для каждой категориальной фичи
    ohe_feat_df = pd.get_dummies(users_df[feat], prefix=feat)
    # конкатенируем ohe-hot датафрейм с датафреймом,
    # который мы получили на предыдущем шаге
    users_ohe_df = pd.concat([users_ohe_df, ohe_feat_df], axis=1)

users_ohe_df.head()


Unnamed: 0,user_id,age_age_18_24,age_age_25_34,age_age_35_44,age_age_45_54,age_age_55_64,age_age_65_inf,income_income_0_20,income_income_150_inf,income_income_20_40,income_income_40_60,income_income_60_90,income_income_90_150,sex_Ж,sex_М,kids_flg_0,kids_flg_1
0,973171,False,True,False,False,False,False,False,False,False,False,True,False,False,True,False,True
1,962099,True,False,False,False,False,False,False,False,True,False,False,False,False,True,True,False
2,1047345,False,False,False,True,False,False,False,False,False,True,False,False,True,False,True,False
3,721985,False,False,False,True,False,False,False,False,True,False,False,False,True,False,True,False
4,704055,False,False,True,False,False,False,False,False,False,False,True,False,True,False,True,False


Тоже самое делаем с фичами айтемов

In [9]:
items_df.head()

Unnamed: 0,item_id,content_type,title,title_orig,release_year,genres,countries,for_kids,age_rating,studios,directors,actors,description,keywords
0,10711,film,Поговори с ней,Hable con ella,2002.0,"драмы, зарубежные, детективы, мелодрамы",Испания,,16.0,,Педро Альмодовар,"Адольфо Фернандес, Ана Фернандес, Дарио Гранди...",Мелодрама легендарного Педро Альмодовара «Пого...,"Поговори, ней, 2002, Испания, друзья, любовь, ..."
1,2508,film,Голые перцы,Search Party,2014.0,"зарубежные, приключения, комедии",США,,16.0,,Скот Армстронг,"Адам Палли, Брайан Хаски, Дж.Б. Смув, Джейсон ...",Уморительная современная комедия на популярную...,"Голые, перцы, 2014, США, друзья, свадьбы, прео..."
2,10716,film,Тактическая сила,Tactical Force,2011.0,"криминал, зарубежные, триллеры, боевики, комедии",Канада,,16.0,,Адам П. Калтраро,"Адриан Холмс, Даррен Шалави, Джерри Вассерман,...",Профессиональный рестлер Стив Остин («Все или ...,"Тактическая, сила, 2011, Канада, бандиты, ганг..."
3,7868,film,45 лет,45 Years,2015.0,"драмы, зарубежные, мелодрамы",Великобритания,,16.0,,Эндрю Хэй,"Александра Риддлстон-Барретт, Джеральдин Джейм...","Шарлотта Рэмплинг, Том Кортни, Джеральдин Джей...","45, лет, 2015, Великобритания, брак, жизнь, лю..."
4,16268,film,Все решает мгновение,,1978.0,"драмы, спорт, советские, мелодрамы",СССР,,12.0,Ленфильм,Виктор Садовский,"Александр Абдулов, Александр Демьяненко, Алекс...",Расчетливая чаровница из советского кинохита «...,"Все, решает, мгновение, 1978, СССР, сильные, ж..."


In [10]:
item_cat_feats = ['content_type','for_kids', 'age_rating',
                  'studios', 'countries', 'directors']

items_ohe_df = items_df.item_id

for feat in item_cat_feats:
    ohe_feat_df = pd.get_dummies(items_df[feat], prefix=feat)
    items_ohe_df = pd.concat([items_ohe_df, ohe_feat_df], axis=1)

items_ohe_df.head()

Unnamed: 0,item_id,content_type_film,content_type_series,for_kids_0.0,for_kids_1.0,age_rating_0.0,age_rating_6.0,age_rating_12.0,age_rating_16.0,age_rating_18.0,...,directors_Яннике Систад Якобсен,directors_Янус Мец,directors_Ярив Хоровиц,directors_Ярон Зильберман,directors_Ярополк Лапшин,directors_Ярослав Лупий,"directors_Ярроу Чейни, Скотт Моужер",directors_Ясина Сезар,directors_Ясуоми Умэцу,directors_сения Завьялова
0,10711,True,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,2508,True,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2,10716,True,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,7868,True,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
4,16268,True,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False


Отфильтруем непопулярные фильмы и малоактивных пользователей

In [11]:
interactions_df.item_id.value_counts()

item_id
10440    202457
15297    193123
9728     132865
13865    122119
4151      91167
          ...  
8076          1
8954          1
15664         1
818           1
10542         1
Name: count, Length: 15706, dtype: int64

In [12]:
interactions_df.user_id.value_counts()

user_id
416206     1341
1010539     764
555233      685
11526       676
409259      625
           ... 
45493         1
615194        1
96848         1
425823        1
697262        1
Name: count, Length: 962179, dtype: int64

Отфильтруем малоактивных юзеров и непопулярные фильмы.

In [13]:
print(f"N users before: {interactions_df.user_id.nunique()}")
print(f"N items before: {interactions_df.item_id.nunique()}\n")

# отфильтруем все события взаимодействий, в которых пользователь посмотрел
# фильм менее чем на 10 процентов
interactions_df = interactions_df[interactions_df.watched_pct > 10]

# соберем всех пользователей, которые посмотрели
# больше 10 фильмов
valid_users = []

c = Counter(interactions_df.user_id)
for user_id, entries in c.most_common():
    if entries > 10:
        valid_users.append(user_id)

# и соберем все фильмы, которые посмотрели больше 10 пользователей
valid_items = []

c = Counter(interactions_df.item_id)
for item_id, entries in c.most_common():
    if entries > 10:
        valid_items.append(item_id)

# отбросим непопулярные фильмы и неактивных юзеров
interactions_df = interactions_df[interactions_df.user_id.isin(valid_users)]
interactions_df = interactions_df[interactions_df.item_id.isin(valid_items)]

print(f"N users after: {interactions_df.user_id.nunique()}")
print(f"N items after: {interactions_df.item_id.nunique()}")

N users before: 962179
N items before: 15706

N users after: 79515
N items after: 6901


После фильтрации может получиться так, что некоторые айтемы/юзеры есть в датасете взаимодействий, но при этом они отсутствуют в датасетах айтемов/юзеров или наоборот. Поэтому найдем id айтемов и id юзеров, которые есть во всех датасетах и оставим только их.

In [14]:
common_users = set(interactions_df.user_id.unique()).intersection(set(users_ohe_df.user_id.unique()))
common_items = set(interactions_df.item_id.unique()).intersection(set(items_ohe_df.item_id.unique()))

print(len(common_users))
print(len(common_items))

interactions_df = interactions_df[interactions_df.item_id.isin(common_items)]
interactions_df = interactions_df[interactions_df.user_id.isin(common_users)]

items_ohe_df = items_ohe_df[items_ohe_df.item_id.isin(common_items)]
users_ohe_df = users_ohe_df[users_ohe_df.user_id.isin(common_users)]

65974
6901


In [15]:
print(interactions_df.item_id.nunique())
print(items_ohe_df.item_id.nunique())
print(interactions_df.user_id.nunique())
print(users_ohe_df.user_id.nunique())

print(set(items_ohe_df.item_id.unique()) - set(interactions_df.item_id.unique()))

6897
6901
65974
65974
{11805, 9788, 11501, 1734}


In [16]:
items_ohe_df = items_ohe_df[~items_ohe_df['item_id'].isin([11805, 9788, 11501, 1734])]

In [17]:
print(set(items_ohe_df.item_id.unique()) - set(interactions_df.item_id.unique()))

set()


Соберем взаимодействия в матрицу user*item так, чтобы в строках этой матрицы были user_id, в столбцах - item_id, а на пересечениях строк и столбцов - доля просмотра фильма (1 будет стоять, если фильм просмотрен от начала до конца). Таким образом будет учтено качество взаимодействия юзеров с айтемами.

In [18]:
interactions_df["uid"] = interactions_df["user_id"].astype("category")
interactions_df["uid"] = interactions_df["uid"].cat.codes

interactions_df["iid"] = interactions_df["item_id"].astype("category")
interactions_df["iid"] = interactions_df["iid"].cat.codes

print(sorted(interactions_df.iid.unique())[:5])
print(sorted(interactions_df.uid.unique())[:5])
interactions_df.head()

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]


Unnamed: 0,user_id,item_id,last_watch_dt,total_dur,watched_pct,uid,iid
0,176549,9506,2021-05-11,4250,72.0,10616,3944
1,699317,1659,2021-05-29,8317,100.0,42131,675
6,1016458,354,2021-08-14,1672,25.0,61024,139
7,884009,693,2021-08-04,703,14.0,53150,279
14,5324,8437,2021-04-18,6598,92.0,310,3485


Отнормируем матрицу взаимодействий

In [19]:
interactions_vec = np.zeros((interactions_df.uid.nunique(),
                             interactions_df.iid.nunique()))

for user_id, item_id, weight in zip(
    interactions_df.uid, interactions_df.iid, interactions_df.watched_pct
):
    interactions_vec[user_id, item_id] += weight / 100


res = interactions_vec.sum(axis=1)
for i in range(len(interactions_vec)):
    interactions_vec[i] /= res[i]

In [20]:
print(interactions_df.item_id.nunique())
print(items_ohe_df.item_id.nunique())
print(interactions_df.user_id.nunique())
print(users_ohe_df.user_id.nunique())

print(set(items_ohe_df.item_id.unique()) - set(interactions_df.item_id.unique()))

6897
6897
65974
65974
set()


Для того, чтобы можно было удобно превратить iid/uid в item_id/user_id и наоборот соберем словари

{iid: item_id}, {uid: user_id} и {item_id: iid}, {user_id: uid}.

In [21]:
iid_to_item_id = interactions_df[["iid", "item_id"]].drop_duplicates().set_index("iid").to_dict()["item_id"]
item_id_to_iid = interactions_df[["iid", "item_id"]].drop_duplicates().set_index("item_id").to_dict()["iid"]

uid_to_user_id = interactions_df[["uid", "user_id"]].drop_duplicates().set_index("uid").to_dict()["user_id"]
user_id_to_uid = interactions_df[["uid", "user_id"]].drop_duplicates().set_index("user_id").to_dict()["uid"]

И проиндексируем датасеты users_ohe_df и items_ohe_df по внутренним айди:

In [22]:
items_ohe_df["iid"] = items_ohe_df["item_id"].apply(lambda x: item_id_to_iid[x])
items_ohe_df = items_ohe_df.set_index("iid")

users_ohe_df["uid"] = users_ohe_df["user_id"].apply(lambda x: user_id_to_uid[x])
users_ohe_df = users_ohe_df.set_index("uid")

### Loss

In [23]:
def triplet_loss(y_true, y_pred, n_dims=128, alpha=0.4):
    # будем ожидать, что на вход функции прилетит три сконкатенированных
    # вектора - вектор юзера и два вектора айтема
    anchor = y_pred[:, 0:n_dims]
    positive = y_pred[:, n_dims:n_dims*2]
    negative = y_pred[:, n_dims*2:n_dims*3]

    # считаем расстояния от вектора юзера до вектора хорошего айтема
    pos_dist = K.sum(K.square(anchor - positive), axis=1)
    # и до плохого
    neg_dist = K.sum(K.square(anchor - negative), axis=1)

    # считаем лосс
    basic_loss = pos_dist - neg_dist + alpha
    loss = K.maximum(basic_loss, 0.0) # возвращаем ноль, если лосс отрицательный

    return loss

Используем текстовые описния как доп источник информации. При помощи готовых моделей с huggingface закодируем текст в вектора размерности 312. Возмем самую легкую модель rubert-tiny2

In [37]:
print(items_df['description'].shape)

(15963,)


In [40]:
test_model = SentenceTransformer('cointegrated/rubert-tiny2')
embeddings = test_model.encode(items_df['description'])

print(embeddings.shape)

(15963, 312)


In [55]:
text_features = pd.DataFrame(embeddings)
text_features = pd.concat([items_df['item_id'], text_features], axis=1)
items_ohe_df = items_ohe_df.merge(text_features, on='item_id', how='left')
items_ohe_df

Unnamed: 0,item_id,content_type_film,content_type_series,for_kids_0.0,for_kids_1.0,age_rating_0.0,age_rating_6.0,age_rating_12.0,age_rating_16.0,age_rating_18.0,...,302,303,304,305,306,307,308,309,310,311
0,9853,True,False,False,False,False,False,False,True,False,...,0.016188,0.078133,-0.037567,0.066776,-0.113581,-0.034610,0.044744,-0.036373,0.008021,-0.043091
1,3526,True,False,False,False,False,False,False,False,True,...,0.013010,0.066581,0.006887,0.058789,-0.030617,-0.021004,-0.027264,0.007896,0.009952,-0.089032
2,15076,True,False,False,False,False,False,False,False,True,...,0.041563,0.094007,0.024585,0.059866,0.000668,-0.080835,-0.068151,0.000403,0.006932,-0.008984
3,2904,True,False,False,False,False,False,False,False,True,...,0.018184,0.082696,0.003527,0.077134,-0.038041,-0.033782,-0.006754,-0.023854,0.006545,-0.013249
4,2635,True,False,False,False,False,True,False,False,False,...,-0.002527,0.051450,0.000902,0.076608,0.005378,-0.018742,0.017306,0.034842,0.034779,0.041840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6892,15610,False,True,False,False,False,False,False,True,False,...,-0.007200,-0.011505,0.030815,0.092085,-0.045808,-0.045111,0.078325,-0.057818,-0.006877,-0.072205
6893,6443,False,True,False,False,False,False,False,True,False,...,0.048668,0.105520,-0.009835,0.051581,-0.014555,-0.013426,-0.051969,-0.039778,0.009511,-0.051055
6894,2367,False,True,True,False,False,False,False,False,True,...,0.038956,0.112002,0.050387,0.068221,0.007175,-0.000047,-0.005421,-0.050936,-0.036506,-0.033906
6895,10632,False,True,True,False,False,False,False,False,True,...,0.026992,0.109124,0.020732,0.066368,-0.016810,-0.044144,-0.041571,-0.063304,-0.057703,-0.089809


## Генератор и семплирование

Сделаем простой генератор. Он будет брать рандромного юзера, и два разных айтема - хороший пример и плохой:
- хорошим примером будет тот айтем, который был взят из датасета взаимодействий в соответствии с распределением просмотренных айтемов для этого юзера;
- а плохим айтемом будет просто любой другой _случайный айтем_*


In [56]:
def generator(items, users, interactions, batch_size=1024):
    while True:
        uid_meta = []
        uid_interaction = []
        pos = []
        neg = []
        for _ in range(batch_size):
            # берем рандомный uid
            uid_i = randint(0, interactions.shape[0]-1)
            # id хорошего айтема
            pos_i = np.random.choice(range(interactions.shape[1]), p=interactions[uid_i])
            # id плохого айтема
            neg_i = np.random.choice(range(interactions.shape[1]))
            # фичи юзера
            uid_meta.append(users.iloc[uid_i])
            # вектор айтемов, с которыми юзер взаимодействовал
            uid_interaction.append(interactions_vec[uid_i])
            # фичи хорошего айтема
            pos.append(items.iloc[pos_i])
            # фичи плохого айтема
            neg.append(items.iloc[neg_i])

        yield [np.array(uid_meta), np.array(uid_interaction), np.array(pos), np.array(neg)], [np.array(uid_meta), np.array(uid_interaction)]


In [57]:
# инициализируем генератор
gen = generator(items=items_ohe_df.drop(["item_id"], axis=1),
                users=users_ohe_df.drop(["user_id"], axis=1),
                interactions=interactions_vec)

ret = next(gen)


print(f"вектор фичей юзера: {ret[0][0].shape}")
print(f"вектор взаимодействий юзера с айтемами: {ret[0][1].shape}")
print(f"вектор 'хорошего' айтема: {ret[0][2].shape}")
print(f"вектор 'плохого' айтема: {ret[0][3].shape}")
print()
print(f"вектор фичей юзера: {ret[1][0].shape}")
print(f"вектор взаимодействий юзера с айтемами: {ret[1][1].shape}")

вектор фичей юзера: (1024, 16)
вектор взаимодействий юзера с айтемами: (1024, 6897)
вектор 'хорошего' айтема: (1024, 9020)
вектор 'плохого' айтема: (1024, 9020)

вектор фичей юзера: (1024, 16)
вектор взаимодействий юзера с айтемами: (1024, 6897)


## Модель

In [58]:
N_FACTORS = 128

ITEM_MODEL_SHAPE = (items_ohe_df.drop(["item_id"], axis=1).shape[1], )
USER_META_MODEL_SHAPE = (users_ohe_df.drop(["user_id"], axis=1).shape[1], )

USER_INTERACTION_MODEL_SHAPE = (interactions_vec.shape[1], )

print(f"N_FACTORS: {N_FACTORS}")
print(f"ITEM_MODEL_SHAPE: {ITEM_MODEL_SHAPE}")
print(f"USER_META_MODEL_SHAPE: {USER_META_MODEL_SHAPE}")
print(f"USER_INTERACTION_MODEL_SHAPE: {USER_INTERACTION_MODEL_SHAPE}")

N_FACTORS: 128
ITEM_MODEL_SHAPE: (9020,)
USER_META_MODEL_SHAPE: (16,)
USER_INTERACTION_MODEL_SHAPE: (6897,)


In [60]:
def item_model(n_factors=N_FACTORS):
    # входной слой
    inp = keras.layers.Input(shape=ITEM_MODEL_SHAPE)

    # полносвязный слой
    layer_1 = keras.layers.Dense(N_FACTORS, activation='elu', use_bias=False,
                               kernel_regularizer=keras.regularizers.l2(1e-6),
                               activity_regularizer=keras.regularizers.l2(l2=1e-6))(inp)

    # делаем residual connection - складываем два слоя,
    # чтобы градиенты не затухали во время обучения
    layer_2 = keras.layers.Dense(N_FACTORS, activation='elu', use_bias=False,
                             kernel_regularizer=keras.regularizers.l2(1e-6),
                             activity_regularizer=keras.regularizers.l2(l2=1e-6))(layer_1)

    add = keras.layers.Add()([layer_1, layer_2])

    # выходной слой
    out = keras.layers.Dense(N_FACTORS, activation='linear', use_bias=False,
                             kernel_regularizer=keras.regularizers.l2(1e-6),
                             activity_regularizer=keras.regularizers.l2(l2=1e-6))(add)

    return keras.models.Model(inp, out)


def user_model(n_factors=N_FACTORS):
    # входной слой для вектора фичей юзера (из users_ohe_df)
    inp_meta = keras.layers.Input(shape=USER_META_MODEL_SHAPE)
    # входной слой для вектора просмотров (из iteractions_vec)
    inp_interaction = keras.layers.Input(shape=USER_INTERACTION_MODEL_SHAPE)

    # полносвязный слой
    layer_1_meta = keras.layers.Dense(N_FACTORS, activation='elu', use_bias=False,
                                 kernel_regularizer=keras.regularizers.l2(1e-6),
                                 activity_regularizer=keras.regularizers.l2(l2=1e-6))(inp_meta)

    layer_1_interaction = keras.layers.Dense(N_FACTORS, activation='elu', use_bias=False,
                                 kernel_regularizer=keras.regularizers.l2(1e-6),
                                 activity_regularizer=keras.regularizers.l2(l2=1e-6))(inp_interaction)

    # делаем residual connection - складываем два слоя,
    # чтобы градиенты не затухали во время обучения
    layer_2_meta = keras.layers.Dense(N_FACTORS, activation='elu', use_bias=False,
                                 kernel_regularizer=keras.regularizers.l2(1e-6),
                                 activity_regularizer=keras.regularizers.l2(l2=1e-6))(layer_1_meta)


    add = keras.layers.Add()([layer_1_meta, layer_2_meta])

    # конкатенируем вектор фичей с вектором просмотров
    concat_meta_interaction = keras.layers.Concatenate()([add, layer_1_interaction])

    # выходной слой
    out = keras.layers.Dense(N_FACTORS, activation='linear', use_bias=False,
                             kernel_regularizer=keras.regularizers.l2(1e-6),
                             activity_regularizer=keras.regularizers.l2(l2=1e-6))(concat_meta_interaction)

    return keras.models.Model([inp_meta, inp_interaction], out)

# инициализируем модели юзера и айтема
i2v = item_model()
u2v = user_model()

# вход для вектора фичей юзера (из users_ohe_df)
ancor_meta_in = keras.layers.Input(shape=USER_META_MODEL_SHAPE)
# вход для вектора просмотра юзера (из interactions_vec)
ancor_interaction_in = keras.layers.Input(shape=USER_INTERACTION_MODEL_SHAPE)

# вход для вектора "хорошего" айтема
pos_in = keras.layers.Input(shape=ITEM_MODEL_SHAPE)
# вход для вектора "плохого" айтема
neg_in = keras.layers.Input(shape=ITEM_MODEL_SHAPE)

# получаем вектор юзера
ancor = u2v([ancor_meta_in, ancor_interaction_in])
# получаем вектор "хорошего" айтема
pos = i2v(pos_in)
# получаем вектор "плохого" айтема
neg = i2v(neg_in)

# конкатенируем полученные векторы
res = keras.layers.Concatenate(name="concat_ancor_pos_neg")([ancor, pos, neg])

# собираем модель
model = keras.models.Model([ancor_meta_in, ancor_interaction_in, pos_in, neg_in], res)

2024-03-11 00:50:31.777470: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [61]:
model_name = 'recsys_resnet_linear'

# логируем процесс обучения в тензорборд
t_board = keras.callbacks.TensorBoard(log_dir=f'runs/{model_name}')

# уменьшаем learning_rate, если лосс долго не уменьшается (в течение двух эпох)
decay = keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=2, factor=0.8, verbose=1)

# сохраняем модель после каждой эпохи, если лосс уменьшился
check = keras.callbacks.ModelCheckpoint(filepath=model_name + '/epoch{epoch}-{loss:.2f}.h5', monitor="loss")


In [62]:
# компилируем модель, используем оптимайзер Adam и triplet loss
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=triplet_loss, optimizer=opt)

In [63]:
# модель айтема
item_model().summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_8 (InputLayer)        [(None, 9020)]               0         []                            
                                                                                                  
 dense_7 (Dense)             (None, 128)                  1154560   ['input_8[0][0]']             
                                                                                                  
 dense_8 (Dense)             (None, 128)                  16384     ['dense_7[0][0]']             
                                                                                                  
 add_2 (Add)                 (None, 128)                  0         ['dense_7[0][0]',             
                                                                     'dense_8[0][0]']       

In [64]:
# модель юзера
user_model().summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_9 (InputLayer)        [(None, 16)]                 0         []                            
                                                                                                  
 dense_10 (Dense)            (None, 128)                  2048      ['input_9[0][0]']             
                                                                                                  
 dense_12 (Dense)            (None, 128)                  16384     ['dense_10[0][0]']            
                                                                                                  
 input_10 (InputLayer)       [(None, 6897)]               0         []                            
                                                                                            

In [65]:
# общая модель
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 16)]                 0         []                            
                                                                                                  
 input_5 (InputLayer)        [(None, 6897)]               0         []                            
                                                                                                  
 input_6 (InputLayer)        [(None, 9020)]               0         []                            
                                                                                                  
 input_7 (InputLayer)        [(None, 9020)]               0         []                            
                                                                                            

In [49]:
model.fit(generator(items=items_ohe_df.drop(["item_id"], axis=1),
                    users=users_ohe_df.drop(["user_id"], axis=1),
                    interactions=interactions_vec,
                    batch_size=120),
          steps_per_epoch=100,
          epochs=30,
          initial_epoch=0,
          callbacks=[decay, t_board, check]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.000800000037997961.
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 27: ReduceLROnPlateau reducing learning rate to 0.0006400000303983689.
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7de70a2eb340>

In [50]:
# берем рандомного юзера
rand_uid = np.random.choice(list(users_ohe_df.index))

# получаем фичи юзера и вектор его просмотров айтемов
user_meta_feats = users_ohe_df.drop(["user_id"], axis=1).iloc[rand_uid]
user_interaction_vec = interactions_vec[rand_uid]

# берем рандомный айтем
rand_iid = np.random.choice(list(items_ohe_df.index))
# получаем фичи айтема
item_feats = items_ohe_df.drop(["item_id"], axis=1).iloc[rand_iid]

# получаем вектор юзера
user_vec = u2v.predict([np.array(user_meta_feats).reshape(1, -1),
                        np.array(user_interaction_vec).reshape(1, -1)])

# и вектор айтема
item_vec = i2v.predict(np.array(item_feats).reshape(1, -1))

# считаем расстояние между вектором юзера и вектором айтема
euclidean_distances(user_vec, item_vec)



array([[1.3203154]], dtype=float32)

Заранее посчитаем рекомендации и сохраним их в файл

In [51]:
# получаем фичи всех айтемов
items_feats = items_ohe_df.drop(["item_id"], axis=1).to_numpy(dtype=np.float32)
# получаем векторы всех айтемов
items_vecs = i2v.predict(items_feats)

# считаем расстояния
dists = euclidean_distances(user_vec, items_vecs)



In [89]:
def batch_recommend(user_ids, items_vecs):
  uids = [user_id_to_uid[user_id] for user_id in user_ids]
  users_meta_feats = users_ohe_df.drop(["user_id"], axis=1).iloc[uids]
  users_interaction_vec = interactions_vec[uids]

  users_vecs = u2v.predict([np.array(users_meta_feats), np.array(users_interaction_vec)])

  dists = euclidean_distances(users_vecs, items_vecs)
  top10_iids = np.argsort(dists, axis=1)[:,:10]
  top10_iids_items = [iid_to_item_id[iid] for iid in top10_iids.reshape(-1)]
  top10_iids_items = np.array(top10_iids_items).reshape(top10_iids.shape).tolist()
  user_ids = [int(user_id) for user_id in user_ids]
  return dict(zip(user_ids, top10_iids_items))

In [71]:
recos = {}
for user_ids in tqdm(np.array_split(list(user_id_to_uid.keys()), 300)):
    recos_for_users = batch_recommend(user_ids, items_vecs)
    recos.update(recos_for_users)
    
with open("dssm_recos.pkl", "wb") as reco_file:
    pickle.dump(recos, reco_file)

  0%|          | 0/300 [00:00<?, ?it/s]



  0%|          | 1/300 [00:00<01:17,  3.85it/s]



  1%|          | 2/300 [00:00<01:16,  3.89it/s]



  1%|          | 3/300 [00:00<01:14,  4.01it/s]



  1%|▏         | 4/300 [00:01<01:17,  3.82it/s]



  2%|▏         | 5/300 [00:01<01:15,  3.91it/s]



  2%|▏         | 6/300 [00:01<01:17,  3.79it/s]



  2%|▏         | 7/300 [00:01<01:15,  3.88it/s]



  3%|▎         | 8/300 [00:02<01:13,  3.97it/s]



  3%|▎         | 9/300 [00:02<01:13,  3.98it/s]



  3%|▎         | 10/300 [00:02<01:14,  3.87it/s]



  4%|▎         | 11/300 [00:02<01:12,  3.98it/s]



  4%|▍         | 12/300 [00:03<01:10,  4.07it/s]



  4%|▍         | 13/300 [00:03<01:16,  3.77it/s]



  5%|▍         | 14/300 [00:03<01:20,  3.56it/s]



  5%|▌         | 15/300 [00:04<01:26,  3.28it/s]



  5%|▌         | 16/300 [00:04<01:28,  3.21it/s]



  6%|▌         | 17/300 [00:04<01:34,  3.00it/s]



  6%|▌         | 18/300 [00:05<01:38,  2.87it/s]



  6%|▋         | 19/300 [00:05<01:37,  2.88it/s]



  7%|▋         | 20/300 [00:05<01:38,  2.85it/s]



  7%|▋         | 21/300 [00:06<01:42,  2.73it/s]



  7%|▋         | 22/300 [00:06<01:35,  2.90it/s]



  8%|▊         | 23/300 [00:06<01:39,  2.79it/s]



  8%|▊         | 24/300 [00:07<01:35,  2.88it/s]



  8%|▊         | 25/300 [00:07<01:34,  2.91it/s]



  9%|▊         | 26/300 [00:07<01:32,  2.95it/s]



  9%|▉         | 27/300 [00:08<01:38,  2.78it/s]



  9%|▉         | 28/300 [00:08<01:29,  3.05it/s]



 10%|▉         | 29/300 [00:08<01:22,  3.29it/s]



 10%|█         | 30/300 [00:09<01:20,  3.37it/s]



 10%|█         | 31/300 [00:09<01:14,  3.59it/s]



 11%|█         | 32/300 [00:09<01:11,  3.74it/s]



 11%|█         | 33/300 [00:09<01:12,  3.69it/s]



 11%|█▏        | 34/300 [00:10<01:12,  3.68it/s]



 12%|█▏        | 35/300 [00:10<01:08,  3.84it/s]



 12%|█▏        | 36/300 [00:10<01:08,  3.86it/s]



 12%|█▏        | 37/300 [00:10<01:06,  3.97it/s]



 13%|█▎        | 38/300 [00:11<01:05,  4.02it/s]



 13%|█▎        | 39/300 [00:11<01:04,  4.06it/s]



 13%|█▎        | 40/300 [00:11<01:08,  3.82it/s]



 14%|█▎        | 41/300 [00:11<01:08,  3.77it/s]



 14%|█▍        | 42/300 [00:12<01:09,  3.71it/s]



 14%|█▍        | 43/300 [00:12<01:08,  3.73it/s]



 15%|█▍        | 44/300 [00:12<01:10,  3.64it/s]



 15%|█▌        | 45/300 [00:13<01:10,  3.62it/s]



 15%|█▌        | 46/300 [00:13<01:13,  3.47it/s]



 16%|█▌        | 47/300 [00:13<01:18,  3.24it/s]



 16%|█▌        | 48/300 [00:13<01:14,  3.37it/s]



 16%|█▋        | 49/300 [00:14<01:10,  3.58it/s]



 17%|█▋        | 50/300 [00:14<01:07,  3.71it/s]



 17%|█▋        | 51/300 [00:14<01:06,  3.74it/s]



 17%|█▋        | 52/300 [00:14<01:03,  3.89it/s]



 18%|█▊        | 53/300 [00:15<01:05,  3.80it/s]



 18%|█▊        | 54/300 [00:15<01:02,  3.91it/s]



 18%|█▊        | 55/300 [00:15<01:04,  3.79it/s]



 19%|█▊        | 56/300 [00:15<01:02,  3.91it/s]



 19%|█▉        | 57/300 [00:16<01:01,  3.94it/s]



 19%|█▉        | 58/300 [00:16<01:00,  4.01it/s]



 20%|█▉        | 59/300 [00:16<01:02,  3.87it/s]



 20%|██        | 60/300 [00:17<01:03,  3.81it/s]



 20%|██        | 61/300 [00:17<01:01,  3.89it/s]



 21%|██        | 62/300 [00:17<00:59,  4.00it/s]



 21%|██        | 63/300 [00:17<00:59,  3.99it/s]



 21%|██▏       | 64/300 [00:17<00:58,  4.04it/s]



 22%|██▏       | 65/300 [00:18<00:57,  4.08it/s]



 22%|██▏       | 66/300 [00:18<00:58,  4.00it/s]



 22%|██▏       | 67/300 [00:18<01:04,  3.61it/s]



 23%|██▎       | 68/300 [00:19<01:07,  3.41it/s]



 23%|██▎       | 69/300 [00:19<01:11,  3.23it/s]



 23%|██▎       | 70/300 [00:19<01:13,  3.11it/s]



 24%|██▎       | 71/300 [00:20<01:12,  3.18it/s]



 24%|██▍       | 72/300 [00:20<01:13,  3.11it/s]



 24%|██▍       | 73/300 [00:20<01:12,  3.13it/s]



 25%|██▍       | 74/300 [00:21<01:12,  3.11it/s]



 25%|██▌       | 75/300 [00:21<01:14,  3.03it/s]



 25%|██▌       | 76/300 [00:21<01:13,  3.04it/s]



 26%|██▌       | 77/300 [00:22<01:16,  2.91it/s]



 26%|██▌       | 78/300 [00:22<01:19,  2.80it/s]



 26%|██▋       | 79/300 [00:22<01:21,  2.71it/s]



 27%|██▋       | 80/300 [00:23<01:21,  2.69it/s]



 27%|██▋       | 81/300 [00:23<01:14,  2.94it/s]



 27%|██▋       | 82/300 [00:23<01:07,  3.22it/s]



 28%|██▊       | 83/300 [00:24<01:03,  3.42it/s]



 28%|██▊       | 84/300 [00:24<01:00,  3.59it/s]



 28%|██▊       | 85/300 [00:24<00:59,  3.61it/s]



 29%|██▊       | 86/300 [00:24<00:59,  3.57it/s]



 29%|██▉       | 87/300 [00:25<00:58,  3.67it/s]



 29%|██▉       | 88/300 [00:25<00:58,  3.65it/s]



 30%|██▉       | 89/300 [00:25<00:58,  3.61it/s]



 30%|███       | 90/300 [00:25<00:55,  3.77it/s]



 30%|███       | 91/300 [00:26<00:55,  3.79it/s]



 31%|███       | 92/300 [00:26<00:55,  3.75it/s]



 31%|███       | 93/300 [00:26<00:56,  3.67it/s]



 31%|███▏      | 94/300 [00:27<00:56,  3.65it/s]



 32%|███▏      | 95/300 [00:27<00:54,  3.74it/s]



 32%|███▏      | 96/300 [00:27<00:52,  3.88it/s]



 32%|███▏      | 97/300 [00:27<00:51,  3.95it/s]



 33%|███▎      | 98/300 [00:28<00:52,  3.85it/s]



 33%|███▎      | 99/300 [00:28<00:53,  3.75it/s]



 33%|███▎      | 100/300 [00:28<00:51,  3.85it/s]



 34%|███▎      | 101/300 [00:28<00:53,  3.75it/s]



 34%|███▍      | 102/300 [00:29<00:50,  3.89it/s]



 34%|███▍      | 103/300 [00:29<00:52,  3.78it/s]



 35%|███▍      | 104/300 [00:29<00:52,  3.74it/s]



 35%|███▌      | 105/300 [00:29<00:50,  3.87it/s]



 35%|███▌      | 106/300 [00:30<00:51,  3.79it/s]



 36%|███▌      | 107/300 [00:30<00:51,  3.73it/s]



 36%|███▌      | 108/300 [00:30<00:50,  3.77it/s]



 36%|███▋      | 109/300 [00:30<00:49,  3.87it/s]



 37%|███▋      | 110/300 [00:31<00:50,  3.76it/s]



 37%|███▋      | 111/300 [00:31<00:50,  3.74it/s]



 37%|███▋      | 112/300 [00:31<00:50,  3.69it/s]



 38%|███▊      | 113/300 [00:32<00:51,  3.66it/s]



 38%|███▊      | 114/300 [00:32<00:51,  3.59it/s]



 38%|███▊      | 115/300 [00:32<00:49,  3.75it/s]



 39%|███▊      | 116/300 [00:32<00:47,  3.89it/s]



 39%|███▉      | 117/300 [00:33<00:45,  3.98it/s]



 39%|███▉      | 118/300 [00:33<00:45,  4.01it/s]



 40%|███▉      | 119/300 [00:33<00:45,  3.97it/s]



 40%|████      | 120/300 [00:33<00:52,  3.44it/s]



 40%|████      | 121/300 [00:34<00:55,  3.25it/s]



 41%|████      | 122/300 [00:34<00:58,  3.03it/s]



 41%|████      | 123/300 [00:35<01:01,  2.87it/s]



 41%|████▏     | 124/300 [00:35<01:01,  2.89it/s]



 42%|████▏     | 125/300 [00:35<01:02,  2.79it/s]



 42%|████▏     | 126/300 [00:36<00:59,  2.91it/s]



 42%|████▏     | 127/300 [00:36<00:58,  2.95it/s]



 43%|████▎     | 128/300 [00:36<00:56,  3.07it/s]



 43%|████▎     | 129/300 [00:37<00:57,  2.98it/s]



 43%|████▎     | 130/300 [00:37<00:58,  2.89it/s]



 44%|████▎     | 131/300 [00:37<00:57,  2.96it/s]



 44%|████▍     | 132/300 [00:38<00:56,  2.98it/s]



 44%|████▍     | 133/300 [00:38<00:56,  2.95it/s]



 45%|████▍     | 134/300 [00:38<00:58,  2.84it/s]



 45%|████▌     | 135/300 [00:39<00:53,  3.11it/s]



 45%|████▌     | 136/300 [00:39<00:51,  3.22it/s]



 46%|████▌     | 137/300 [00:39<00:49,  3.28it/s]



 46%|████▌     | 138/300 [00:39<00:46,  3.51it/s]



 46%|████▋     | 139/300 [00:40<00:43,  3.69it/s]



 47%|████▋     | 140/300 [00:40<00:41,  3.83it/s]



 47%|████▋     | 141/300 [00:40<00:42,  3.72it/s]



 47%|████▋     | 142/300 [00:40<00:42,  3.69it/s]



 48%|████▊     | 143/300 [00:41<00:41,  3.81it/s]



 48%|████▊     | 144/300 [00:41<00:39,  3.93it/s]



 48%|████▊     | 145/300 [00:41<00:41,  3.77it/s]



 49%|████▊     | 146/300 [00:41<00:39,  3.89it/s]



 49%|████▉     | 147/300 [00:42<00:38,  3.98it/s]



 49%|████▉     | 148/300 [00:42<00:39,  3.86it/s]



 50%|████▉     | 149/300 [00:42<00:38,  3.90it/s]



 50%|█████     | 150/300 [00:42<00:37,  3.96it/s]



 50%|█████     | 151/300 [00:43<00:38,  3.86it/s]



 51%|█████     | 152/300 [00:43<00:39,  3.78it/s]



 51%|█████     | 153/300 [00:43<00:39,  3.71it/s]



 51%|█████▏    | 154/300 [00:44<00:39,  3.67it/s]



 52%|█████▏    | 155/300 [00:44<00:39,  3.67it/s]



 52%|█████▏    | 156/300 [00:44<00:39,  3.66it/s]



 52%|█████▏    | 157/300 [00:44<00:38,  3.72it/s]



 53%|█████▎    | 158/300 [00:45<00:36,  3.85it/s]



 53%|█████▎    | 159/300 [00:45<00:35,  3.96it/s]



 53%|█████▎    | 160/300 [00:45<00:36,  3.83it/s]



 54%|█████▎    | 161/300 [00:45<00:35,  3.91it/s]



 54%|█████▍    | 162/300 [00:46<00:36,  3.80it/s]



 54%|█████▍    | 163/300 [00:46<00:36,  3.74it/s]



 55%|█████▍    | 164/300 [00:46<00:35,  3.85it/s]



 55%|█████▌    | 165/300 [00:46<00:36,  3.72it/s]



 55%|█████▌    | 166/300 [00:47<00:36,  3.65it/s]



 56%|█████▌    | 167/300 [00:47<00:35,  3.78it/s]



 56%|█████▌    | 168/300 [00:47<00:34,  3.88it/s]



 56%|█████▋    | 169/300 [00:47<00:33,  3.90it/s]



 57%|█████▋    | 170/300 [00:48<00:34,  3.78it/s]



 57%|█████▋    | 171/300 [00:48<00:33,  3.90it/s]



 57%|█████▋    | 172/300 [00:48<00:32,  3.93it/s]



 58%|█████▊    | 173/300 [00:49<00:33,  3.82it/s]



 58%|█████▊    | 174/300 [00:49<00:37,  3.35it/s]



 58%|█████▊    | 175/300 [00:49<00:39,  3.17it/s]



 59%|█████▊    | 176/300 [00:50<00:41,  3.00it/s]



 59%|█████▉    | 177/300 [00:50<00:41,  2.97it/s]



 59%|█████▉    | 178/300 [00:50<00:39,  3.10it/s]



 60%|█████▉    | 179/300 [00:51<00:41,  2.95it/s]



 60%|██████    | 180/300 [00:51<00:41,  2.93it/s]



 60%|██████    | 181/300 [00:51<00:40,  2.91it/s]



 61%|██████    | 182/300 [00:52<00:39,  2.97it/s]



 61%|██████    | 183/300 [00:52<00:38,  3.00it/s]



 61%|██████▏   | 184/300 [00:52<00:40,  2.88it/s]



 62%|██████▏   | 185/300 [00:53<00:41,  2.80it/s]



 62%|██████▏   | 186/300 [00:53<00:42,  2.69it/s]



 62%|██████▏   | 187/300 [00:54<00:41,  2.75it/s]



 63%|██████▎   | 188/300 [00:54<00:37,  2.95it/s]



 63%|██████▎   | 189/300 [00:54<00:34,  3.23it/s]



 63%|██████▎   | 190/300 [00:54<00:33,  3.32it/s]



 64%|██████▎   | 191/300 [00:55<00:30,  3.54it/s]



 64%|██████▍   | 192/300 [00:55<00:29,  3.69it/s]



 64%|██████▍   | 193/300 [00:55<00:29,  3.61it/s]



 65%|██████▍   | 194/300 [00:55<00:29,  3.58it/s]



 65%|██████▌   | 195/300 [00:56<00:28,  3.69it/s]



 65%|██████▌   | 196/300 [00:56<00:28,  3.66it/s]



 66%|██████▌   | 197/300 [00:56<00:27,  3.77it/s]



 66%|██████▌   | 198/300 [00:56<00:26,  3.88it/s]



 66%|██████▋   | 199/300 [00:57<00:25,  3.94it/s]



 67%|██████▋   | 200/300 [00:57<00:26,  3.84it/s]



 67%|██████▋   | 201/300 [00:57<00:25,  3.91it/s]



 67%|██████▋   | 202/300 [00:57<00:24,  4.01it/s]



 68%|██████▊   | 203/300 [00:58<00:25,  3.87it/s]



 68%|██████▊   | 204/300 [00:58<00:24,  3.94it/s]



 68%|██████▊   | 205/300 [00:58<00:24,  3.85it/s]



 69%|██████▊   | 206/300 [00:58<00:23,  3.95it/s]



 69%|██████▉   | 207/300 [00:59<00:23,  4.02it/s]



 69%|██████▉   | 208/300 [00:59<00:23,  3.99it/s]



 70%|██████▉   | 209/300 [00:59<00:23,  3.85it/s]



 70%|███████   | 210/300 [00:59<00:23,  3.76it/s]



 70%|███████   | 211/300 [01:00<00:22,  3.88it/s]



 71%|███████   | 212/300 [01:00<00:22,  3.95it/s]



 71%|███████   | 213/300 [01:00<00:22,  3.79it/s]



 71%|███████▏  | 214/300 [01:01<00:22,  3.74it/s]



 72%|███████▏  | 215/300 [01:01<00:22,  3.79it/s]



 72%|███████▏  | 216/300 [01:01<00:21,  3.91it/s]



 72%|███████▏  | 217/300 [01:01<00:20,  3.98it/s]



 73%|███████▎  | 218/300 [01:01<00:20,  4.04it/s]



 73%|███████▎  | 219/300 [01:02<00:20,  3.90it/s]



 73%|███████▎  | 220/300 [01:02<00:20,  3.99it/s]



 74%|███████▎  | 221/300 [01:02<00:20,  3.88it/s]



 74%|███████▍  | 222/300 [01:03<00:20,  3.80it/s]



 74%|███████▍  | 223/300 [01:03<00:19,  3.86it/s]



 75%|███████▍  | 224/300 [01:03<00:20,  3.80it/s]



 75%|███████▌  | 225/300 [01:03<00:20,  3.74it/s]



 75%|███████▌  | 226/300 [01:04<00:20,  3.62it/s]



 76%|███████▌  | 227/300 [01:04<00:22,  3.25it/s]



 76%|███████▌  | 228/300 [01:04<00:22,  3.25it/s]



 76%|███████▋  | 229/300 [01:05<00:21,  3.27it/s]



 77%|███████▋  | 230/300 [01:05<00:22,  3.13it/s]



 77%|███████▋  | 231/300 [01:05<00:21,  3.16it/s]



 77%|███████▋  | 232/300 [01:06<00:21,  3.14it/s]



 78%|███████▊  | 233/300 [01:06<00:21,  3.07it/s]



 78%|███████▊  | 234/300 [01:06<00:21,  3.00it/s]



 78%|███████▊  | 235/300 [01:07<00:22,  2.89it/s]



 79%|███████▊  | 236/300 [01:07<00:22,  2.90it/s]



 79%|███████▉  | 237/300 [01:07<00:21,  2.91it/s]



 79%|███████▉  | 238/300 [01:08<00:20,  2.99it/s]



 80%|███████▉  | 239/300 [01:08<00:20,  2.91it/s]



 80%|████████  | 240/300 [01:08<00:21,  2.85it/s]



 80%|████████  | 241/300 [01:09<00:20,  2.86it/s]



 81%|████████  | 242/300 [01:09<00:19,  3.05it/s]



 81%|████████  | 243/300 [01:09<00:17,  3.30it/s]



 81%|████████▏ | 244/300 [01:10<00:15,  3.51it/s]



 82%|████████▏ | 245/300 [01:10<00:14,  3.71it/s]



 82%|████████▏ | 246/300 [01:10<00:14,  3.66it/s]



 82%|████████▏ | 247/300 [01:10<00:14,  3.63it/s]



 83%|████████▎ | 248/300 [01:11<00:14,  3.64it/s]



 83%|████████▎ | 249/300 [01:11<00:14,  3.63it/s]



 83%|████████▎ | 250/300 [01:11<00:13,  3.70it/s]



 84%|████████▎ | 251/300 [01:11<00:13,  3.63it/s]



 84%|████████▍ | 252/300 [01:12<00:13,  3.64it/s]



 84%|████████▍ | 253/300 [01:12<00:12,  3.69it/s]



 85%|████████▍ | 254/300 [01:12<00:12,  3.63it/s]



 85%|████████▌ | 255/300 [01:12<00:11,  3.80it/s]



 85%|████████▌ | 256/300 [01:13<00:11,  3.93it/s]



 86%|████████▌ | 257/300 [01:13<00:11,  3.84it/s]



 86%|████████▌ | 258/300 [01:13<00:11,  3.73it/s]



 86%|████████▋ | 259/300 [01:14<00:11,  3.70it/s]



 87%|████████▋ | 260/300 [01:14<00:10,  3.68it/s]



 87%|████████▋ | 261/300 [01:14<00:10,  3.66it/s]



 87%|████████▋ | 262/300 [01:14<00:10,  3.63it/s]



 88%|████████▊ | 263/300 [01:15<00:10,  3.63it/s]



 88%|████████▊ | 264/300 [01:15<00:12,  2.99it/s]



 88%|████████▊ | 265/300 [01:15<00:11,  3.02it/s]



 89%|████████▊ | 266/300 [01:16<00:10,  3.29it/s]



 89%|████████▉ | 267/300 [01:16<00:09,  3.51it/s]



 89%|████████▉ | 268/300 [01:16<00:09,  3.50it/s]



 90%|████████▉ | 269/300 [01:17<00:08,  3.55it/s]



 90%|█████████ | 270/300 [01:17<00:08,  3.56it/s]



 90%|█████████ | 271/300 [01:17<00:07,  3.72it/s]



 91%|█████████ | 272/300 [01:17<00:07,  3.55it/s]



 91%|█████████ | 273/300 [01:18<00:07,  3.55it/s]



 91%|█████████▏| 274/300 [01:18<00:07,  3.57it/s]



 92%|█████████▏| 275/300 [01:18<00:06,  3.70it/s]



 92%|█████████▏| 276/300 [01:18<00:06,  3.68it/s]



 92%|█████████▏| 277/300 [01:19<00:06,  3.67it/s]



 93%|█████████▎| 278/300 [01:19<00:06,  3.55it/s]



 93%|█████████▎| 279/300 [01:19<00:06,  3.31it/s]



 93%|█████████▎| 280/300 [01:20<00:06,  3.14it/s]



 94%|█████████▎| 281/300 [01:20<00:06,  2.98it/s]



 94%|█████████▍| 282/300 [01:20<00:06,  2.94it/s]



 94%|█████████▍| 283/300 [01:21<00:05,  2.94it/s]



 95%|█████████▍| 284/300 [01:21<00:05,  3.01it/s]



 95%|█████████▌| 285/300 [01:21<00:05,  2.91it/s]



 95%|█████████▌| 286/300 [01:22<00:04,  2.99it/s]



 96%|█████████▌| 287/300 [01:22<00:04,  2.95it/s]



 96%|█████████▌| 288/300 [01:22<00:04,  2.95it/s]



 96%|█████████▋| 289/300 [01:23<00:03,  2.89it/s]



 97%|█████████▋| 290/300 [01:23<00:03,  2.83it/s]



 97%|█████████▋| 291/300 [01:24<00:03,  2.73it/s]



 97%|█████████▋| 292/300 [01:24<00:02,  2.78it/s]



 98%|█████████▊| 293/300 [01:24<00:02,  3.03it/s]



 98%|█████████▊| 294/300 [01:24<00:01,  3.18it/s]



 98%|█████████▊| 295/300 [01:25<00:01,  3.38it/s]



 99%|█████████▊| 296/300 [01:25<00:01,  3.38it/s]



 99%|█████████▉| 297/300 [01:25<00:00,  3.57it/s]



 99%|█████████▉| 298/300 [01:26<00:00,  3.58it/s]



100%|█████████▉| 299/300 [01:26<00:00,  3.72it/s]



100%|██████████| 300/300 [01:26<00:00,  3.47it/s]
