## Imports

In [None]:
import warnings
warnings.filterwarnings("ignore")

from implicit.nearest_neighbours import BM25Recommender
from rectools.dataset import Dataset
from rectools import Columns
from rectools.model_selection import TimeRangeSplit
import pandas as pd


from service.models.userknn import UserKnn
from service.utils import save_model, load_model

## Data

In [None]:
!mkdir ../data
!wget https://storage.yandexcloud.net/itmo-recsys-public-data/kion_train.zip -O ../data/data_original.zip
!unzip ../data/data_original.zip -d ../data

In [None]:
# Load the raw tables extracted under ../data/kion_train.
interactions = pd.read_csv('../data/kion_train/interactions.csv')
users = pd.read_csv('../data/kion_train/users.csv')
items = pd.read_csv('../data/kion_train/items.csv')


# Rename the raw columns to the names held in the rectools Columns constants.
interactions.rename(columns={'last_watch_dt': Columns.Datetime,
                            'total_dur': Columns.Weight},
                    inplace=True)

# Parse the timestamps. The column is addressed through Columns.Datetime
# (the name it was just renamed to) instead of a hard-coded string, so the
# rename and the conversion cannot drift apart.
interactions[Columns.Datetime] = pd.to_datetime(interactions[Columns.Datetime])


## Train/test split

In [None]:
# Split configuration: number of folds, and the fold length expressed as
# n_units of the pandas time unit `unit`.
n_folds = 1
unit = "W"
n_units = 1
periods = n_folds + 1
# Step between consecutive fold borders. It includes n_units so that the
# borders stay consistent with start_date below when n_units != 1.
freq = f"{n_units}{unit}"

last_date = interactions[Columns.Datetime].max().normalize()
# pd.Timedelta is a duration; step back far enough to fit all folds plus one
# extra unit before the last observed date.
start_date = last_date - pd.Timedelta(n_folds * n_units + 1, unit=unit)
print(f"Start date and last date of the test fold: {start_date, last_date}")

date_range = pd.date_range(start=start_date, periods=periods, freq=freq, tz=last_date.tz)
print(f"Test fold borders: {date_range.values.astype('datetime64[D]')}")

# generator of folds
cv = TimeRangeSplit(
    date_range=date_range,
    filter_already_seen=True,
    filter_cold_items=True,
    filter_cold_users=True,
)
print(f"Real number of folds: {cv.get_n_splits(interactions)}")

In [None]:
# Take the first fold produced by the splitter: row ids for train and test
# plus the collected fold statistics. Uses the builtin next() rather than
# calling the iterator's __next__ dunder directly.
train_ids, test_ids, fold_info = next(cv.split(interactions, collect_fold_stats=True))

In [None]:
# Materialise the fold as two frames: select the rows by the ids returned
# from the splitter and renumber each frame's index from zero.
train, test = (
    interactions.loc[row_ids].reset_index(drop=True)
    for row_ids in (train_ids, test_ids)
)

## Train Model

In [None]:
# Wrap an implicit BM25Recommender in the project's UserKnn model.
# NOTE(review): the meaning of n_neighbors is defined in
# service.models.userknn, which is not visible from this notebook.
model = UserKnn(
    model=BM25Recommender(K=10, K1=2),
    n_neighbors=10,
)

In [None]:
# Fit on a copy of the training frame so the original `train` object is not
# the one handed to the model (the redundant self-assignment was removed).
train_copy = train.copy()
model.fit(train_copy)

In [None]:
# Build a one-row frame holding a single user id and request predictions
# for it.
# NOTE(review): this rebinds `test`, replacing the holdout frame created
# from the split earlier in the notebook.
test = pd.DataFrame(data=[964868], columns=['user_id'])
model.predict(test=test)

In [None]:
# Persist the wrapped `model.model` attribute (not the UserKnn wrapper
# itself) and read it straight back from the same path.
userknn_path = '../models/userknn.pickle'
save_model(model=model.model, model_path=userknn_path)

reco_model = load_model(model_path=userknn_path)

In [None]:
# Run prediction once more for the same `test` frame using the in-memory
# `model`.
# NOTE(review): `reco_model`, loaded from disk in the previous cell, is not
# used here; confirm whether the reloaded object was meant to be checked.
model.predict(test=test)