In [None]:
!pip -q install rectools==0.4.2
!pip -q install lightfm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.5/102.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone


In [None]:
import pickle
import typing as tp
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lightfm import LightFM
from rectools import Columns
from rectools.dataset import Dataset
from rectools.metrics import (
    MAP,
    NDCG,
    MeanInvUserFreq,
    Precision,
    Recall,
    Serendipity,
    calc_metrics,
)
from rectools.model_selection import TimeRangeSplitter, cross_validate
from rectools.models import LightFMWrapperModel, PopularModel
from tqdm import tqdm

In [None]:
# Read the three raw tables from CSV files located in the current working directory.
users, items, interactions = map(
    pd.read_csv,
    ("users.csv", "items.csv", "interactions.csv"),
)

In [None]:
# Number of non-missing values in each column of the interactions table.
interactions.notna().sum()

user_id          5476251
item_id          5476251
last_watch_dt    5476251
total_dur        5476251
watched_pct      5475423
dtype: int64

In [None]:
# rectools looks up the interaction timestamp under Columns.Datetime, so rename the
# source column to that name instead of overwriting the library-wide constant:
# patching Columns.Datetime would leave derived lists such as Columns.Interactions
# pointing at the old column name.
interactions = interactions.rename(columns={"last_watch_dt": Columns.Datetime})

# Keep only rows whose date string is exactly 10 characters long ("YYYY-MM-DD").
# Rows with a missing date compare False here and are dropped as well; anything
# else would make the strict-format parse below fail.
has_valid_date = interactions[Columns.Datetime].str.len() == 10
interactions = interactions.loc[has_valid_date].copy()
interactions[Columns.Datetime] = pd.to_datetime(interactions[Columns.Datetime], format="%Y-%m-%d")
max_date = interactions[Columns.Datetime].max()

# Interaction weight: 3 where watched_pct is above 10, otherwise 1.
# A missing watched_pct compares False and therefore also gets weight 1.
interactions[Columns.Weight] = np.where(interactions["watched_pct"] > 10, 3, 1)

In [None]:
# Time-window settings: the most recent `ranker_days_count` days are removed below
# (presumably reserved for a later ranking stage — confirm against the rest of the pipeline).
ranker_days_count = 30
min_date = interactions[Columns.Datetime].min()
max_date = interactions[Columns.Datetime].max()

# Keep only interactions strictly older than the cutoff date.
cutoff_date = max_date - pd.Timedelta(days=ranker_days_count)
is_before_cutoff = interactions[Columns.Datetime].lt(cutoff_date)
interactions = interactions[is_before_cutoff]

In [None]:
# Wrap the filtered interactions in a rectools Dataset (no user/item features are passed).
dataset = Dataset.construct(interactions_df=interactions)

In [None]:
# Underlying LightFM estimator trained with the WARP loss.
# NOTE(review): the learning rate and regularisation values look like the output of a
# hyper-parameter search — confirm their origin before changing them.
lightfm_estimator = LightFM(
    no_components=64,
    learning_rate=0.018752552061051517,
    user_alpha=1.7865068939394515e-06,
    item_alpha=1.930834633424772e-07,
    loss="warp",
    random_state=42,
)

# rectools wrapper that controls how the estimator is trained (epochs, thread count).
model = LightFMWrapperModel(lightfm_estimator, epochs=2, num_threads=32)

# Train on the interactions dataset; the returned model is shown as the cell output.
model.fit(dataset)

<rectools.models.lightfm.LightFMWrapperModel at 0x79bc5780ab60>

In [None]:
# Number of candidate items to generate per user.
top_N = 50

# Recommend for every user known to the dataset; filter_viewed=True asks the model
# to exclude items the user has already interacted with.
all_user_ids = dataset.user_id_map.external_ids
candidates = model.recommend(
    users=all_user_ids,
    dataset=dataset,
    k=top_N,
    filter_viewed=True,
)

In [None]:
# Prefix the model outputs with "lfm_" so they can be told apart from other
# rank/score columns, then preview the first rows.
lfm_column_names = {"rank": "lfm_rank", "score": "lfm_score"}
candidates = candidates.rename(columns=lfm_column_names)
candidates.head()

Unnamed: 0,user_id,item_id,lfm_score,lfm_rank
0,176549,7571,3.588528,1
1,176549,13865,3.561206,2
2,176549,3734,3.449157,3
3,176549,10440,3.278818,4
4,176549,12173,3.218931,5


In [None]:
# Persist the LightFM candidates to the working directory, without the index column.
candidates.to_csv(path_or_buf="lfm_preds.csv", index=False)