# Imports

In [31]:
import pandas as pd
from rectools import Columns
from rectools.models.popular import PopularModel
from rectools.dataset import Dataset

# Loading data

In [6]:
# Read the three raw competition tables (interactions, items, users), in that order.
interactions, items, users = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/interactions.csv",
        "data/items.csv",
        "data/users.csv",
    )
)

In [41]:
# Load the sample submission file from the data directory.
sample_submission = pd.read_csv(filepath_or_buffer="data/sample_submission.csv")

In [42]:
# Display the sample submission frame to inspect its columns and size.
sample_submission

Unnamed: 0,user_id,item_id
0,3,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
1,11,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
2,29,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
3,30,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
4,33,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
...,...,...
193108,1097527,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
193109,1097537,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
193110,1097538,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."
193111,1097544,"[9728, 15297, 10440, 14488, 13865, 12192, 341,..."


In [9]:
# Map the raw interaction column names onto the standard names exposed by
# rectools.Columns. The result is rebound instead of using inplace=True so the
# cell does not mutate the frame loaded earlier.
# NOTE(review): DataFrame.rename silently ignores keys that are absent from the
# frame — confirm that interactions.csv really has a 'track_id' column.
interactions = interactions.rename(
    columns={
        "track_id": Columns.Item,
        "last_watch_dt": Columns.Datetime,
        "total_dur": Columns.Weight,
    }
)

In [46]:
# Check whether this user id occurs among the user_id VALUES.
# `x in series` tests the Series index labels, not its values, so the lookup
# has to go through the underlying array.
1097558 in users["user_id"].to_numpy()

False

In [17]:
# Bucket release years into 10 quantile bins and label every bucket with the
# left edge of its bin.
_, bins = pd.qcut(items["release_year"], 10, retbins=True)
labels = bins[:-1]

# Long-format feature table: one row per item with columns (item id, value, feature).
year_feature = pd.DataFrame(
    {
        Columns.Item: items[Columns.Item],
        # pd.cut intervals are right-closed and exclude the lowest edge by
        # default, which would turn items released in the earliest year
        # (== bins[0]) into NaN; include_lowest=True keeps them in the first bin.
        "value": pd.cut(
            items["release_year"],
            bins=bins,
            labels=labels,
            include_lowest=True,
        ),
        "feature": "release_year",
    }
)
year_feature.head()

Unnamed: 0,item_id,value,feature
0,10711,1983.0,release_year
1,2508,2012.0,release_year
2,10716,2009.0,release_year
3,7868,2014.0,release_year
4,16268,1897.0,release_year


In [24]:
# Turn the comma-separated `genres` string into a list of genres per item.
# NOTE(review): splitting on "," alone keeps any whitespace that follows a
# comma — confirm the raw strings contain none.
items["genre"] = items["genres"].str.split(pat=",")

In [25]:
# Long-format genre features: one row per (item, genre) pair with columns
# (item id, value, feature).
genre_feature = (
    items[[Columns.Item, "genre"]]
    .explode("genre")
    .rename(columns={"genre": "value"})
    .assign(feature="genre")
)
genre_feature.head()

Unnamed: 0,item_id,value,feature
0,10711,драмы,genre
0,10711,зарубежные,genre
0,10711,детективы,genre
0,10711,мелодрамы,genre
1,2508,зарубежные,genre


In [26]:
# Stack the genre and release-year feature tables, then keep only rows whose
# item id appears in the interactions.
item_feat = (
    pd.concat([genre_feature, year_feature])
    .loc[lambda feats: feats[Columns.Item].isin(interactions[Columns.Item])]
)

In [32]:
# Build the RecTools dataset from the interactions plus item features only;
# both item features are passed as categorical.
dataset = Dataset.construct(
    interactions,
    item_features_df=item_feat,
    cat_item_features=["genre", "release_year"],
    user_features_df=None,  # no user features are supplied
)

In [34]:
# Fit the popularity baseline on the dataset; `fit` returns the model itself,
# so the bare name below still displays it as the cell output.
pop = PopularModel().fit(dataset)
pop

<rectools.models.popular.PopularModel at 0x7ff3200a3ac0>

In [39]:
# Preview: the single top recommendation for the first two user ids in the
# dataset's id map, with item titles joined in for readability.
preview_users = dataset.user_id_map.external_ids[:2]
preview_recos = pop.recommend(
    preview_users,
    dataset=dataset,
    k=1,
    filter_viewed=False,  # True - throw away some items for each user
)
preview_recos.merge(
    items[["item_id", "title"]],
    how="left",
    on="item_id",
)

Unnamed: 0,user_id,item_id,score,rank,title
0,176549,10440,202457.0,1,Хрустальный
1,699317,10440,202457.0,1,Хрустальный
