In [1]:
import numpy as np
import pandas as pd

from models.lightfm import SimpleLightFM, FeaturedLightFM, WeightFeaturedLightFM
from models.hybrid import TwoStepRecommender, CombineRecommender, CombineUnseenRecommender
from models.popular import PopularUnseenRecommmender, PopularRecommender, SegmentUnseenRecommender



In [2]:
# Toy interaction log: nine (user, item) events that all share one date and
# one watched percentage. The date column is parsed to datetimes on creation.
data = pd.DataFrame({
    'uid': ['u1', 'u1', 'u2', 'u2', 'u3', 'u3', 'u4', 'u5', 'u6'],
    'iid': ['i1', 'i2', 'i2', 'i3', 'i2', 'i3', 'i1', 'i2', 'i3'],
    'dd': ['2021-01-01'] * 9,
    'watched_pct': [90] * 9,
}).assign(dd=lambda frame: pd.to_datetime(frame['dd']))

# Two categorical features per item.
if_df = pd.DataFrame({
    'iid': ['i1', 'i2', 'i3'],
    'if1': ['f11', 'f12', 'f11'],
    'if2': ['f21', 'f22', 'f23'],
})

# One categorical feature per user; every user carries the same value.
uf_df = pd.DataFrame({
    'uid': ['u1', 'u2', 'u3', 'u4', 'u5', 'u6'],
    'uf1': ['age10'] * 6,
})

# The (user, item) pairs that do NOT occur in `data`.
unused_df = pd.DataFrame({
    'uid': ['u1', 'u2', 'u3', 'u4', 'u4', 'u5', 'u5', 'u6', 'u6'],
    'iid': ['i3', 'i1', 'i1', 'i2', 'i3', 'i1', 'i3', 'i1', 'i2'],
})

In [5]:
# Build a CombineRecommender around a single sub-model, referenced by the
# string 'popular.PopularUnseenRecommmender'.
# NOTE(review): models_n presumably limits the candidates taken from each
# sub-model, and params looks like CLI-style options forwarded to it —
# confirm both against models.hybrid.
model = CombineRecommender(
    models=['popular.PopularUnseenRecommmender'],
    models_n=[100],
    params=[
        '--days', '10'
    ],
    user_col='uid',
    item_col='iid',
    date_col='dd',
)

model.fit(data)

# 'u7' never appears in `data`, so this call also covers an unknown user.
model.recommend(user_ids=['u4', 'u5', 'u6', 'u7'], N=10)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 8529.34it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 2239.65it/s]


0        [i2, i3]
1        [i1, i3]
2        [i1, i2]
3    [i1, i2, i3]
dtype: object

In [None]:
# Same experiment as a standalone PopularUnseenRecommmender, configured with
# keyword arguments instead of the string params used with CombineRecommender.
model = PopularUnseenRecommmender(
    days=10,
    user_col='uid',
    item_col='iid',
    date_col='dd',
)

# Feature frames are registered before fitting.
# NOTE(review): whether this model actually uses them is not visible here.
model.add_item_features(if_df)
model.add_user_features(uf_df)

model.fit(data)

# 'u7' never appears in `data`, so this call also covers an unknown user.
model.recommend(user_ids=['u4', 'u5', 'u6', 'u7'], N=10)

In [None]:
# Scratch check: de-duplicate a list of column names through a set.
# The order of the resulting list follows set iteration order, which is not
# guaranteed to match the input order.
list(set(['if5', 'if2']))

In [None]:
# Configure a WeightFeaturedLightFM with one user feature column ('uf1') and
# one item feature column ('if1').
# NOTE(review): preprocess_array_split presumably names columns whose values
# are split into arrays during preprocessing — confirm in models.lightfm.
model = WeightFeaturedLightFM(
    no_components=10,
    user_col='uid',
    item_col='iid',
    date_col='dd',
    user_features_col=['uf1'],
    item_features_col=['if1'],
    preprocess_array_split=['if1']
)

# Alternative configuration kept for reference (currently disabled): a
# TwoStepRecommender with a FeaturedLightFM first stage and
# 'boost.XGBoostRecommender' as the final model.
# model = TwoStepRecommender(
#     models=['lightfm.FeaturedLightFM'],
#     models_n=[100],
#     final_model='boost.XGBoostRecommender',
#     params=[
#         '--days', '10',
#         '--watched_pct_min', '0',
#         '--user_features_col', 'uf1',
#         '--item_features_col', 'if1', 'if2',
#         '--category_features', 'uf1', 'if1', 'if2',
#     ],
#     user_col='uid',
#     item_col='iid',
#     date_col='dd'
# )

# Register the item features, user features and the (uid, iid) pairs from
# unused_df on the model before it is fitted in a later cell.
model.add_item_features(if_df)
model.add_user_features(uf_df)
model.add_unused(unused_df)

In [None]:
# Fit whichever `model` is currently bound on the toy interaction log.
model.fit(data)

In [None]:
# Request recommendations (N=10) for three users that all appear in `data`.
model.recommend(user_ids=['u4', 'u5', 'u6'], N=10)

In [None]:
# Debug inspection of the fitted model's `data` attribute.
# NOTE(review): mapping() presumably returns the id-to-index mappings — confirm.
model.data.mapping()

In [None]:
# Debug inspection of the model's `user_seen` attribute.
# NOTE(review): presumably the items each user already interacted with — confirm.
model.user_seen

In [None]:
# Is 'u7' among the requested users that also have interactions in `data`?
# The grouped index holds the users with history; intersection() keeps only
# the requested labels that occur in it.
'u7' in (
    data
    .groupby('uid')['iid']
    .apply(list)
    .index
    .intersection(['u6', 'u7'])
)

In [None]:
# Number of interaction rows per item across all users.
data['iid'].value_counts()

In [13]:
# For every user, count their interactions per item and keep the first two
# entries of that count table.
(
    data
    .groupby('uid')['iid']
    .apply(lambda items: items.value_counts().head(2))
)

uid    
u1   i1    1
     i2    1
u2   i2    1
     i3    1
u3   i2    1
     i3    1
u4   i1    1
u5   i2    1
u6   i3    1
Name: iid, dtype: int64

In [22]:
# Same per-user counts, collected as plain lists for a chosen set of users and
# re-wrapped in a Series so the result gets a fresh 0..n-1 index.
pd.Series(
    data
    .groupby('uid')['iid']
    .apply(lambda items: items.value_counts().head(2).tolist())
    .loc[['u1', 'u3', 'u5']]
    .values
)

0    [1, 1]
1    [1, 1]
2       [1]
dtype: object

In [71]:
def explode(df):
    """Expand per-user recommendation lists into one row per (user, item).

    Every list in ``df['iid']`` is enumerated, so ``rank`` is the 0-based
    position of the item inside its original list.

    Args:
        df: DataFrame with an ``iid`` column whose cells are list-like.
            The frame is not modified.

    Returns:
        A new DataFrame with one row per list element: ``iid`` holds the
        element and ``rank`` its position. The remaining columns and the
        original index labels are repeated for every element.
    """
    # assign() builds a new frame. Writing the column in place, as before,
    # replaced the caller's lists with (rank, item) tuples.
    ranked = df.assign(
        iid=df['iid'].map(lambda items: list(enumerate(items)))
    ).explode('iid')
    # Each cell is now a (rank, item) tuple; split it into two columns.
    ranked[['rank', 'iid']] = ranked['iid'].apply(pd.Series)
    return ranked

def score(x, N=None):
    """Return the N-th root of the product of ``x``.

    With the default ``N`` this is the geometric mean of ``x``.

    Args:
        x: Sized collection of numbers (list, array, Series).
        N: Root order. Defaults to ``len(x)``, so callers that only pass the
            values get the geometric mean.

    Returns:
        ``np.prod(x) ** (1 / N)``. The result is 0 whenever any element of
        ``x`` is 0.

    Raises:
        ZeroDivisionError: if ``x`` is empty and ``N`` is not given.
    """
    if N is None:
        N = len(x)
    return np.power(
        np.prod(x),
        1 / N
    )

# Two toy recommendation outputs for the same pair of users; each `iid` cell
# is an ordered list of items, best first.
r1 = pd.DataFrame({
    'uid': ['u1', 'u2'],
    'iid': [
        ['i1', 'i2', 'i3', 'i4'],
        ['i1', 'i2', 'i3', 'i4'],
    ],
})

r2 = pd.DataFrame({
    'uid': ['u1', 'u2'],
    'iid': [
        ['i4', 'i1', 'i3', 'i2'],
        ['i1', 'i4', 'i3', 'i2'],
    ],
})


# r1[['rank', 'iid']] = explode(r1) 
# r1

# Blend the two rankings: for each (uid, iid) take the geometric mean of the
# ranks the item received, then keep each user's two lowest-scoring items.
# NOTE(review): ranks produced by explode() start at 0, so an item placed
# first by either list gets a product of 0 whatever its other rank is —
# confirm that this tie behaviour is intended.
(
    pd
    .concat([
        explode(r1),
        explode(r2)
    ])
    .groupby(['uid', 'iid'])['rank']
    # score() needs the root order N; calling it with the values alone raised
    # a TypeError. Use the number of ranks collected for the item.
    .apply(lambda x: score(x, len(x)))
    .reset_index()
    .groupby('uid')
    .apply(lambda x: x.sort_values('rank')['iid'].tolist()[:2])
)

uid
u1    [i1, i4]
u2    [i1, i2]
dtype: object

In [54]:
import numpy as np
# Scratch check of np.prod, the product step used inside score().
np.prod([1, 2, 3, 3])

18

In [55]:
# Scratch check of the root step used inside score(): a fractional exponent
# 1/2 yields the square root.
np.power(4, 1/2)

2.0