In [1]:
import pandas as pd
import numpy as np
from lightfm.data import Dataset
from lightfm import LightFM


In [2]:
df = pd.read_pickle("../data/data.pkl")

In [3]:
df.head()

Unnamed: 0,user_id,anime_id,user_rating,name,genre,type,episodes,anime_rating,members
0,1,8074,10,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
1,3,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
2,5,8074,2,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
3,12,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892
4,14,8074,6,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",TV,12,7.46,535892


In [4]:
df.shape

(5957004, 9)

In [5]:
df = df.sample(10000, random_state=42)

In [6]:
df = df[["user_id", "name", "user_rating", "genre", "type", "episodes"]]

In [7]:
pre_df = df.copy()

In [8]:
pre_df["item_features"] = pre_df[["genre", "type", "episodes"]].values.tolist()
pre_df["user_features"] = pre_df[["user_rating"]].values.tolist()

In [9]:
pre_df.drop(["genre", "type", "episodes", "user_rating"], axis=1, inplace=True)

In [10]:
pre_df.head()

Unnamed: 0,user_id,name,item_features,user_features
3669635,562,Ranpo Kitan: Game of Laplace,"[Mystery, TV, 11]",[6]
2412642,42684,Death Note Rewrite,"[Mystery, Police, Psychological, Supernatural,...",[7]
3014556,53767,Kuroshitsuji II,"[Action, Comedy, Demons, Fantasy, Shounen, Sup...",[7]
732241,27381,DearS,"[Comedy, Ecchi, Harem, Romance, Sci-Fi, TV, 12]",[7]
1937180,57309,Yuusha ni Narenakatta Ore wa Shibushibu Shuush...,"[Comedy, Ecchi, Fantasy, Romance, TV, 12]",[5]


In [11]:
# create encorder
uq_users = np.unique(pre_df.user_id.values)
uq_items = np.unique(pre_df.name.values)

In [12]:
uq_user_features = np.unique(np.array(list(pre_df.user_features.values)))
uq_item_features = np.unique(np.array(list(pre_df.item_features.values)))

In [13]:
dataset = Dataset()
dataset.fit(uq_users, uq_items, item_features=uq_item_features, user_features=uq_user_features)

In [14]:
# build dataset
df_train_interactions = pre_df[["user_id", "name"]].drop_duplicates()
train_interactions = list(df_train_interactions.itertuples(index=False, name=None))  # [(user_id, item_id), ...]

In [15]:
train_interactions[0:10]

[(562, 'Ranpo Kitan: Game of Laplace'),
 (42684, 'Death Note Rewrite'),
 (53767, 'Kuroshitsuji II'),
 (27381, 'DearS'),
 (57309,
  'Yuusha ni Narenakatta Ore wa Shibushibu Shuushoku wo Ketsui Shimashita.'),
 (13894, 'Tokyo Ghoul: &quot;Jack&quot;'),
 (9750, 'Rosario to Vampire'),
 (56631, 'Inugami-san to Nekoyama-san'),
 (20853, 'Magi: The Kingdom of Magic'),
 (26667, 'One Piece Movie 5: Norowareta Seiken')]

In [16]:
# user feature matrix
df_user_features = pre_df[["user_id","user_features"]].drop_duplicates(subset='user_id').set_index('user_id')
user_features = list(df_user_features.itertuples(index=True, name=None))  # (user_id, [feature1, feature2, ...])

In [17]:
user_features[0:10]

[(562, [6]),
 (42684, [7]),
 (53767, [7]),
 (27381, [7]),
 (57309, [5]),
 (13894, [7]),
 (9750, [7]),
 (56631, [7]),
 (20853, [8]),
 (26667, [10])]

In [18]:
# item feature matrix
df_item_features = pre_df[["name","item_features"]].drop_duplicates(subset='name').set_index('name')
item_features = list(df_item_features.itertuples(index=True, name=None))  # (item_id, [feature1, feature2, ...])

In [19]:
item_features[0:10]

[('Ranpo Kitan: Game of Laplace', ['Mystery', 'TV', '11']),
 ('Death Note Rewrite',
  ['Mystery, Police, Psychological, Supernatural, Thriller', 'Special', '2']),
 ('Kuroshitsuji II',
  ['Action, Comedy, Demons, Fantasy, Shounen, Supernatural', 'TV', '12']),
 ('DearS', ['Comedy, Ecchi, Harem, Romance, Sci-Fi', 'TV', '12']),
 ('Yuusha ni Narenakatta Ore wa Shibushibu Shuushoku wo Ketsui Shimashita.',
  ['Comedy, Ecchi, Fantasy, Romance', 'TV', '12']),
 ('Tokyo Ghoul: &quot;Jack&quot;',
  ['Action, Drama, Horror, School, Seinen, Supernatural', 'OVA', '1']),
 ('Rosario to Vampire',
  ['Comedy, Ecchi, Fantasy, Harem, Romance, School, Shounen, Vampire',
   'TV',
   '13']),
 ('Inugami-san to Nekoyama-san', ['Comedy, School, Shoujo Ai', 'TV', '12']),
 ('Magi: The Kingdom of Magic',
  ['Action, Adventure, Fantasy, Magic, Shounen', 'TV', '25']),
 ('One Piece Movie 5: Norowareta Seiken',
  ['Action, Adventure, Comedy, Fantasy, Shounen, Super Power', 'Movie', '1'])]

In [20]:
train_interactions, _ = dataset.build_interactions(train_interactions)
user_features = dataset.build_user_features(user_features)
item_features = dataset.build_item_features(item_features)

In [21]:
# get mapping
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()

In [22]:
model = LightFM(no_components=10, loss='warp', random_state=42)
model.fit(train_interactions, item_features=item_features, user_features=user_features, epochs=10)

<lightfm.lightfm.LightFM at 0x7fc81ce2ac70>

In [23]:
predictions = model.predict(user_ids=0, item_ids=np.array(range(0, len(item_id_map))))

In [24]:
sort_idx = np.argsort(-predictions)

In [25]:
sort_idx[0:10]

array([ 274, 2027,  399,  518,  328,  507, 1746,   84, 1497,  535])

In [26]:
recommend_list = []

for id in sort_idx[0:10]:
    for k, v in enumerate(item_id_map):
        if k == id:
            recommend_list.append([k, v])


In [27]:
recommend_list

[[274, 'Byousoku 5 Centimeter'],
 [2027, 'Toradora!'],
 [399, 'Deadman Wonderland'],
 [518, 'Elfen Lied'],
 [328, 'Clannad: After Story - Mou Hitotsu no Sekai, Kyou-hen'],
 [507, 'Durarara!!'],
 [1746, 'Sen to Chihiro no Kamikakushi'],
 [84, 'Angel Beats!'],
 [1497, 'Ore no Imouto ga Konnani Kawaii Wake ga Nai'],
 [535, 'FLCL']]