##### import libs

In [1]:
import os

import numpy as np
import pandas as pd
import lightfm
import implicit
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix, coo_matrix
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from utils import evaluate, load_data

In [3]:
# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

##### read data

In [4]:
# load_data (imported from the local utils module) returns four objects,
# unpacked here in order.
# NOTE(review): judging by the names these are the interaction log, user and
# item metadata, and the (user, item) pairs to score -- confirm in utils.
(
    user_item_data,
    user_meta_data,
    item_meta_data,
    test_pairs_data,
) = load_data()

In [5]:
# Attach item metadata to every interaction row. The "embeddings" column is
# dropped first so it is not carried into the interaction frame; the left join
# keeps every interaction row even when an item has no metadata match.
# NOTE(review): this cell rebinds user_item_data, so re-running it merges the
# metadata columns a second time.
user_item_data = pd.merge(
    user_item_data,
    item_meta_data.drop(columns=["embeddings"]),
    how="left",
    on="item_id",
)

In [6]:
# Relative watch time: time spent on the item divided by the item's duration.
# NOTE(review): rows whose duration is 0 or missing produce inf/NaN here --
# confirm the data rules this out.
user_item_data["timespent_rel"] = user_item_data["timespent"].div(
    user_item_data["duration"]
)

In [7]:
# Multipliers applied to each engagement signal when building weighted_target.
share_weight, bookmarks_weight, timespent_rel_weight = 10, 1, 50

In [8]:
# Per-interaction weight: the like flag scaled by a multiplier that grows with
# shares, bookmarks and relative time spent. Rows with like == 0 get weight 0.
engagement_multiplier = (
    1
    + share_weight * user_item_data["share"]
    + bookmarks_weight * user_item_data["bookmarks"]
    + timespent_rel_weight * user_item_data["timespent_rel"]
)
user_item_data["weighted_target"] = user_item_data["like"] * engagement_multiplier

##### split data

In [9]:
# Hold out 15% of the interaction rows for validation. shuffle=False keeps the
# original row order, so the split is a contiguous head/tail cut of the frame.
ui_train, ui_val = train_test_split(
    user_item_data,
    shuffle=False,
    test_size=0.15,
    random_state=42,
)

In [10]:
# Short aliases for the id and feedback columns of the training part ...
u_train, i_train = ui_train["user_id"], ui_train["item_id"]
likes_train, dislikes_train = ui_train["like"], ui_train["dislike"]

# ... and of the validation part.
u_val, i_val = ui_val["user_id"], ui_val["item_id"]
likes_val, dislikes_val = ui_val["like"], ui_val["dislike"]

In [11]:
# Sparse user x item matrix holding the weighted target of each training
# interaction; user ids index rows and item ids index columns.
# NOTE(review): `weights` is not referenced by any later cell visible here
# (model.fit is called without it) -- confirm whether that is intended.
weights = coo_matrix(
    (ui_train["weighted_target"], (u_train, i_train))
)

In [21]:
# Training interactions: the like flag of every (user, item) row.
sparse_train = coo_matrix((likes_train, (u_train, i_train)))

# Validation interactions: like minus dislike for every (user, item) row.
# NOTE(review): sparse_val is not used by any later cell visible here.
val_feedback = likes_val - dislikes_val
sparse_val = coo_matrix((val_feedback, (u_val, i_val)))

In [23]:
# Re-weight the training interactions with implicit's TF-IDF scheme.
# The input matrix is rebuilt from the raw like column on every execution, so
# re-running this cell never applies the weighting to an already weighted
# sparse_train (the previous `sparse_train = f(sparse_train)` form did).
sparse_train = implicit.nearest_neighbours.tfidf_weight(
    coo_matrix((likes_train, (u_train, i_train)))
)

##### lightfm model

In [24]:
# LightFM model trained with the BPR loss, 128 embedding components and a
# fixed random seed.
lightfm_params = {
    "no_components": 128,
    "loss": "bpr",
    "random_state": 42,
}
model = lightfm.LightFM(**lightfm_params)

In [25]:
# Train for 40 epochs on 16 threads, showing a progress bar (verbose=True).
# fit() returns the model itself, which is why the cell displays its repr.
model.fit(sparse_train, epochs=40, num_threads=16, verbose=True)

Epoch: 100%|██████████| 40/40 [13:16<00:00, 19.91s/it]


<lightfm.lightfm.LightFM at 0x7f3dc83cc3d0>

In [26]:
# Score every validation (user, item) pair with the trained model.
val_lightfm_pred = model.predict(
    user_ids=u_val.values,
    item_ids=i_val.values,
    num_threads=16,
)

In [27]:
# Evaluate the validation predictions against the "explicit" column using the
# project's evaluate helper (imported from utils), keyed by user id.
val_score = evaluate(
    score=val_lightfm_pred,
    target=ui_val["explicit"].values,
    user_id=ui_val["user_id"].values,
)
print(f"{val_score=}")

100%|██████████| 181721/181721 [01:01<00:00, 2955.91it/s]

val_score=0.6160764308368426





: 

In [None]:
# NOTE(review): this cell repeats the validation scoring of the preceding
# cell with the same inputs; it looks like a leftover and can probably go.
val_score = evaluate(
    score=val_lightfm_pred,
    target=ui_val["explicit"].values,
    user_id=ui_val["user_id"].values,
)
print(f"{val_score=}")

100%|██████████| 181721/181721 [00:52<00:00, 3482.04it/s]

val_score=0.6160483495058312





In [12]:
# Persist the learned user/item embeddings and biases as .npy files.
# The output directory is created first: np.save does not create missing
# directories, and failing here would waste the whole training run.
os.makedirs("dumps", exist_ok=True)

model_dumps = {
    "dumps/lfm_user_embeddings.npy": model.user_embeddings,
    "dumps/lfm_item_embeddings.npy": model.item_embeddings,
    "dumps/lfm_user_biases.npy": model.user_biases,
    "dumps/lfm_item_biases.npy": model.item_biases,
}
for dump_path, array in model_dumps.items():
    np.save(dump_path, array)

In [13]:
# Score every (user, item) pair of the full interaction frame (train and
# validation rows alike).
user_ids = user_item_data.user_id.values
item_ids = user_item_data.item_id.values

# num_threads matches the other predict calls in this notebook; without it
# LightFM scores this (largest) batch on a single thread.
lightfm_pred = model.predict(user_ids=user_ids,
                             item_ids=item_ids,
                             num_threads=16)

In [14]:
# Store the scores for all known interactions next to the other model dumps.
np.save(file="dumps/lfm_scores.npy", arr=lightfm_pred)

##### submission

In [40]:
# Score the (user, item) pairs of the test set with the trained model.
test_lightfm_pred = model.predict(
    user_ids=test_pairs_data["user_id"].values,
    item_ids=test_pairs_data["item_id"].values,
    num_threads=16,
)

In [16]:
# Store the raw test-pair scores alongside the other dumps.
np.save(file="dumps/test_lightfm_scores.npy", arr=test_lightfm_pred)

In [41]:
# Add the model scores as the "predict" column of the test pairs (in place)
# and write the frame to the submission CSV without the index column.
test_pairs_data["predict"] = test_lightfm_pred
test_pairs_data.to_csv(
    path_or_buf="./lfm_submission.csv",
    index=False,
)