##### import libs

In [2]:
import numpy as np
import pandas as pd
import implicit
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from utils import evaluate, load_data

##### read data

In [4]:
user_item_data, user_meta_data, item_meta_data, test_pairs_data = load_data()

In [5]:
user_item_data = user_item_data.merge(
    item_meta_data.drop(columns="embeddings"), on="item_id", how="left"
)

In [6]:
user_item_data["timespent_rel"] = (
    user_item_data["timespent"] / user_item_data["duration"]
)

In [7]:
share_weight = 10
bookmarks_weight = 1
timespent_rel_weight = 50

In [8]:
user_item_data["weighted_like"] = (user_item_data["like"])*(
    1 + 
    share_weight*user_item_data.share + 
    bookmarks_weight*user_item_data.bookmarks + 
    timespent_rel_weight*user_item_data.timespent_rel)

In [9]:
user_item_data["weighted_target"] = (user_item_data["like"]-user_item_data["dislike"])*(
    1 + 
    share_weight*user_item_data.share + 
    bookmarks_weight*user_item_data.bookmarks + 
    timespent_rel_weight*user_item_data.timespent_rel)

##### split data

In [13]:
ui_train, ui_val = train_test_split(user_item_data,
                                    test_size=0.15,
                                    random_state=42,
                                    shuffle=False)

In [14]:
u_train = ui_train.user_id
i_train = ui_train.item_id
likes_train = ui_train.like
dislikes_train = ui_train.dislike

u_val = ui_val.user_id
i_val = ui_val.item_id
likes_val = ui_val.like
dislikes_val = ui_val.dislike

In [15]:
likes_train = ui_train.weighted_target

In [16]:
likes_train

0             0.0
1             0.0
2             0.0
3             0.0
4             0.0
             ... 
123817184     0.0
123817185    31.0
123817186     0.0
123817187     0.0
123817188     0.0
Name: weighted_target, Length: 123817189, dtype: float64

In [22]:
sparse_train = csr_matrix((likes_train, (u_train, i_train)))
sparse_val = csr_matrix((likes_val, (u_val, i_val)))

##### als model

In [23]:
sparse_train = implicit.nearest_neighbours.tfidf_weight(sparse_train)

  X.data = sqrt(X.data) * idf[X.col]


In [None]:
model = implicit.als.AlternatingLeastSquares(
    random_state=42,
    )
model.fit(sparse_train, show_progress=True)

##### evaluation

In [None]:
model = model.to_cpu()

  check_blas_config()


In [None]:
als_score = (model.user_factors[ui_val['user_id']] * 
             model.item_factors[ui_val['item_id']]).sum(axis=1)

In [17]:
evaluate(user_id=ui_val.user_id.values,
         target=likes_val.values,
         score=als_score)

100%|██████████| 181721/181721 [00:58<00:00, 3093.98it/s]


0.5929413232653898

In [None]:
evaluate(user_id=ui_val.user_id.values,
         target=likes_val.values,
         score=als_score)

100%|██████████| 181721/181721 [00:49<00:00, 3707.79it/s]


0.5929246356077261

##### submission

обучим модель на всех данных

In [8]:
sparse_train = csr_matrix((user_item_data.weighted_target,
                           (user_item_data.user_id,
                            user_item_data.item_id)))

##### als model

In [9]:
sparse_train = implicit.nearest_neighbours.tfidf_weight(sparse_train)

In [10]:
model = implicit.als.AlternatingLeastSquares(
    random_state=42,
    )
model.fit(sparse_train, show_progress=True)

100%|██████████| 15/15 [00:46<00:00,  3.12s/it]


##### evaluation

In [11]:
model = model.to_cpu()

  check_blas_config()


In [12]:
als_score = [model.user_factors[user_id] @ model.item_factors[item_id] for user_id, item_id in tqdm(zip(
    user_item_data.user_id.values,
    user_item_data.item_id.values
    ))]

145667282it [03:18, 732168.43it/s]


In [15]:
len(als_score)==len(user_item_data)

True

In [16]:
np.save("dumps/ials_scores.npy", als_score)

In [17]:
test_als_score = (
    model.user_factors[test_pairs_data['user_id']] * 
    model.item_factors[test_pairs_data['item_id']]
    ).sum(axis=1)

In [19]:
np.save("dumps/test_ials_scores.npy", test_als_score)

In [20]:
test_pairs_data["predict"] = test_als_score
test_pairs_data.to_csv("./ials_submission.csv",index=False)