In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import implicit

In [2]:
def calc_mrr(predict, answer):
    """Return the reciprocal rank of ``answer`` inside ``predict``.

    ``predict`` is scanned from the front; the first element equal to
    ``answer`` at 1-based position ``p`` yields ``1 / p``. When no element
    matches, ``0`` is returned.
    """
    for position, candidate in enumerate(predict, start=1):
        if candidate == answer:
            return 1. / position
    return 0

# Upper bound on the number of predictions accepted per line.
max_prediction_len = 100

def calc_score(target_path, predict_path):
    """Print the MRR@100 of a prediction file against a target file.

    Parameters
    ----------
    target_path : str
        Text file holding one integer answer per line.
    predict_path : str
        Text file holding one line of space-separated integer predictions
        per answer.

    Lines of the two files are paired positionally with ``zip``; the summed
    reciprocal ranks are divided by the number of target lines and the
    result is printed (nothing is returned).

    Raises
    ------
    ValueError
        If a prediction line contains more than ``max_prediction_len`` items.
    """
    with open(target_path) as f:
        y_true = [int(x.strip()) for x in f]

    with open(predict_path) as f:
        y_pred = [[int(x) for x in line.strip().split(' ')] for line in f]

    mrr_score = 0
    for (pred, answer) in zip(y_pred, y_true):
        if len(pred) > max_prediction_len:
            # Report the length of the offending row itself. The message used
            # to index y_pred with an `i` that does not exist in this function.
            raise ValueError('$maximum prediction length is {}, got {}$'.format(max_prediction_len, len(pred)))
        mrr_score += calc_mrr(pred, answer)

    print(f"MRR@100 = {(mrr_score / len(y_true)):.4f}")

In [3]:
%%time

# Read the first 10000 lines of the train file and split them into a training
# set and a validation set. For the 2500 randomly chosen validation lines the
# last track is held out as the target; the remaining tracks of those lines
# are kept both in the training coordinates and in the validation coordinates.
list_coord = []        # (line index, track) pairs for the training matrix
list_coord_val = []    # (validation row, track) pairs: history of held-out lines
target_val = []        # held-out last track of every validation line
np.random.seed(42)
val_id = np.random.choice(range(10000), size=2500, replace=False)
val_id.sort()
# `x in ndarray` scans the whole array on every check; a set copy keeps the
# per-line membership lookup constant-time. val_id itself is left unchanged.
val_id_set = set(val_id.tolist())

with open('/Users/david/Documents/GitHub/YandexCup_RecSys/train') as f:
    lines = f.readlines()
    idx = 0        # index of the current line in the file
    idx_val = 0    # running row number among validation lines
    for line in lines:
        tracks = line.strip().split(' ')
        # str.split(' ') never returns an empty list, so a zero-length check
        # cannot fire; only the single-track case is reported.
        if len(tracks) == 1:
            print(1)
        is_val_line = idx in val_id_set
        last_pos = len(tracks) - 1
        for i, track in enumerate(tracks):
            if is_val_line:
                if i == last_pos:
                    # Hold the final track out as this line's target.
                    target_val.append(track)
                    idx_val += 1
                    continue
                list_coord_val.append((idx_val, track))
            list_coord.append((idx, track))
        idx += 1
        if idx >= 10000:
            break

CPU times: user 4.58 s, sys: 438 ms, total: 5.01 s
Wall time: 5.67 s


In [36]:
%%time

# Training interactions: one unit entry per (row index, track id) pair from
# list_coord, stored in both orientations.
# NOTE(review): no explicit shape is passed, so the matrix dimensions come from
# the indices present in the data — confirm the train and validation matrices
# end up with the dimensions the model expects.
arr_rows = np.array([row for row, _ in list_coord])
arr_cols = np.array([track for _, track in list_coord]).astype(int)
ones = np.ones(len(arr_rows))
user_item = csr_matrix((ones, (arr_rows, arr_cols)), dtype=int)
item_user = csr_matrix((ones, (arr_cols, arr_rows)), dtype=int)

# Validation interactions: same construction from list_coord_val, whose row
# indices are the running validation row numbers.
arr_rows_val = np.array([row for row, _ in list_coord_val])
arr_cols_val = np.array([track for _, track in list_coord_val]).astype(int)
ones_val = np.ones(len(arr_rows_val))
user_item_val = csr_matrix((ones_val, (arr_rows_val, arr_cols_val)), dtype=int)
item_user_val = csr_matrix((ones_val, (arr_cols_val, arr_rows_val)), dtype=int)

CPU times: user 400 ms, sys: 174 ms, total: 574 ms
Wall time: 1.75 s


## val matrix

In [5]:
# Build the ALS model. Every hyperparameter is spelled out explicitly and the
# random state is pinned so repeated runs start from the same initialisation.
model = implicit.als.AlternatingLeastSquares(
    factors=10,
    regularization=10,
    alpha=10,
    use_native=True,
    use_cg=False,
    iterations=15,
    calculate_training_loss=False,
    random_state=42,
)

In [6]:
# Train the model on `user_item`, the sparse matrix assembled from list_coord
# (row index = line number in the train file, column index = track id).
# NOTE(review): older implicit releases expected the transposed (item x user)
# matrix in fit() — confirm the orientation against the installed version.
model.fit(user_item)

  0%|          | 0/15 [00:00<?, ?it/s]

In [7]:
%%time
# Request N=100 recommendations for the held-out lines in one batched call,
# filtering out items already present in user_item_val.
# `userid` is the same array object as val_id (no copy is made).
# NOTE(review): presumably row k of user_item_val must describe userid[k]; both
# follow ascending line order here (val_id is sorted, validation rows are
# numbered in file order) — confirm against the installed implicit version.
userid = val_id
recommendations = model.recommend(userid, user_item_val, N=100, filter_already_liked_items=True)

CPU times: user 13 s, sys: 4.97 s, total: 18 s
Wall time: 3.8 s


In [8]:
# Display element 0 of the value returned by recommend(); the output below
# shows an int32 array of ids (presumably one row per requested user).
recommendations[0]

array([[244723, 266527, 144033, ..., 469176, 377553, 453471],
       [244723, 266527, 389761, ..., 475541, 316972, 287050],
       [307702, 446274, 203963, ..., 413342, 193058, 465526],
       ...,
       [460783, 245354, 361704, ..., 374694, 482922,  39199],
       [285875,  19156, 476656, ...,  95038, 155742, 380542],
       [ 99446, 336586, 321731, ...,  68836, 452568, 376268]], dtype=int32)

In [9]:
# One text line per recommendation row: ids separated by single spaces and
# terminated by a newline, ready to be written with writelines().
result = [
    ' '.join(str(item_id) for item_id in rec_row) + '\n'
    for rec_row in recommendations[0]
]

In [10]:
# Persist the formatted predictions and the held-out targets, then score them.
pred_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_pred'
target_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_target'

with open(pred_path, 'w') as f:
    f.write(''.join(result))

# One target per line, in the order target_val was collected.
with open(target_path, 'w') as f:
    f.writelines(f'{track}\n' for track in target_val)

calc_score(target_path, pred_path)

MRR@100 = 0.0095


# full matrix - val idx

In [11]:
# Fresh ALS model with the same explicit hyperparameters and fixed random
# state as the other experiments in this notebook.
model = implicit.als.AlternatingLeastSquares(
    factors=10,
    regularization=10,
    alpha=10,
    use_native=True,
    use_cg=False,
    iterations=15,
    calculate_training_loss=False,
    random_state=42,
)

In [12]:
# Train the model on `user_item`, the sparse matrix assembled from list_coord
# (row index = line number in the train file, column index = track id).
# NOTE(review): older implicit releases expected the transposed (item x user)
# matrix in fit() — confirm the orientation against the installed version.
model.fit(user_item)

  0%|          | 0/15 [00:00<?, ?it/s]

In [17]:
%%time
# Request N=100 recommendations for every row id 0..9999 (10000 mirrors the
# line cap used when the train file was read), passing the full training
# matrix so that already-seen items are filtered out. Rows for the validation
# lines are selected from the result in a later cell.
userid = np.arange(10000)
recommendations = model.recommend(userid, user_item, N=100, filter_already_liked_items=True)

CPU times: user 1min 10s, sys: 5.03 s, total: 1min 15s
Wall time: 37.4 s


In [18]:
# Display element 0 of the value returned by recommend(); the output below
# shows an int32 array of ids (presumably one row per requested user).
recommendations[0]

array([[244723, 266527, 144033, ..., 469176, 377553, 453471],
       [165509,  94009,  64323, ..., 444015, 407246, 220227],
       [460783, 361704, 245354, ..., 411269, 164780, 392615],
       ...,
       [ 15028, 308953, 163814, ..., 326106, 428843, 290883],
       [ 99446, 336586, 321731, ...,  68836, 452568, 376268],
       [177527,  74777, 145882, ..., 148208, 320189, 262757]], dtype=int32)

In [19]:
# Keep only the rows that belong to validation lines. Integer-array indexing
# with the sorted, de-duplicated ids selects them in ascending order — the
# same order in which target_val was collected — without scanning val_id once
# per row as an `idx in val_id` test would.
val_rows = recommendations[0][np.unique(val_id)]
result = [' '.join(map(str, rec_row)) + '\n' for rec_row in val_rows]

In [20]:
# Persist the formatted predictions and the held-out targets, then score them.
pred_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_pred'
target_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_target'

with open(pred_path, 'w') as f:
    f.write(''.join(result))

# One target per line, in the order target_val was collected.
with open(target_path, 'w') as f:
    f.writelines(f'{track}\n' for track in target_val)

calc_score(target_path, pred_path)

MRR@100 = 0.0095


# item_user

In [39]:
# Fresh ALS model with the same explicit hyperparameters and fixed random
# state as the other experiments in this notebook.
model = implicit.als.AlternatingLeastSquares(
    factors=10,
    regularization=10,
    alpha=10,
    use_native=True,
    use_cg=False,
    iterations=15,
    calculate_training_loss=False,
    random_state=42,
)

In [40]:
# Train the model on `user_item`, the sparse matrix assembled from list_coord
# (row index = line number in the train file, column index = track id).
# NOTE(review): this section is headed "item_user", yet the matrix fitted here
# is `user_item`, exactly as in the previous sections — confirm which
# orientation this experiment was meant to test.
model.fit(user_item)

  0%|          | 0/15 [00:00<?, ?it/s]

In [41]:
%%time
# Request N=100 recommendations for row ids 0..2499, filtering with
# user_item_val.
# NOTE(review): ids 0..2499 are the first 2500 lines of the train file, whereas
# the rows of user_item_val were built for the lines listed in val_id. If
# recommend() pairs userid[k] with row k of the matrix, the ids and the filter
# rows describe different lines; the lower score printed for this section is
# consistent with that — confirm the intent.
userid = np.arange(2500)
recommendations = model.recommend(userid, user_item_val, N=100, filter_already_liked_items=True)

CPU times: user 16.9 s, sys: 1.01 s, total: 17.9 s
Wall time: 2.94 s


In [42]:
# Display element 0 of the value returned by recommend(); the output below
# shows an int32 array of ids (presumably one row per requested user).
recommendations[0]

array([[244723, 266527, 144033, ..., 469176, 377553, 453471],
       [165509,  94009,  64323, ...,  85019,  62121, 424525],
       [460783, 361704, 245354, ..., 424273, 411269, 164780],
       ...,
       [336586, 177281, 461156, ..., 134342, 433817, 193472],
       [245354,   6539,  97353, ..., 222388,  60343, 223595],
       [ 19156, 144359, 147078, ...,  63415, 418883,  85019]], dtype=int32)

In [43]:
# One text line per recommendation row: ids separated by single spaces and
# terminated by a newline, ready to be written with writelines().
result = [
    ' '.join(str(item_id) for item_id in rec_row) + '\n'
    for rec_row in recommendations[0]
]

In [45]:
# Persist the formatted predictions and the held-out targets, then score them.
pred_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_pred'
target_path = '/Users/david/Documents/GitHub/YandexCup_RecSys/impicit_als_checkup_matrix_target'

with open(pred_path, 'w') as f:
    f.write(''.join(result))

# One target per line, in the order target_val was collected.
with open(target_path, 'w') as f:
    f.writelines(f'{track}\n' for track in target_val)

calc_score(target_path, pred_path)

MRR@100 = 0.0013
