Условия

Если задача не отображается, то вы либо не авторизованы, либо не нажали кнопку кнопку «принять участие» на этой странице.

Для данной задачи вам предстоит построить рекомендательную систему баннеров на основе логов просмотров и лайков.

Логи представлены четырьмя колонками:

user_id (идентификатор пользователя),
item_id (идентификатор баннера),
like (флаг понравился ли пользователю баннер),
timestamp (unix время в секундах совершения действия).
Кроме того, для пользователей и баннеров имеются признаки размерностью 32.

Вам необходимо предсказать 20 баннеров для пользователей. Качество решения будет оцениваться как доля "лайкнутых" пользователей баннеров из предложенного вами списка (top-20 accuracy).

Описание файлов
test.csv — тестовый файл, содержащий идентификаторы пользователи, для которых необходимо сделать предсказания

train.csv — обучающий датасет

item-features.csv — признаки для баннеров

​​user-features — признаки для пользователей

sample-submission.csv — пример решения (сабмита).

качество решения оценивается по метрике Top-K Accuracy, где k = 20. Код:

In [1]:
def calc_score(test_choices, pred_choices, tk):
    s = 0
    for gt, p in zip(test_choices, pred_choices):
        s += int(gt in p)
    score = s / len(test_choices)
    return score

На экзамене за данную задачу вы можете получить до 100 баллов. Расчёт баллов производится по формуле (отличается от задачи 1 коэффициентами):

# Libs

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_validate

from sklearn.model_selection import train_test_split

import seaborn

где y — это ваш результат по метрике Top-K Accuracy. Количество баллов является округленным целым числом.

Желаем удачи!

In [3]:
!ls

docs		   recommend-Copy1.ipynb  test.csv
item-features.csv  recommend.ipynb	  train.csv
predictions.csv    sample-submission.csv  user-features.csv


# Data description

## Matrix items-features

In [4]:
item_features = (pd.read_csv("item-features.csv")).sort_values(by = ['item_id'])
item_features

Unnamed: 0,item_id,0,1,2,3,4,5,6,7,8,...,22,23,24,25,26,27,28,29,30,31
388,0,0.001433,-0.003243,-0.003030,0.004299,-0.001026,0.001412,0.001671,0.001373,-0.006249,...,-0.008651,-0.001440,0.002312,-0.002225,-0.004108,-0.004108,0.000871,-0.002408,-0.002408,0.000613
169,1,0.002482,-0.005617,-0.005248,0.007446,-0.001777,0.002446,0.002895,0.002378,-0.010824,...,-0.014983,-0.002493,0.004004,-0.003855,-0.007115,-0.007115,0.001508,-0.004171,-0.004171,0.001062
239,2,0.001871,-0.004236,-0.003958,0.005615,-0.001340,0.001845,0.002183,0.001793,-0.008162,...,-0.011299,-0.001880,0.003020,-0.002907,-0.005365,-0.005365,0.001137,-0.003145,-0.003145,0.000801
425,3,0.001390,-0.003146,-0.002940,0.004171,-0.000996,0.001370,0.001622,0.001332,-0.006063,...,-0.008392,-0.001397,0.002243,-0.002159,-0.003985,-0.003985,0.000845,-0.002336,-0.002336,0.000595
260,4,0.001738,-0.003933,-0.003675,0.005213,-0.001244,0.001713,0.002027,0.001665,-0.007579,...,-0.010490,-0.001746,0.002804,-0.002699,-0.004982,-0.004982,0.001056,-0.002920,-0.002920,0.000743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,439,0.000348,-0.000787,-0.000735,0.001043,-0.000249,0.000343,0.000405,0.000333,-0.001516,...,-0.002098,-0.000349,0.000561,-0.000540,-0.000996,-0.000996,0.000211,-0.000584,-0.000584,0.000149
346,440,0.000777,-0.001759,-0.001643,0.002332,-0.000557,0.000766,0.000906,0.000745,-0.003389,...,-0.004691,-0.000781,0.001254,-0.001207,-0.002228,-0.002228,0.000472,-0.001306,-0.001306,0.000332
37,441,-0.121692,0.181685,0.204412,-0.150535,0.235095,-0.389399,-0.326175,0.287157,-0.107230,...,-0.035643,-0.015009,-0.286772,0.002378,-0.065991,-0.065991,-0.112601,0.159891,0.159891,-0.370836
178,442,0.000491,-0.001112,-0.001039,0.001475,-0.000352,0.000484,0.000573,0.000471,-0.002144,...,-0.002967,-0.000494,0.000793,-0.000763,-0.001409,-0.001409,0.000299,-0.000826,-0.000826,0.000210


In [5]:
item_features["item_id"].astype(object).describe()[['count', 'unique']]

count     444
unique    444
Name: item_id, dtype: int64

In [6]:
print(f"444 items and 32 features")

444 items and 32 features


In [7]:
matrix_item_features = item_features[item_features.columns[1:]].values
matrix_item_features.shape

(444, 32)

In [10]:
from scipy.linalg import norm

In [15]:
muf = np.zeros(shape = matrix_item_features.shape)
for row, x in enumerate(matrix_item_features):
    muf[row] = x/norm(x)

## Matrix user-features

In [8]:
user_features = (pd.read_csv("user-features.csv")).sort_values(by=['user_id'])
user_features

Unnamed: 0,user_id,0,1,2,3,4,5,6,7,8,...,22,23,24,25,26,27,28,29,30,31
0,0,0.000695,-0.001573,-0.001470,0.002085,-0.000498,0.000685,0.000811,0.000666,-0.003031,...,-0.004196,-0.000698,0.001121,-0.001079,-0.001993,-0.001993,0.000422,-0.001168,-0.001168,0.000297
1,1,0.001204,-0.002725,-0.002546,0.003612,-0.000862,0.001187,0.001404,0.001154,-0.005251,...,-0.007268,-0.001209,0.001942,-0.001870,-0.003451,-0.003451,0.000732,-0.002023,-0.002023,0.000515
2,2,0.000491,-0.001112,-0.001039,0.001475,-0.000352,0.000484,0.000573,0.000471,-0.002144,...,-0.002967,-0.000494,0.000793,-0.000763,-0.001409,-0.001409,0.000299,-0.000826,-0.000826,0.000210
3,3,0.000777,-0.001759,-0.001643,0.002332,-0.000557,0.000766,0.000906,0.000745,-0.003389,...,-0.004691,-0.000781,0.001254,-0.001207,-0.002228,-0.002228,0.000472,-0.001306,-0.001306,0.000332
4,4,0.000695,-0.001573,-0.001470,0.002085,-0.000498,0.000685,0.000811,0.000666,-0.003031,...,-0.004196,-0.000698,0.001121,-0.001079,-0.001993,-0.001993,0.000422,-0.001168,-0.001168,0.000297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
492,492,0.000983,-0.002225,-0.002079,0.002949,-0.000704,0.000969,0.001147,0.000942,-0.004287,...,-0.005934,-0.000988,0.001586,-0.001527,-0.002818,-0.002818,0.000597,-0.001652,-0.001652,0.000421
493,493,0.001300,-0.002943,-0.002750,0.003901,-0.000931,0.001282,0.001517,0.001246,-0.005671,...,-0.007850,-0.001306,0.002098,-0.002020,-0.003728,-0.003728,0.000790,-0.002185,-0.002185,0.000556
494,494,0.000491,-0.001112,-0.001039,0.001475,-0.000352,0.000484,0.000573,0.000471,-0.002144,...,-0.002967,-0.000494,0.000793,-0.000763,-0.001409,-0.001409,0.000299,-0.000826,-0.000826,0.000210
495,495,0.000983,-0.002225,-0.002079,0.002949,-0.000704,0.000969,0.001147,0.000942,-0.004287,...,-0.005934,-0.000988,0.001586,-0.001527,-0.002818,-0.002818,0.000597,-0.001652,-0.001652,0.000421


In [9]:
user_features["user_id"].astype(object).describe()[['count', 'unique']]

count     497
unique    497
Name: user_id, dtype: int64

In [10]:
print(f"497 items and 32 features")

497 items and 32 features


In [11]:
matrix_user_features = user_features[user_features.columns[1:]].values
matrix_user_features.shape

(497, 32)

## logs

In [12]:
logs = (pd.read_csv("train.csv")).sort_values(by = ['timestamp'])
logs["timestamp"]= pd.to_datetime(logs["timestamp"]*10**9)
logs

Unnamed: 0,user_id,item_id,like,timestamp
0,140,342,0,2017-03-31 05:03:42
1,378,172,1,2017-03-31 05:03:48
2,150,182,0,2017-03-31 05:04:10
3,455,17,0,2017-03-31 05:05:04
4,350,409,0,2017-03-31 05:05:35
...,...,...,...,...
8669,161,312,0,2017-04-03 10:31:59
8670,406,208,0,2017-04-03 10:32:23
8671,196,43,0,2017-04-03 10:32:56
8672,84,100,0,2017-04-03 10:32:59


## Test table for recommendation

In [13]:
test = pd.read_csv("test.csv").sort_values(by = ['timestamp'])
test["timestamp"] = pd.to_datetime(test["timestamp"]*10**9)
test

Unnamed: 0,user_id,timestamp
0,166,2017-03-31 07:13:51
1,26,2017-03-31 10:49:31
2,41,2017-03-31 11:02:27
3,286,2017-03-31 14:40:55
4,108,2017-03-31 16:13:56
...,...,...
492,190,2017-04-03 10:20:14
493,181,2017-04-03 10:20:29
494,448,2017-04-03 10:22:08
495,124,2017-04-03 10:26:37


In [14]:
test["predictions"] = ""
test["item_id"] = ""
test["like"] = ""
test

Unnamed: 0,user_id,timestamp,predictions,item_id,like
0,166,2017-03-31 07:13:51,,,
1,26,2017-03-31 10:49:31,,,
2,41,2017-03-31 11:02:27,,,
3,286,2017-03-31 14:40:55,,,
4,108,2017-03-31 16:13:56,,,
...,...,...,...,...,...
492,190,2017-04-03 10:20:14,,,
493,181,2017-04-03 10:20:29,,,
494,448,2017-04-03 10:22:08,,,
495,124,2017-04-03 10:26:37,,,


In [15]:
# get k indexes with to k values
def get_predictions(A :np.array, top_k=20):
    result = np.zeros(shape = (len(A), top_k), dtype = int)
    for row in range(len(A)):
        values = [(x, i) for i, x in enumerate(A[row])]
        max_values = sorted(values, key = lambda x: -x[0])[:top_k]
#         print(max_values)
        indexes = [x[1] for x in max_values]
        result[row] = indexes
    return result

A = np.array([[6,2,3,4,5, 2000, 20, 2, 2000, 2000], [500,9,8,7,11,100, 2000, 2, 2000, 200]])
get_predictions(A, 3)

array([[5, 8, 9],
       [6, 8, 0]])

In [16]:
logs["predictions"]= ""
logs

Unnamed: 0,user_id,item_id,like,timestamp,predictions
0,140,342,0,2017-03-31 05:03:42,
1,378,172,1,2017-03-31 05:03:48,
2,150,182,0,2017-03-31 05:04:10,
3,455,17,0,2017-03-31 05:05:04,
4,350,409,0,2017-03-31 05:05:35,
...,...,...,...,...,...
8669,161,312,0,2017-04-03 10:31:59,
8670,406,208,0,2017-04-03 10:32:23,
8671,196,43,0,2017-04-03 10:32:56,
8672,84,100,0,2017-04-03 10:32:59,


In [17]:
columns = "user_id item_id like timestamp predictions".split()

In [18]:
r = (pd.concat([logs[columns], test[columns]])).sort_values(by = ['timestamp']).values
r

array([[140, 342, 0, Timestamp('2017-03-31 05:03:42'), ''],
       [378, 172, 1, Timestamp('2017-03-31 05:03:48'), ''],
       [150, 182, 0, Timestamp('2017-03-31 05:04:10'), ''],
       ...,
       [196, 43, 0, Timestamp('2017-04-03 10:32:56'), ''],
       [84, 100, 0, Timestamp('2017-04-03 10:32:59'), ''],
       [48, 75, 0, Timestamp('2017-04-03 10:33:15'), '']], dtype=object)

# answers

In [19]:
logs = logs.sort_values(by = ['timestamp'])
logs = logs.values

In [20]:
to_predict = r.copy()
to_predict

array([[140, 342, 0, Timestamp('2017-03-31 05:03:42'), ''],
       [378, 172, 1, Timestamp('2017-03-31 05:03:48'), ''],
       [150, 182, 0, Timestamp('2017-03-31 05:04:10'), ''],
       ...,
       [196, 43, 0, Timestamp('2017-04-03 10:32:56'), ''],
       [84, 100, 0, Timestamp('2017-04-03 10:32:59'), ''],
       [48, 75, 0, Timestamp('2017-04-03 10:33:15'), '']], dtype=object)

In [21]:
logs

array([[140, 342, 0, Timestamp('2017-03-31 05:03:42'), ''],
       [378, 172, 1, Timestamp('2017-03-31 05:03:48'), ''],
       [150, 182, 0, Timestamp('2017-03-31 05:04:10'), ''],
       ...,
       [196, 43, 0, Timestamp('2017-04-03 10:32:56'), ''],
       [84, 100, 0, Timestamp('2017-04-03 10:32:59'), ''],
       [48, 75, 0, Timestamp('2017-04-03 10:33:15'), '']], dtype=object)

In [22]:
ls  = pd.DataFrame(to_predict, columns = "user_id item_id like timestamp predictions".split())
ls

Unnamed: 0,user_id,item_id,like,timestamp,predictions
0,140,342,0,2017-03-31 05:03:42,
1,378,172,1,2017-03-31 05:03:48,
2,150,182,0,2017-03-31 05:04:10,
3,455,17,0,2017-03-31 05:05:04,
4,350,409,0,2017-03-31 05:05:35,
...,...,...,...,...,...
9166,161,312,0,2017-04-03 10:31:59,
9167,406,208,0,2017-04-03 10:32:23,
9168,196,43,0,2017-04-03 10:32:56,
9169,84,100,0,2017-04-03 10:32:59,


In [24]:
UF = matrix_user_features.copy()
IF = matrix_item_features.copy()

# predict

In [23]:
from tqdm.notebook import tqdm
scores = []

w0 = 0.5
w1 = 0.5
w2 = 1.2
w3 = 0

UF = matrix_user_features.copy()
IF = matrix_item_features.copy()
score = 0
like_score = 0

for row in range(len(to_predict)):
    user_id = to_predict[row, 0]
    item_id = to_predict[row, 1]
    like_value = to_predict[row, 2]
    time = to_predict[row, 3]

#     predictions = get_predictions([UF[user_id].dot(IF.T)])

    seen = ((ls[ls['timestamp'] < pd.Timestamp(time)])[
            ls['user_id'] == user_id])['item_id'].tolist()
    ps = get_predictions([(UF[user_id]).dot(IF.T)], 30+len(seen))[0].tolist()
    i = 0
    for x in seen:
        if x in ps:
            ps.pop(i)
            i = i-1
        i = i+1

    to_predict[row, 4] = ps[:20]

    if item_id in ps[:20]:
        score += 1
    if (item_id in ps[:20]) and (like_value == 1):
        like_score += 1
    

    if like_value == 1:
        # new UF by user_id click on iten_id
#         print(user_id, item_id)
    #     new UF by user_id click on iten_id
        uf = UF[user_id]-np.min(UF[user_id])
    #     uf_p = uf/np.sum(uf)
        if_ = IF[item_id]-np.min(IF[item_id])
        if_p = if_/np.sum(if_)
        UF[user_id] = IF[item_id]/3
#                 UF[user_id] = (UF[user_id] + w0*IF[item_id])/(w0+1)
#     elif like_value == 0:
#                 UF[user_id] = (w1*UF[user_id] - IF[item_id])/(w1-1)


print(f'{w0:.2f}, {w1:.2f}, {w2:.2f}, {w3:.2f}, {score} , {like_score}')
scores.append((like_score, score, w0, w1, w2, w3))



0.50, 0.50, 1.20, 0.00, 778 , 611


In [39]:
to_predict

array([[140, 342, 0, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [378, 172, 1, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [150, 182, 0, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       ...,
       [196, 43, 0, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [84, 100, 0, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [48, 75, 0, 0,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])]],
      dtype=object)

In [40]:
for row in range(len(to_predict)):
    to_predict[row, 3] = 0
    if to_predict[row, 1] != "":
        to_predict[row, 3] = int(to_predict[row, 1] in to_predict[row, 4])

In [41]:
sum(to_predict[:,3])

753

In [42]:
m = to_predict[to_predict[:, 1] == ""][:, [0,4]]
m

array([[166,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [26,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [41,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [286,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [108,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [479,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [483,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [355,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21])],
       [297,
        list([284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 

In [43]:
predictions = np.zeros(shape = (len(test), 21), dtype = int)
predictions[:, 0] = m[:, 0]
for x in range(len(predictions)):
    predictions[x][1:] = m[x][1]

In [44]:
predictions

array([[166, 284, 286, ..., 148,  36,  21],
       [ 26, 284, 286, ..., 148,  36,  21],
       [ 41, 284, 286, ..., 148,  36,  21],
       ...,
       [448, 284, 286, ..., 148,  36,  21],
       [124, 284, 286, ..., 148,  36,  21],
       [167, 284, 286, ..., 148,  36,  21]])

In [45]:
preds = pd.DataFrame(predictions, columns = ['user_id']+list(range(20)))
preds

Unnamed: 0,user_id,0,1,2,3,4,5,6,7,8,...,10,11,12,13,14,15,16,17,18,19
0,166,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
1,26,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
2,41,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
3,286,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
4,108,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
492,190,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
493,181,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
494,448,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21
495,124,284,286,76,35,287,37,22,60,65,...,72,66,200,58,7,40,11,148,36,21


In [46]:
preds.to_csv("predictions.csv", index =False)

# search

In [None]:
from sklearn.preprocessing import normalize

In [81]:
matrix_user_features

array([[ 0.00069502, -0.00157307, -0.00146987, ..., -0.00116813,
        -0.00116813,  0.00029734],
       [ 0.00120381, -0.00272464, -0.00254588, ..., -0.00202326,
        -0.00202326,  0.00051502],
       [ 0.00049145, -0.00111233, -0.00103935, ..., -0.00082599,
        -0.00082599,  0.00021025],
       ...,
       [ 0.00049145, -0.00111233, -0.00103935, ..., -0.00082599,
        -0.00082599,  0.00021025],
       [ 0.0009829 , -0.00222466, -0.00207871, ..., -0.00165198,
        -0.00165198,  0.00042051],
       [ 0.00109892, -0.00248725, -0.00232406, ..., -0.00184697,
        -0.00184697,  0.00047014]])

In [19]:
UF = matrix_user_features.copy()
IF = matrix_item_features.copy()

In [47]:
seen = [284, 286]
ps = get_predictions([(UF[user_id]).dot(IF.T)], 20+len(seen))[0].tolist()
print(ps)

i=0
for x in seen:
    if x in ps: 
        ps.pop(i)
        i = i-1
    i = i+1
print(ps)

[284, 286, 76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21, 5, 59]
[76, 35, 287, 37, 22, 60, 65, 80, 72, 66, 200, 58, 7, 40, 11, 148, 36, 21, 5, 59]


In [53]:
user_id

58

In [61]:
from tqdm.notebook import tqdm
scores = []
for w0 in tqdm(np.linspace(0.5, 1.0, 3)):
    for w1 in np.linspace(w0, 1.25, int((1.25-w0)/0.25)+1):
        for w2 in np.linspace(0.6, 1.2, 4):
            for w3 in np.linspace(0.2, 1, 3):
                UF = matrix_user_features.copy()
                IF = matrix_item_features.copy()
                score = 0
                like_score = 0

                for row in range(len(logs)):
                    user_id = logs.iat[row, 0]
                    item_id = logs.iat[row, 1]
                    like_value = logs.iat[row, 2]
                    time = logs.iat[row, 3]
                    
                    seen =  ((logs[logs['timestamp'] < pd.Timestamp(time)])[logs['user_id']==user_id])['item_id'].tolist()
                    ps = get_predictions([(UF[user_id]).dot(IF.T)], 20+len(seen))[0].tolist()

                    i=0
                    for x in seen:
                        if x in ps: 
                            ps.pop(i)
                            i = i-1
                        i = i+1
                    
                    if item_id in ps:
                        score += 1
                    if (item_id in ps) and (like_value==1):
                        like_score += 1
                    # new UF by user_id click on iten_id
                    if like_value == 1:
                        UF[user_id] = w0*UF[user_id] + w1*IF[item_id]/3
                    if like_value == 0:
                        UF[user_id] = w2*UF[user_id] + w3*IF[item_id]/3
                
                print( f'{w0:.2f}, {w1:.2f}, {w2:.2f}, {w3:.2f}, {score} , {like_score}')
                scores.append((like_score, score, w0 , w1, w2, w3))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



0.50, 0.50, 0.60, 0.20, 994 , 733
0.50, 0.50, 0.60, 0.60, 993 , 733
0.50, 0.50, 0.60, 1.00, 992 , 733
0.50, 0.50, 0.80, 0.20, 994 , 733
0.50, 0.50, 0.80, 0.60, 995 , 733
0.50, 0.50, 0.80, 1.00, 995 , 733
0.50, 0.50, 1.00, 0.20, 993 , 733
0.50, 0.50, 1.00, 0.60, 998 , 733
0.50, 0.50, 1.00, 1.00, 997 , 733
0.50, 0.50, 1.20, 0.20, 992 , 733
0.50, 0.50, 1.20, 0.60, 993 , 733
0.50, 0.50, 1.20, 1.00, 994 , 733
0.50, 0.75, 0.60, 0.20, 992 , 733
0.50, 0.75, 0.60, 0.60, 995 , 733
0.50, 0.75, 0.60, 1.00, 991 , 732
0.50, 0.75, 0.80, 0.20, 995 , 732
0.50, 0.75, 0.80, 0.60, 995 , 733
0.50, 0.75, 0.80, 1.00, 994 , 733



KeyboardInterrupt: 

In [64]:
sorted(scores, key = lambda x : -x[0])[:200]

[(758, 0.7, 0.5, 1.1, 0.5),
 (758, 0.7, 0.5, 1.1, 0.6000000000000001),
 (758, 0.7, 0.5, 1.1, 0.7),
 (758, 0.7, 0.5, 1.1, 0.8),
 (758, 0.7, 0.5, 1.1, 0.9000000000000001),
 (758, 0.7, 0.5, 1.1, 1.0),
 (758, 0.7, 0.5, 1.3, 0.2),
 (758, 0.7, 0.5, 1.3, 0.30000000000000004),
 (758, 0.7, 0.5, 1.3, 0.4),
 (758, 0.7, 0.5, 1.3, 0.5),
 (758, 0.7, 0.5, 1.3, 0.6000000000000001),
 (758, 0.7, 0.5, 1.3, 0.7),
 (758, 0.7, 0.5, 1.3, 0.8),
 (758, 0.7, 0.5, 1.3, 0.9000000000000001),
 (758, 0.7, 0.5, 1.3, 1.0),
 (758, 0.7, 0.5, 1.5, 0.2),
 (758, 0.7, 0.5, 1.5, 0.30000000000000004),
 (758, 0.7, 0.5, 1.5, 0.4),
 (758, 0.7, 0.5, 1.5, 0.5),
 (758, 0.7, 0.5, 1.5, 0.6000000000000001),
 (758, 0.7, 0.5, 1.5, 0.7),
 (758, 0.7, 0.5, 1.5, 0.8),
 (758, 0.7, 0.5, 1.5, 0.9000000000000001),
 (758, 0.7, 0.5, 1.5, 1.0),
 (758, 0.7, 0.7, 0.5, 0.2),
 (758, 0.7, 0.7, 0.5, 0.30000000000000004),
 (758, 0.7, 0.7, 0.5, 0.4),
 (758, 0.7, 0.7, 0.5, 0.5),
 (758, 0.7, 0.7, 0.5, 0.6000000000000001),
 (758, 0.7, 0.7, 0.5, 0.7),
 (758, 

In [57]:
sorted(list(filter(lambda x : x[2]==0.2, scores)), key = lambda x : -x[0])

[(754, 0.30000000000000004, 0.2, 1.0, 0.2),
 (753, 0.2, 0.2, 1.0, 1.0),
 (753, 0.2, 0.2, 1.0, 1.1),
 (753, 0.2, 0.2, 1.0, 1.2),
 (753, 0.2, 0.2, 1.0, 1.3),
 (753, 0.2, 0.2, 1.0, 1.4000000000000001),
 (753, 0.2, 0.2, 1.0, 1.5),
 (753, 0.30000000000000004, 0.2, 1.0, 0.4),
 (753, 0.30000000000000004, 0.2, 1.0, 0.5),
 (753, 0.30000000000000004, 0.2, 1.0, 0.6000000000000001),
 (753, 0.30000000000000004, 0.2, 1.0, 0.7),
 (753, 0.30000000000000004, 0.2, 1.0, 0.8),
 (753, 0.30000000000000004, 0.2, 1.0, 0.9000000000000001),
 (753, 0.30000000000000004, 0.2, 1.0, 1.0),
 (753, 0.30000000000000004, 0.2, 1.0, 1.1),
 (753, 0.30000000000000004, 0.2, 1.0, 1.2),
 (753, 0.30000000000000004, 0.2, 1.0, 1.3),
 (753, 0.30000000000000004, 0.2, 1.0, 1.4000000000000001),
 (753, 0.30000000000000004, 0.2, 1.0, 1.5),
 (752, 0.2, 0.2, 1.0, 0.7),
 (752, 0.2, 0.2, 1.0, 0.8),
 (752, 0.2, 0.2, 1.0, 0.9000000000000001),
 (752, 0.30000000000000004, 0.2, 1.0, 0.30000000000000004),
 (751, 0.2, 0.2, 1.0, 0.2),
 (751, 0.2, 0

In [152]:
matrix_item_features = item_features[item_features.columns[1:]].values
matrix_user_features = user_features[user_features.columns[1:]].values

In [153]:
UF = matrix_user_features
IF = matrix_item_features
UI = UF.dot(IF.T)

In [169]:
get_predictions([UI[0]])

array([[284, 286,  76,  35, 287,  37,  22,  60,  65,  80,  72,  66, 200,
         58,   7,  40,  11, 148,  36,  21]])

In [138]:
np.max(UI)

0.021655600303483808