In [1]:
import sys
import pandas as pd
import tensorflow as tf
# Set the TensorFlow logger level to 'ERROR' before the rest of the notebook runs.
tf.get_logger().setLevel('ERROR')

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)
from sklearn import preprocessing
# Report the interpreter and key library versions this notebook ran with.
version_report = [
    "System version: {}".format(sys.version),
    "Pandas version: {}".format(pd.__version__),
    "Tensorflow version: {}".format(tf.__version__),
]
print("\n".join(version_report))

System version: 3.6.5 |Anaconda, Inc.| (default, Mar 29 2018, 13:32:41) [MSC v.1900 64 bit (AMD64)]
Pandas version: 1.1.5
Tensorflow version: 2.6.2


In [13]:
# Cut-off used as `k` by the ranking metrics computed later in the notebook.
TOP_K = 10

# Training settings handed to the NCF model (n_epochs / batch_size).
EPOCHS, BATCH_SIZE = 20, 256

# Seed passed to both the NCF dataset and the NCF model.
SEED = 42

In [8]:
# Load the raw interaction table from the Excel workbook (read through openpyxl).
df = pd.read_excel(
    io='forncfitems.xlsx',
    engine='openpyxl',
)

In [3]:
# Narrow the frame to the interaction columns, but keep `claster` as well:
# the cluster filter a few cells below reads `df.claster`, and dropping the
# column here makes that cell fail when the notebook is run top to bottom.
# That later cell performs the final reduction to the four interaction columns.
df = df[['userID', 'itemID', 'rating', 'timestamp', 'claster']]

In [4]:
# Discard every row that has a missing value in any of its columns.
df = df.dropna(axis=0, how='any')

In [9]:
# Keep only the rows whose `claster` value is 4, then reduce the frame to the
# four columns used by the splitter and the NCF dataset.
cluster_mask = df.claster == 4
df = df.loc[cluster_mask, ['userID', 'itemID', 'rating', 'timestamp']]

In [10]:
# Display the filtered interaction frame (userID, itemID, rating, timestamp).
df

Unnamed: 0,userID,itemID,rating,timestamp
0,32565748-000000000,Игровые мыши,1,2016-03-11 19:49:12
1,32565748-000000000,Электробритвы,1,2016-03-13 15:59:10
2,32565748-000000000,Автомобильные колонки,1,2016-03-15 19:48:43
3,32565748-000000000,Конструкторы LEGO,1,2016-03-23 16:13:42
4,32565748-000000000,Внешние аккумуляторы,1,2016-04-08 19:43:47
...,...,...,...,...
25554,55575757-57565548505473,Морозильники,1,2016-05-21 14:09:18
25555,56574954-55565757535585,Души и душевые системы,1,2016-04-12 12:29:00
25556,56574954-55565757535585,Смесители,1,2016-04-12 12:29:00
25557,56574954-55565757535585,Смесители,1,2016-04-12 12:29:00


In [14]:
# `train` and `test` are only assigned by the split cell that follows this one,
# so on a fresh kernel this cell used to raise NameError. Compute the
# chronological split here, with the same arguments as that cell, so the
# dataset can be built when the notebook is executed in order.
train, test = python_chrono_split(
    df, ratio=0.75, filter_by="user",
    col_user='userID', col_item='itemID', col_timestamp='timestamp')

# Wrap the two splits in the NCF dataset helper; SEED is forwarded as its seed.
data = NCFDataset(train=train, test=test, seed=SEED)

In [11]:
# Chronological train/test split with ratio 0.75, filtered by user.
# NOTE(review): presumably the earliest 75% of each user's interactions (by
# `timestamp`) land in `train` — confirm against the recommenders splitter docs.
split_options = dict(
    ratio=0.75,
    filter_by="user",
    col_user='userID',
    col_item='itemID',
    col_timestamp='timestamp',
)
train, test = python_chrono_split(df, **split_options)

In [15]:
# Hyperparameters for the NCF model, gathered in one place.
# User/item counts come from the dataset object; epochs, batch size and seed
# come from the configuration constants defined near the top of the notebook.
ncf_params = dict(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED,
)
model = NCF(**ncf_params)



In [16]:
# Fit the model on the NCF dataset while timing the call.
with Timer() as train_time:
    model.fit(data)

# The Timer object is formatted straight into the message.
training_message = "Took {} seconds for training.".format(train_time)
print(training_message)

Took 7.0753 seconds for training.


In [32]:
# Score every (user, item) combination drawn from the training split, then
# keep only the pairs that do NOT occur in `train`, so that already-seen
# items are excluded from the recommendations.
with Timer() as test_time:
    users, items, preds = [], [], []
    item = list(train.itemID.unique())
    for user in train.userID.unique():
        # Repeat the user id once per candidate item so both lists line up.
        user_column = [user] * len(item)
        users.extend(user_column)
        items.extend(item)
        preds.extend(model.predict(user_column, item, is_list=True))

    all_predictions = pd.DataFrame(data={"userID": users, "itemID": items, "prediction": preds})

    # Use the merge indicator to detect unseen pairs. Filtering on a null
    # `rating` instead would also treat a train row whose rating happens to be
    # missing as "unseen" and leak that seen item into the recommendations.
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer", indicator=True)
    unseen = merged["_merge"] == "right_only"
    # Drop the helper column together with `rating` so the resulting columns
    # are the same as before (userID, itemID, timestamp, prediction).
    all_predictions = merged[unseen].drop(["rating", "_merge"], axis=1)

print("Took {} seconds for prediction.".format(test_time))

Took 1.2439 seconds for prediction.


In [33]:
# Ranking metrics of the predictions against the held-out test split, all
# evaluated on the `prediction` column with the same cut-off TOP_K.
ranking_kwargs = dict(col_prediction='prediction', k=TOP_K)

eval_map = map_at_k(test, all_predictions, **ranking_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **ranking_kwargs)
eval_precision = precision_at_k(test, all_predictions, **ranking_kwargs)
eval_recall = recall_at_k(test, all_predictions, **ranking_kwargs)

# One metric per output line.
metric_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print("\n".join(metric_lines))

MAP:	0.105363
NDCG:	0.430995
Precision@K:	0.452317
Recall@K:	0.259667


In [99]:
# Order predictions by user (ascending) and score (descending), then take the
# first five rows of each user's group, i.e. the five highest-scored items.
ranked_predictions = all_predictions.sort_values(
    by=['userID', 'prediction'],
    ascending=[True, False],
)
top5 = ranked_predictions.groupby('userID').head(5)

In [82]:
# Display the scored pairs that remain after pairs present in `train` were removed.
all_predictions

Unnamed: 0,userID,itemID,timestamp,prediction
3279,55574848-51545049555575,МФУ,NaT,0.260232
3280,55574848-51545049555575,Картриджи струйные,NaT,0.057745
3281,55574848-51545049555575,Гарнитуры для ПК,NaT,0.009036
3282,55574848-51545049555575,Мониторы,NaT,0.108584
3283,55574848-51545049555575,Планшеты на Android,NaT,0.093583
...,...,...,...,...
34912,55575757-55505350574972,Гири,NaT,0.005407
34913,55575757-55505350574972,Триммеры для бороды и усов,NaT,0.016545
34914,55575757-55505350574972,Автомобильные пылесосы,NaT,0.006711
34915,55575757-55505350574972,Аксессуары для посудомоечных машин,NaT,0.005917


In [104]:
# Show the recommendations kept in `top5` for the user whose id equals 3.
# NOTE(review): the userID values displayed for `all_predictions` are strings
# (e.g. '55574848-51545049555575'); an integer 3 would match nothing unless
# the ids were label-encoded beforehand — confirm which user is intended.
top5.loc[top5["userID"] == 3]

Unnamed: 0,userID,itemID,timestamp,prediction
21141,3,646,NaT,0.996701
21145,3,550,NaT,0.995977
21172,3,473,NaT,0.985903
21184,3,678,NaT,0.983658
21194,3,15,NaT,0.982345
21185,3,140,NaT,0.974568
21150,3,680,NaT,0.959223
21176,3,18,NaT,0.956776
21227,3,677,NaT,0.955642
21188,3,474,NaT,0.933262
