In [None]:
import pandas as pd
import numpy as np
from sklearn.utils.extmath import randomized_svd
from sklearn.preprocessing import LabelEncoder

# Load the raw data sets from the working directory.
products_df = pd.read_csv('products.csv')
transactions_df =  pd.read_csv('transactions.csv')

# For every user, compute each product's share of that user's transaction rows:
#   user_product -> number of rows per (user_id, product_id) pair
#   user         -> number of rows per user_id
# Dividing on the 'user_id' level yields a fraction per (user, product) pair.
user_product = transactions_df.groupby(['user_id', 'product_id'])['product_id'].count()
user = transactions_df.groupby(['user_id'])['product_id'].count()
# The value column is renamed to 'product_id_count' *before* reset_index so it
# does not clash with the 'product_id' index level that becomes a column.
# Despite the '_count' suffix the column holds the fraction computed above.
g = pd.DataFrame(user_product.div(user, level='user_id')).add_suffix('_count').reset_index()

# Assign consecutive integer ids to users and to products.
# Each column gets its own encoder: refitting one shared encoder on the second
# column would throw away the fitted user mapping, making it impossible to
# transform / inverse_transform user ids later on.
user_encoder = LabelEncoder()
product_encoder = LabelEncoder()
g['user_id_num'] = user_encoder.fit_transform(g['user_id'])
g['product_id_num'] = product_encoder.fit_transform(g['product_id'])

# Backward compatibility: the original cell left `le` fitted on 'product_id'.
le = product_encoder


In [None]:
# Export the raw-id -> ordinal-id lookup tables, one CSV file per entity.
# to_csv is called with its defaults, so the frame's row index (whatever rows
# survive drop_duplicates) is written as the first column of each file.
for lookup_cols, csv_name in (
    (['user_id', 'user_id_num'], 'users_meta.csv'),
    (['product_id', 'product_id_num'], 'products_meta.csv'),
):
    g[lookup_cols].drop_duplicates().to_csv(csv_name)

In [None]:
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, concatenate, Dropout, Dense, dot, BatchNormalization, Add
from keras.optimizers import Adam
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import pydot

# Build an NCF (neural collaborative filtering) model.
# Paper: https://arxiv.org/pdf/1708.05031.pdf
# The network has two branches that share the same two inputs:
#   * an MLP branch over concatenated user/product embeddings, and
#   * an MF branch that takes the dot product of a second pair of embeddings.
# Their outputs are concatenated and fed to a single-unit output layer.

# Number of distinct encoded ids present in `g`.
num_users = len(g.user_id_num.unique())
num_product = len(g.product_id_num.unique())

# Width of every embedding vector (the same for both branches).
latent_dim = 10

# Each sample supplies one integer product id and one integer user id.
product_input = Input(shape=[1],name='product-input')
user_input = Input(shape=[1], name='user-input')

# MLP-branch embeddings. Flatten removes the length-1 axis that Embedding adds
# for the shape=[1] input, leaving one latent_dim-wide vector per sample.
# NOTE(review): the tables have one row more than there are distinct ids
# (num_* + 1); with LabelEncoder ids the extra row looks unused — confirm
# whether it is intentional.
product_embedding_mlp = Embedding(num_product + 1, latent_dim, name='product-embedding-mlp')(product_input)
product_vec_mlp = Flatten(name='flatten-product-mlp')(product_embedding_mlp)

user_embedding_mlp = Embedding(num_users + 1, latent_dim, name='user-embedding-mlp')(user_input)
user_vec_mlp = Flatten(name='flatten-user-mlp')(user_embedding_mlp)

# MF-branch embeddings: separate tables, not shared with the MLP branch.
product_embedding_mf = Embedding(num_product + 1, latent_dim, name='product-embedding-mf')(product_input)
product_vec_mf = Flatten(name='flatten-product-mf')(product_embedding_mf)

user_embedding_mf = Embedding(num_users + 1, latent_dim, name='user-embedding-mf')(user_input)
user_vec_mf = Flatten(name='flatten-user-mf')(user_embedding_mf)

# MLP branch: concat -> dropout -> Dense(100) -> BN -> dropout -> Dense(50) -> BN -> dropout.
# Every dropout layer uses rate 0.2; the dropout layers are left unnamed, so
# Keras assigns their names automatically.
concat = concatenate([product_vec_mlp, user_vec_mlp], name='concat')
concat_dropout = Dropout(0.2)(concat)
fc_1 = Dense(100, name='fc-1', activation='relu')(concat_dropout)
fc_1_bn = BatchNormalization(name='batch-norm-1')(fc_1)
fc_1_dropout = Dropout(0.2)(fc_1_bn)
fc_2 = Dense(50, name='fc-2', activation='relu')(fc_1_dropout)
fc_2_bn = BatchNormalization(name='batch-norm-2')(fc_2)
fc_2_dropout = Dropout(0.2)(fc_2_bn)

# Branch outputs: a 10-unit projection from the MLP branch and the plain
# (un-normalized) dot product of the MF embeddings, joined into one vector.
pred_mlp = Dense(10, name='pred-mlp', activation='relu')(fc_2_dropout)
pred_mf = dot([product_vec_mf, user_vec_mf], normalize = False, axes = -1, name='pred-mf')
combine_mlp_mf = concatenate([pred_mf, pred_mlp], name='combine-mlp-mf')

# Final prediction: a single unit on top of the combined branch outputs.
# NOTE(review): ReLU clamps the prediction at 0 from below and is unbounded
# above; if the target is a fraction, a bounded activation may fit better —
# confirm against the training target.
result = Dense(1, name='result', activation='relu')(combine_mlp_mf)

# Input order is [user, product]; callers of fit/predict must pass arrays in
# that same order. Trained with Adam (learning rate 0.01) on mean absolute error.
model = Model([user_input, product_input], result)
model.compile(optimizer=Adam(lr=0.01), loss='mean_absolute_error')
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

# Hold out 40% of the (user, product) rows for evaluation; the fixed
# random_state makes the split repeatable.
X_train, X_test = train_test_split(g, test_size=0.4, random_state=42)

# Inputs go in the order the model was built with: [user ids, product ids].
train_inputs = [X_train['user_id_num'], X_train['product_id_num']]
history = model.fit(train_inputs, X_train['product_id_count'], epochs=1)

# Predict on the held-out rows; predictions are rounded to two decimals
# before being scored.
test_inputs = [X_test['user_id_num'], X_test['product_id_num']]
y_hat = np.round(model.predict(test_inputs), decimals=2)
y_true = X_test['product_id_count']

# Last expression of the cell: the hold-out mean absolute error is displayed.
mean_absolute_error(y_true, y_hat)

In [None]:
# Write the current `model` to the file 'ncf.h5' (relative path, so it lands
# in the kernel's working directory).
model.save('ncf.h5')

In [None]:
from keras import backend as K
from keras.models import load_model
import pandas as pd

# Read the model back from 'ncf.h5' and rebind `model` to the loaded copy,
# replacing whatever `model` referred to before this cell ran.
model = load_model('ncf.h5')
