In [13]:
import pandas as pd
import numpy as np
from copy import deepcopy
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, Flatten, dot, Lambda
# https://towardsdatascience.com/recommender-systems-from-learned-embeddings-f1d12288f278

In [2]:
train_path = '../data/ml-100k/u1.base'
test_path = '../data/ml-100k/u1.test'

df = pd.read_csv(train_path, delimiter = '\t', names = ['userid', 'itemid', 'rating', 'timestamp'])
df.head()

Unnamed: 0,userid,itemid,rating,timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [35]:
num_unique_user = max(df.userid.unique())
num_unique_item = max(df.itemid.unique())

df = df.sample(frac = 1)

train_data = df[:int(df.shape[0]*0.8)]
test_data = df[int(df.shape[0]*0.8):]

In [61]:
embedding_dim = 16

model_user = Sequential()
model_user.add(Input(shape = (1,),name = 'user'))
model_user.add(Embedding(
    input_dim = num_unique_user+1,
    output_dim = embedding_dim,
    input_length = train_data.shape[0]
))
model_user.add(Flatten())

model_item = Sequential()
model_item.add(Input(shape = (1,),name = 'item'))
model_item.add(Embedding(
    input_dim = num_unique_item+1,
    output_dim = embedding_dim,
    input_length = train_data.shape[0]
))
model_item.add(Flatten())

merge_model = dot([model_user.output, model_item.output], axes = 1, normalize = True)
dense1 = Dense(32, activation='relu')(merge_model)
drop1 = Dropout(0.2)(dense1)
dense2 = Dense(8, activation='relu')(drop1)
drop2 = Dropout(0.2)(dense2)
output_layer = Dense(1, activation='relu')(drop2)
# output_layer = Dense(1, activation='softmax')(drop2)
# output_layer = Lambda(lambda x: x*4+1)(output_layer1)

model = Model([model_user.input, model_item.input], output_layer)


opt = tf.keras.optimizers.Adam(learning_rate = 0.0001,amsgrad = True)

model.compile(optimizer=opt,loss = 'mean_squared_error')


In [62]:
model.fit([train_data.userid, train_data.itemid], train_data.rating, batch_size = 10, epochs = 10)
#          validation_data = ([test_data.userid, test_data.itemid],test_data.rating))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1ed0da880f0>

In [63]:
pred = model.predict([test_data.userid, test_data.itemid])

In [58]:
np.mean(np.sqrt(pred.flatten() - test_data.rating))

  result = getattr(ufunc, method)(*inputs, **kwargs)


0.8853245285444075

In [65]:
pred.mean()

1.0

In [43]:
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_14 (Embedding)        (None, 1, 16)        15104       user[0][0]                       
__________________________________________________________________________________________________
embedding_15 (Embedding)        (None, 1, 16)        26928       item[0][0]                       
____________________________________________________________________________________________