-
Notifications
You must be signed in to change notification settings - Fork 1
/
02.MF_nn.py
103 lines (74 loc) · 2.61 KB
/
02.MF_nn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""Matrix-factorization recommender (user/movie embeddings + biases) in Keras.

Expects `../data/rating_2.csv` with columns: userId, movie_idx, rating.
Trains on mean-centered ratings and plots train/validation loss and MSE.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # fixed: was `import matplotlib.pyplot import plt` (SyntaxError)
from sklearn.utils import shuffle
from keras.models import Model
from keras.layers import Input, Embedding, Dot, Add, Flatten
from keras.regularizers import l2       # was used below but never imported
from keras.optimizers import SGD        # was used below but never imported

data_dir = '../'  # renamed from `dir`, which shadows the builtin

# Load the data (was read into `rt` but used as `df` -> NameError)
df = pd.read_csv(data_dir + 'data/rating_2.csv')

# Ids are assumed to be 0-based and dense, so max+1 gives the vocabulary size.
N = df.userId.max() + 1     # number of users
M = df.movie_idx.max() + 1  # number of movies
print("The number of Users : ", N)
print("The number of Movies : ", M)

# Splitting the data into train and test set (80/20 after shuffling)
df = shuffle(df)
cut = int(0.8 * len(df))
tr = df.iloc[:cut]
te = df.iloc[cut:]

# A two-input Keras model takes a LIST of arrays, one per Input layer —
# the original passed a single two-column DataFrame, which fit() rejects.
X_tr = [tr.userId.values, tr.movie_idx.values]
X_te = [te.userId.values, te.movie_idx.values]

# Center ratings by the TRAINING mean so the model learns residuals;
# the same mu is applied to the test set to avoid leakage.
mu = tr.rating.mean()
y_tr = tr.rating.values - mu
y_te = te.rating.values - mu

# Modeling
K = 10       # latent dimensionality
reg = 0.     # regularization penalty
epochs = 10

# Input layers: one user id and one movie id per sample
u = Input(shape=(1,))
m = Input(shape=(1,))

# Embedding layers; dot product of the two K-dim factors per sample
u_embedding = Embedding(input_dim=N, output_dim=K, embeddings_regularizer=l2(reg))(u)  # (batch, 1, K)
m_embedding = Embedding(input_dim=M, output_dim=K, embeddings_regularizer=l2(reg))(m)  # (batch, 1, K)
x = Dot(axes=2)([u_embedding, m_embedding])  # (batch, 1, 1)

# Bias layer: one learned scalar per user and per movie
u_bias = Embedding(input_dim=N, output_dim=1, embeddings_regularizer=l2(reg))(u)  # (batch, 1, 1)
m_bias = Embedding(input_dim=M, output_dim=1, embeddings_regularizer=l2(reg))(m)  # (batch, 1, 1)
x = Add()([x, u_bias, m_bias])
x = Flatten()(x)  # (batch, 1)

model = Model(inputs=[u, m], outputs=x)
model.compile(optimizer=SGD(lr=.01, momentum=.9),
              loss='mse',
              metrics=['mse'])  # reported loss includes regularization terms; the mse metric does not

# Train the model (validation_data is a tuple, not a list)
r = model.fit(X_tr, y_tr,
              epochs=epochs,
              batch_size=128,
              validation_data=(X_te, y_te))

# plot losses
plt.plot(r.history['loss'], label="train loss")
plt.plot(r.history['val_loss'], label="test loss")
plt.legend()
plt.show()

# plot mse — the history key depends on the Keras version
# ('mse' in tf.keras, 'mean_squared_error' in standalone Keras)
mse_key = 'mse' if 'mse' in r.history else 'mean_squared_error'
plt.plot(r.history[mse_key], label="train mse")
plt.plot(r.history['val_' + mse_key], label="test mse")
plt.legend()
plt.show()