# 行列分解による推薦モデルの学習

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow.keras.layers import Embedding
from tensorflow.keras import Model

In [2]:
class MatrixFactorization(Model):
    """Matrix-factorization rating model with user, item and global biases.

    Every user and every item owns a latent factor vector and a scalar bias,
    and one extra scalar acts as a global bias. All of them are stored in
    Keras ``Embedding`` layers indexed by integer id.

    Args:
        lamda: Scale applied to the summed bias terms before they are added
            to the element-wise factor products.
        num_users: Number of rows in the user embedding tables. Ids must be
            in ``[0, num_users)``. The default of 944 is presumably sized for
            1-based MovieLens 100k user ids -- TODO confirm.
        num_items: Number of rows in the item embedding tables. Ids must be
            in ``[0, num_items)``. The default of 1683 is presumably sized
            for 1-based MovieLens 100k movie ids -- TODO confirm.
        num_factors: Length of each latent factor vector.
    """

    def __init__(self, lamda=0.1, num_users=944, num_items=1683, num_factors=32):
        super().__init__()
        self.lamda = lamda
        # Layers are created in the same order as before so that Keras
        # assigns them the same auto-generated names.
        self.user_factors = Embedding(num_users, num_factors)
        self.item_factors = Embedding(num_items, num_factors)
        self.user_biases = Embedding(num_users, 1)
        self.item_biases = Embedding(num_items, 1)
        # A one-row table whose only entry (index 0) is the global bias.
        self.biases = Embedding(1, 1)

    def call(self, user_id, item_id):
        """Predict a rating for every (user, item) pair in the batch.

        Args:
            user_id: Integer tensor of user ids. The rest of this file passes
                1-D int32 tensors.
            item_id: Integer tensor of item ids, matching ``user_id``.

        Returns:
            The sum over axis 1 of ``factor products + lamda * biases``; one
            value per pair when the ids are 1-D.
        """
        interaction = self.user_factors(user_id) * self.item_factors(item_id)
        bias = (
            self.biases(0)
            + self.user_biases(user_id)
            + self.item_biases(item_id)
        )
        # NOTE(review): for 1-D ids ``bias`` has a trailing dimension of 1, so
        # the addition broadcasts it across the factor axis and the sum below
        # counts it once per factor (effective scale: num_factors * lamda).
        # This is kept as-is because the trained results depend on it; a
        # textbook formulation would add the bias once, after the reduction.
        return tf.math.reduce_sum(interaction + self.lamda * bias, axis=1)


model = MatrixFactorization(lamda=0.1)

In [3]:
# Mean squared error between true and predicted ratings; train_step
# differentiates this value to update the model.
loss_object = tf.keras.losses.MeanSquaredError()

# Adam optimizer constructed with its default hyperparameters.
optimizer = tf.keras.optimizers.Adam()

In [4]:
# Accumulates the mean squared error over the training batches; updated in
# train_step and reset at the end of every epoch by the training loop.
train_mse = tf.keras.metrics.MeanSquaredError(name="train_mse")

# Accumulates the mean squared error over the evaluation batches; updated in
# test_step and reset at the end of every epoch by the training loop.
test_mse = tf.keras.metrics.MeanSquaredError(name="test_mse")

In [5]:
@tf.function
def train_step(user_id, item_id, rating):
    """Apply one optimizer update for a batch and record its training MSE.

    Args:
        user_id: Integer tensor of user ids for the batch.
        item_id: Integer tensor of item ids for the batch.
        rating: Tensor of observed ratings used as regression targets.
    """
    with tf.GradientTape() as tape:
        predicted = model(user_id, item_id)
        batch_loss = loss_object(rating, predicted)

    # Look the variables up only after the forward pass has run: Keras layers
    # may create their weights on first call, so an earlier read could be empty.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    train_mse(rating, predicted)

In [6]:
@tf.function
def test_step(user_id, item_id, rating):
    """Score one evaluation batch and fold it into the ``test_mse`` metric.

    No gradients are computed and no weights are changed.

    Args:
        user_id: Integer tensor of user ids for the batch.
        item_id: Integer tensor of item ids for the batch.
        rating: Tensor of observed ratings to compare the predictions with.
    """
    predictions = model(user_id, item_id)

    # The per-batch loss used to be computed here but was never read; the
    # metric below is the only consumer of the predictions.
    test_mse(rating, predictions)

In [7]:
def _parse_ids(id_strings):
    """Convert a batch of numeric id strings into an int32 tensor."""
    return tf.strings.to_number(id_strings, out_type=tf.dtypes.int32)


def _model_inputs(batch):
    """Return the (user_id, item_id, rating) tensors of one dataset batch."""
    return (
        _parse_ids(batch["user_id"]),
        _parse_ids(batch["movie_id"]),
        batch["user_rating"],
    )


# The first 80% of the "train" split is used for fitting and its last 20% for
# evaluation.
ds = tfds.load(
    "movielens/100k-ratings",
    split=["train[:80%]", "train[-20%:]"],
    shuffle_files=True,
)
# Alternative local source, currently unused:
# ds = tf.data.experimental.make_csv_dataset("../data/movielens100k.csv", batch_size=8)

train_split, test_split = ds
# padded_batch rather than batch: presumably some features of this dataset are
# variable-length and need padding to be stacked -- TODO confirm.
train_batches = train_split.padded_batch(32)
test_batches = test_split.padded_batch(32)

EPOCHS = 4
template = 'Epoch {}, Train RMSE: {}, Test RMSE: {}'

for epoch in range(EPOCHS):
    for batch in train_batches:
        train_step(*_model_inputs(batch))

    for batch in test_batches:
        test_step(*_model_inputs(batch))

    # The metrics hold mean squared errors; report their square roots.
    train_rmse = train_mse.result() ** 0.5
    test_rmse = test_mse.result() ** 0.5
    print(template.format(epoch + 1, train_rmse, test_rmse))

    # Start the next epoch with empty accumulators.
    train_mse.reset_states()
    test_mse.reset_states()

Epoch 1, Train RMSE: 1.682153582572937, Test RMSE: 0.9686179161071777
Epoch 2, Train RMSE: 0.9230291843414307, Test RMSE: 0.9266422986984253
Epoch 3, Train RMSE: 0.8664652109146118, Test RMSE: 0.9153723120689392
Epoch 4, Train RMSE: 0.8140984773635864, Test RMSE: 0.9142385125160217


In [8]:
# Print the model's predicted rating of user 1 for each item id from 0 to 9.
user = tf.constant([1])
for item_id in range(10):
    print(item_id)
    prediction = model(user, tf.constant([item_id]))
    print(prediction.numpy())

0
[2.822658]
1
[4.1819167]
2
[3.332088]
3
[3.194069]
4
[3.6317716]
5
[2.9678195]
6
[4.49494]
7
[3.9677973]
8
[4.157263]
9
[4.5439568]


In [None]:
# Display the weights of the user-factor embedding layer as the cell result.
model.user_factors.get_weights()

In [None]:
# Display the weights of the per-user bias embedding layer as the cell result.
model.user_biases.get_weights()

In [None]:
# Display the weights of the one-row global bias embedding as the cell result.
model.biases.get_weights()