# Movie recommendations with TensorFlow

In this notebook, we are going to implement a factorization machine and train it on movie ratings.  
The code is partly taken from [Factorization Machines with Tensorflow](http://nowave.it/factorization-machines-with-tensorflow.html) by Gabriele Modena.

## Setup

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import sys

module_path = os.path.abspath("../../scripts")
sys.path.append(module_path)
import recommendation_helper as rh

In [None]:
# Seed for NumPy's global random generator (set in load_data before the split)
seed = 99
# Number of iterations of the training loop
training_epochs = 200
# Number of latent factors, i.e. the first dimension of the interaction matrix V
k = 5
# Learning rate handed to the Adam optimizer
learning_rate = 0.05

# Constants which control the impact of the regularization terms
lambda_w = tf.constant(0.001, name='lambda_w')
lambda_V = tf.constant(0.001, name='lambda_V')

In [None]:
# Column-name prefixes: get_input_data() drops every column whose name starts
# with one of these, so they are not part of the model's input features.
ignored_columns = ["movieId", "userId", "title", "year_bucket", "rating"]

Load the data and generate a train and a test set.

In [None]:
def load_data():
    """Load the movie ratings and split them into a train and a test set.

    Reads ``../../data/05/movielens.csv`` (``;``-separated). NumPy's global
    random generator is seeded with ``seed`` first, so the same input file
    always yields the same split. Each row goes to the training set with
    probability 0.9 and to the test set otherwise.

    Returns:
        A ``(train, test)`` tuple of DataFrames holding disjoint rows of the
        loaded data.
    """
    # Close the file handle as soon as the CSV is parsed instead of leaking it.
    with tf.gfile.Open("../../data/05/movielens.csv") as csv_file:
        data = pd.read_csv(csv_file, sep=";")

    np.random.seed(seed)
    msk = np.random.rand(len(data)) < 0.9     # split into 90% train and 10% test data
    return data.loc[msk], data.loc[~msk]

Remove the columns which are not used for training, e.g. the movie and user IDs, which are only needed for generating recommendations later on.

In [None]:
def get_input_data(input):
    """Return a copy of ``input`` without the columns that are not model features.

    A column is removed when its name starts with any entry of
    ``ignored_columns`` (prefix match, not exact match).
    """
    prefixes = tuple(ignored_columns)
    unused = [name for name in input.columns if name.startswith(prefixes)]
    return input.drop(unused, axis=1)

In [None]:
# Full train/test frames, still including IDs, titles and ratings
df_train, df_test = load_data()

# Feature-only views used as model input
train, test = get_input_data(df_train), get_input_data(df_test)

print("Set contains %i training samples and %i test samples." % (len(df_train), len(df_test)))

# Show five random training rows as the cell output
df_train.sample(5)

## Model implementation

First, define the placeholders and variables needed for the model.

In [None]:
# p is the number of feature columns in the training input
_, p = train.shape

# Inputs fed at run time: feature rows (X) and the matching ratings (y).
# The names "X" and "y" are the graph names of these placeholders.
X = tf.placeholder('float', shape=[None, p], name="X")
y = tf.placeholder('float', shape=[None, 1], name="y")

# interaction factors, randomly initialized
# (a [k, p] matrix: k latent factors for each of the p features)
V = tf.Variable(tf.random_normal([k, p], stddev=0.01), name="V")

# bias and weights
# (a single global bias and one linear weight per feature, all starting at zero)
w0 = tf.Variable(tf.zeros([1]), name="w0")
w = tf.Variable(tf.zeros([p]), name="w")

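The factorization machine computes the following function, where $p$ is the number of features and $k$ the number of latent factors:  
$$\hat{y}(x) = w_0 + \sum_{i=1}^{p} w_i x_i + \frac{1}{2} \sum_{f=1}^{k} \left[ \left( \sum_{i=1}^{p} V_{f,i} \, x_i \right)^2 - \sum_{i=1}^{p} V_{f,i}^2 \, x_i^2 \right]$$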

In [None]:
def build_model():
    """Assemble the prediction op of the factorization machine.

    Works on the module-level placeholder ``X`` and the variables ``w0``,
    ``w`` and ``V``.

    Returns:
        The prediction tensor, registered in the graph under the name
        "prediction".
    """
    # Linear part: w0 + Σ_{i=1}^{p} w_i * x_i
    weighted_features = tf.multiply(w, X)
    linear_terms = tf.add(w0, tf.reduce_sum(weighted_features, 1, keep_dims=True))

    # Pairwise part: 0.5 * Σ_{f=1}^{k} [(Σ_i V_fi * x_i)^2 - Σ_i V_fi^2 * x_i^2]
    squared_sums = tf.square(tf.matmul(X, tf.transpose(V)))
    sums_of_squares = tf.matmul(tf.square(X), tf.transpose(tf.square(V)))
    per_factor = tf.subtract(squared_sums, sums_of_squares)
    interactions = tf.multiply(0.5, tf.reduce_sum(per_factor, 1, keep_dims=True))

    # The explicit name lets other cells fetch this tensor from the graph
    return tf.add(linear_terms, interactions, name="prediction")

We optimize the model using the mean squared error (MSE) plus an L2 regularization term over the weights and latent factors as the loss function.

In [None]:
def create_optimizer(y_hat):
    """Build the regularized loss and the training op that minimizes it.

    Args:
        y_hat: Tensor with the model's predictions; it is compared against
            the ``y`` placeholder.

    Returns:
        A tuple ``(optimizer, mse, loss)``: the Adam training op, the mean
        squared error, and the MSE plus the L2 penalty.
    """
    # Mean Squared Error: 1/n Σ(y - y_hat)^2
    mse = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))

    # L2 regularization over w and V: λ_w * Σ w^2 + λ_V * Σ V^2
    # Each term is reduced on its own. Adding the squares element-wise first
    # would broadcast w (shape [p]) against V (shape [k, p]) and count the
    # penalty on w k times.
    l2_norm = tf.add(tf.multiply(lambda_w, tf.reduce_sum(tf.square(w))),
                     tf.multiply(lambda_V, tf.reduce_sum(tf.square(V))))

    # Total loss used for optimization
    loss = tf.add(mse, l2_norm)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return optimizer, mse, loss

In [None]:
def create_metrics(y_hat):
    """Build the evaluation metrics for the predictions ``y_hat``.

    Returns:
        A tuple ``(mae, rmse)`` of scalar tensors, both computed against the
        ``y`` placeholder.
    """
    residual = tf.subtract(y, y_hat)

    # MAE: mean of |y - y_hat|
    mae = tf.reduce_mean(tf.abs(residual))

    # RMSE: √(1/n Σ(y - y_hat)^2)
    mean_squared = tf.reduce_mean(tf.square(residual))
    rmse = tf.sqrt(mean_squared)
    return mae, rmse

## Train and evaluate the model

In [None]:
# Session shared by the training and the recommendation cells below
sess = tf.InteractiveSession()

In [None]:
def train_and_evaluate():
    """Train the factorization machine and print training and test metrics.

    Builds the model, loss and metrics, initializes all variables in ``sess``
    and then runs ``training_epochs`` optimizer steps, each on the complete
    (shuffled) training set. Training metrics are printed after every 10th
    epoch; MSE, RMSE and MAE on the test set are printed at the end.
    """
    # Set up the model, optimizer and metrics
    y_hat = build_model()
    optimizer, mse, loss = create_optimizer(y_hat)
    mae, rmse = create_metrics(y_hat)

    # IMPORTANT: initialize variables before computing anything else
    sess.run(tf.global_variables_initializer())

    # Training loop
    for epoch in range(training_epochs):
        # Shuffle the training data
        indices = np.array(train.index)
        np.random.shuffle(indices)

        # Build the feed once per epoch; it is reused for the metrics below
        train_feed = {X: train.loc[indices],
                      y: pd.DataFrame({"rating": df_train.rating.loc[indices]})}
        sess.run(optimizer, feed_dict=train_feed)
        if epoch % 10 == 9:
            error, cost, mean_abs, root_mean = sess.run([mse, loss, mae, rmse], feed_dict=train_feed)
            print("Epoch %i\tLoss (regularized error): %.3f\tMSE: %.3f\tRMSE: %.3f\tMAE: %.3f"
                  % ((epoch + 1), cost, error, root_mean, mean_abs))

    # Evaluate all test metrics in a single pass over the test set
    test_feed = {X: test, y: pd.DataFrame({"rating": df_test.rating})}
    test_mse, test_rmse, test_mae = sess.run([mse, rmse, mae], feed_dict=test_feed)
    print('MSE Test:', test_mse)
    print('RMSE Test:', test_rmse)
    print('MAE Test:', test_mae)


In [None]:
# Runs the training loop; prints training metrics every 10th epoch and the test metrics at the end
train_and_evaluate()

## Get recommendations

Now that we've trained our model, we can use it to recommend movies to users.

For this, we need the nodes in the session graph which take the input and compute the estimates for the user's ratings.

In [None]:
# Get the necessary nodes from the model graph
# "prediction" is the name given to the output op in build_model()
prediction = sess.graph.get_tensor_by_name("prediction:0")
# "X" is the name of the input placeholder; this rebinds X to the tensor found under that name
X = sess.graph.get_tensor_by_name("X:0")

Compute the first *n* recommendations for a user:

In [None]:
def get_n_recommendations(user_id, n):
    """Return the ``n`` movies with the highest predicted rating for a user.

    Args:
        user_id: ID of the user, passed on to ``rh.get_user_data``.
        n: Number of rows to return.

    Returns:
        A DataFrame with the columns ``title``, ``movieId`` and ``rating``,
        sorted by predicted rating in descending order and cut to ``n`` rows.
    """
    # NOTE(review): presumably one feature row per entry of rh.movies, in the
    # same order -- confirm against recommendation_helper.
    user_features = rh.get_user_data(user_id)

    # Work on an explicit copy: adding a column to a selection of rh.movies
    # would otherwise raise pandas' SettingWithCopyWarning.
    recommendations = rh.movies[["title", "movieId"]].copy()

    # Here, we use the nodes which we extracted before
    recommendations["rating"] = sess.run(prediction, feed_dict={X: user_features})
    return recommendations[["title", "movieId", "rating"]].sort_values("rating", ascending=False).head(n)

In [None]:
# Top 25 recommendations for the user with ID 1
get_n_recommendations(1, 25)

 **Optional tasks**:
* Experiment with the model parameters. Try, e.g., different values for *k* and different learning rates.
* Try training the model with no/less/more regularization. (Hint: vary the values for $\lambda_V$ and $\lambda_w$).
* Try different optimization algorithms (see [TensorFlow Optimizers](https://www.tensorflow.org/api_guides/python/train#Optimizers) for an overview of optimizers).
* Compare the recommendations for different users.