# Recommendation Systems with TensorFlow

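NOTE: This example does not run to completion yet. The training code is still based on the legacy implementation from the original Colab (linked below), and the final cell fails with the `ValueError` shown at the end of this file.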


https://colab.research.google.com/github/google/eng-edu/blob/main/ml/recommendation-systems/recommendation-systems.ipynb?utm_source=ss-recommendation-systems&utm_campaign=colab-external&utm_medium=referral&utm_content=recommendation-systems#scrollTo=6NHoOwido4tk

In [26]:
import collections

import matplotlib.pyplot as plt
import tensorflow as tf

from movie_helper.rating import load_ratings

In [27]:
# Load the ratings table through the project-local helper and preview the
# first rows (columns shown in the output: user_id, item_id, rating, timestamp).
ratings = load_ratings()
ratings.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [28]:
# Shift both id columns down by one, in place. The ids are used further down
# as the (row, column) indices of the rating SparseTensor.
# NOTE(review): presumably the raw ids start at 1 - confirm against the dataset.
# This cell is not idempotent: re-running it decrements the ids again, so run
# it exactly once after loading.
ratings["user_id"] -= 1
ratings["item_id"] -= 1

In [29]:
# Preview the first rows again to check the shifted ids.
ratings.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,195,241,3,881250949
1,185,301,3,891717742
2,21,376,1,878887116
3,243,50,2,880606923
4,165,345,1,886397596


In [30]:
# Count the distinct users and movies. These counts are used as the dense
# shape of the rating matrix.
# NOTE(review): a count only bounds the ids if they are contiguous 0..n-1;
# confirm this holds for the dataset, otherwise indices may exceed the shape.
n_user = len(ratings["user_id"].unique())
n_movie = len(ratings["item_id"].unique())

In [31]:
def build_rating_sparse_tensor(ratings_df):
    """Builds a tf.SparseTensor holding the ratings of a DataFrame.

    Args:
      ratings_df: a DataFrame with `user_id`, `item_id` and `rating` columns.
    Returns:
      A tf.SparseTensor with one entry per row of `ratings_df`, located at
      (user_id, item_id) and holding the rating. Its dense shape is
      [n_user, n_movie], read from the module-level counts.
    """
    user_movie_pairs = ratings_df[["user_id", "item_id"]].values
    observed_ratings = ratings_df["rating"].values
    matrix_shape = [n_user, n_movie]

    return tf.SparseTensor(
        indices=user_movie_pairs,
        values=observed_ratings,
        dense_shape=matrix_shape,
    )

In [118]:
def sparse_mean_square_error(sparse_ratings, user_embeddings, movie_embeddings):
    """Mean squared error between observed ratings and U.V^T (dense variant).

    This version materializes the full user-by-movie score matrix before
    picking out the observed entries. A later cell in this file redefines the
    function under the same name with a gather-based formulation.

    Args:
      sparse_ratings: a tf.SparseTensor of ratings; its `indices` select the
        (user, movie) pairs and its `values` are the targets.
      user_embeddings: a 2-D tensor with one row per user.
      movie_embeddings: a 2-D tensor with one row per movie.
    Returns:
      The value of tf.losses.mean_squared_error on the observed entries.
    """
    # Score every (user, movie) pair, then keep only the rated ones.
    all_scores = tf.matmul(user_embeddings, movie_embeddings, transpose_b=True)
    observed_scores = tf.gather_nd(all_scores, sparse_ratings.indices)
    return tf.losses.mean_squared_error(sparse_ratings.values, observed_scores)

In [290]:
def sparse_mean_square_error(sparse_ratings, user_embeddings, movie_embeddings):
    """Mean squared error between observed ratings and embedding dot products.

    Only the embedding rows of the rated (user, movie) pairs are gathered, so
    the full user-by-movie score matrix is never built.

    Args:
      sparse_ratings: a tf.SparseTensor of ratings; column 0 of its `indices`
        selects the user row, column 1 the movie row, and its `values` are the
        targets.
      user_embeddings: a 2-D tensor with one row per user.
      movie_embeddings: a 2-D tensor with one row per movie.
    Returns:
      The scalar loss computed by tf.keras.losses.MeanSquaredError.
    """
    user_idx = sparse_ratings.indices[:, 0]
    movie_idx = sparse_ratings.indices[:, 1]

    user_rows = tf.gather(user_embeddings, user_idx)
    movie_rows = tf.gather(movie_embeddings, movie_idx)

    # Row-wise dot product: one predicted rating per observed pair.
    predictions = tf.reduce_sum(user_rows * movie_rows, axis=1)

    mse = tf.keras.losses.MeanSquaredError()
    return mse(y_true=sparse_ratings.values, y_pred=predictions)

In [406]:
class CFModel(object):
    """Simple class that represents a collaborative filtering model"""

    def __init__(self, embedding_vars, loss, metrics=None):
        """Initializes a CFModel.
        Args:
          embedding_vars: A dictionary of tf.Variables.
          loss: A float Tensor. The loss to optimize.
          metrics: optional list of dictionaries of Tensors. The metrics in each
            dictionary will be plotted in a separate figure during training.
        """
        self._embedding_vars = embedding_vars
        self._loss = loss
        self._metrics = metrics
        self._embeddings = {k: None for k in embedding_vars}

    @property
    def embeddings(self):
        """The embeddings dictionary."""
        return self._embeddings

    def train(
        self,
        num_iterations=100,
        learning_rate=1.0,
        plot_results=True,
        optimizer=tf.keras.optimizers.SGD,
    ):
        """Trains the model.
        Args:
          iterations: number of iterations to run.
          learning_rate: optimizer learning rate.
          plot_results: whether to plot the results at the end of training.
          optimizer: the optimizer to use. Default to SGD.
        Returns:
          The metrics dictionary evaluated at the last iteration.
        """
        opt = optimizer(learning_rate)

        iterations = []
        metrics = self._metrics or ({},)
        metrics_vals = [collections.defaultdict(list) for _ in self._metrics]

        # Train and append results.
        for i in range(num_iterations + 1):

            with tf.GradientTape() as tape:
                train_op = opt.minimize(
                    self._loss, var_list=self._metrics, tape=tape
                )
            train_op()
            results = {name: metric.result() for name, metric in metrics.items()}
            if (i % 10 == 0) or i == num_iterations:
                print(
                    "\r iteration %d: " % i
                    + ", ".join(
                        ["%s=%f" % (k, v) for r in results for k, v in r.items()]
                    ),
                    end="",
                )
                iterations.append(i)
                for metric_val, result in zip(metrics_vals, results):
                    for k, v in result.items():
                        metric_val[k].append(v)

        for k, v in self._embedding_vars.items():
            self._embeddings[k] = v.numpy()

        if plot_results:
            # Plot the metrics.
            num_subplots = len(metrics) + 1
            fig = plt.figure()
            fig.set_size_inches(num_subplots * 10, 8)
            for i, metric_vals in enumerate(metrics_vals):
                ax = fig.add_subplot(1, num_subplots, i + 1)
                for k, v in metric_vals.items():
                    ax.plot(iterations, v, label=k)
                ax.set_xlim([1, num_iterations])
                ax.legend()
        return results

In [407]:
def split_dataframe(df, holdout_fraction=0.1):
    """Splits a DataFrame into training and test sets.
    Args:
      df: a dataframe.
      holdout_fraction: fraction of dataframe rows to use in the test set.
    Returns:
      train: dataframe for training
      test: dataframe for testing
    """
    test = df.sample(frac=holdout_fraction, replace=False)
    train = df[~df.index.isin(test.index)]
    return train, test

In [408]:
def build_model(ratings, embedding_dim=3, init_stddev=1.0):
    """
    Args:
      ratings: a DataFrame of the ratings
      embedding_dim: the dimension of the embedding vectors.
      init_stddev: float, the standard deviation of the random initial embeddings.
    Returns:
      model: a CFModel.
    """
    # Split the ratings DataFrame into train and test.
    train_ratings, test_ratings = split_dataframe(ratings)
    # SparseTensor representation of the train and test datasets.
    A_train = build_rating_sparse_tensor(train_ratings)
    A_test = build_rating_sparse_tensor(test_ratings)
    # Initialize the embeddings using a normal distribution.
    U = tf.Variable(
        tf.random.normal([A_train.dense_shape[0], embedding_dim], stddev=init_stddev)
    )
    V = tf.Variable(
        tf.random.normal([A_train.dense_shape[1], embedding_dim], stddev=init_stddev)
    )
    train_loss = sparse_mean_square_error(A_train, U, V)
    test_loss = sparse_mean_square_error(A_test, U, V)
    metrics = {"train_error": train_loss, "test_error": test_loss}
    embeddings = {"user_id": U, "movie_id": V}
    print("train_loss", train_loss)
    print("test_loss", test_loss)
    return CFModel(embeddings, train_loss, [metrics])

In [409]:
# Build the CF model and train it.
model = build_model(ratings, embedding_dim=30, init_stddev=0.5)
model.train(num_iterations=1000, learning_rate=10.0)

train_loss tf.Tensor(15.585228, shape=(), dtype=float32)
test_loss tf.Tensor(15.7773285, shape=(), dtype=float32)


ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.