# Recommend Movies

## About the Data

In this example, we will use the ratings.csv and the movies.csv files. 

In the ratings.csv, the file lists each movie that has been rated when by an user. We have the following columns: <br> <br>
**userId** - user <br>
**movieId** - movie <br>
**rating** - what number value a user gives to this particular movie <br>
**timestamp** - when this rating was given <br>

In the movies.csv file, we will have the descriptors of the movies. Here is the list of columns and what they represent: <br> <br> 
**movieId** - movie <br>
**title** - the title of the movie <br>
**genres** - one or more tags to describe the movie <br>


## Set up your environment

In [None]:
%pip install -r requirements.txt

## Download Data

In [30]:
"""
This script will download the necessary data for training to run.
"""

from io import BytesIO
import requests
import os
import zipfile

movielens_data_file_url = (
'http://files.grouplens.org/datasets/movielens/ml-latest-small.zip'
)
req = requests.get(movielens_data_file_url)
print('Downloading Completed')

file = zipfile.ZipFile(BytesIO(req.content))
dir = os.getcwd()
file.extractall(dir)

Downloading Completed


## Setting up the Model

In [31]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

EMBEDDING_SIZE = 50

class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.movie_embedding = layers.Embedding(
            num_movies,
            embedding_size,
            embeddings_initializer='he_normal',
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.movie_bias = layers.Embedding(num_movies, 1)

    @tf.function
    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])
        dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
        # Add all the components (including bias)
        x = dot_user_movie + user_bias + movie_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)

In [37]:
from pathlib import Path
import pandas as pd
import numpy as np

curr_dir = Path("__file__").parent
movielens_dir = os.path.join(curr_dir, 'ml-latest-small')
ratings_file = os.path.join(movielens_dir, 'ratings.csv')
df = pd.read_csv(ratings_file)

user_ids = df['userId'].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
movie_ids = df['movieId'].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
df['user'] = df['userId'].map(user2user_encoded)
df['movie'] = df['movieId'].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)
df['rating'] = df['rating'].values.astype(np.float32)
# min and max ratings will be used to normalize the ratings later
min_rating = min(df['rating'])
max_rating = max(df['rating'])

print(
    'Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}'.format(
        num_users, num_movies, min_rating, max_rating,
    ),
)
df = df.sample(frac=1, random_state=42)
x = df[['user', 'movie']].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = df['rating'].apply(
    lambda x: (x - min_rating) /
    (max_rating - min_rating),
).values

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.33)

model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(learning_rate=0.001),
)
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(x_val, y_val),
)


Number of users: 610, Number of Movies: 9724, Min rating: 0.5, Max rating: 5.0
Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x15aafed60>

## Predictions

In [38]:
user_id = 1

In [40]:
movie_df = pd.read_csv(os.path.join(movielens_dir, 'movies.csv'))
movies_watched_by_user = df[df.userId == user_id]
movies_not_watched = movie_df[
    ~movie_df['movieId'].isin(movies_watched_by_user.movieId.values)
]['movieId']
movies_not_watched = list(
    set(movies_not_watched).intersection(set(movie2movie_encoded.keys())),
)
movies_not_watched = [
    [movie2movie_encoded.get(x)] for x in movies_not_watched
]
user_encoder = user2user_encoded.get(user_id)
user_movie_array = np.hstack(
    ([[user_encoder]] * len(movies_not_watched), movies_not_watched),
)
ratings = model.predict(user_movie_array).flatten()
top_ratings_indices = ratings.argsort()[-10:][::-1]
recommended_movie_ids = [
    movie_encoded2movie.get(movies_not_watched[x][0]) for x in top_ratings_indices
]

print(f'Showing recommendations for user: {user_id}')
print('====' * 9)
print('Top 10 movie recommendations')
print('----' * 8)
recommended_movies = movie_df[
    movie_df['movieId'].isin(
        recommended_movie_ids,
    )
]
for row in recommended_movies.itertuples():
    print(row.title, ':', row.genres)


Showing recommendations for user: 1
Top 10 movie recommendations
--------------------------------
Shawshank Redemption, The (1994) : Crime|Drama
Rear Window (1954) : Mystery|Thriller
One Flew Over the Cuckoo's Nest (1975) : Drama
Brazil (1985) : Fantasy|Sci-Fi
Godfather: Part II, The (1974) : Crime|Drama
Amadeus (1984) : Drama
Raging Bull (1980) : Drama
Good Will Hunting (1997) : Drama|Romance
Fear and Loathing in Las Vegas (1998) : Adventure|Comedy|Drama
Snatch (2000) : Comedy|Crime|Thriller
