In [1]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import matplotlib.pyplot as plt
import os
import tempfile
LOCAL_DIR = os.getcwd()
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'




In [2]:
# Reading ratings file
ratings_df = pd.read_csv('ratings.csv', sep='\t', encoding='latin-1', 
                      usecols=['user_id', 'movie_id', 'user_emb_id', 'movie_emb_id', 'rating'])
max_userid = ratings_df['user_id'].drop_duplicates().max()
max_movieid = ratings_df['movie_id'].drop_duplicates().max()

# Reading ratings file
users_df = pd.read_csv('users.csv', sep='\t', encoding='latin-1', 
                    usecols=['user_id', 'gender', 'zipcode', 'age_desc', 'occ_desc'])

# Reading ratings file
movies_df = pd.read_csv('movies.csv', sep='\t', encoding='latin-1', usecols=['movie_id', 'title', 'genres'])
dataset = pd.merge(pd.merge(movies_df, ratings_df),users_df)

In [3]:
user_ids = ratings_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}

movie_ids = ratings_df["movie_id"].unique().tolist()
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}

ratings_df["user"] = ratings_df["user_id"].map(user2user_encoded)
ratings_df["movie"] = ratings_df["movie_id"].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie_encoded2movie)
ratings_df["rating"] = ratings_df["rating"].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = min(ratings_df["rating"])
max_rating = max(ratings_df["rating"])
print(
    "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_movies, min_rating, max_rating
    )
)
ratings_df = ratings_df.sample(frac=1, random_state=42)

Number of users: 6040, Number of Movies: 3706, Min rating: 1.0, Max rating: 5.0


In [4]:
x = ratings_df[["user", "movie"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = ratings_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * ratings_df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [5]:
EMBEDDING_SIZE = 32
class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super(RecommenderNet, self).__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
            mask_zero=True
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.movie_embedding = layers.Embedding(
            num_movies,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
            mask_zero=True
        )
        self.movie_bias = layers.Embedding(num_movies, 1)
    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])
        dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)
        # Add all the components (including bias)
        x = dot_user_movie + user_bias + movie_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)

In [6]:
model = RecommenderNet(num_users, num_movies, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(lr=0.001)
)
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
    validation_data=(x_val, y_val),
)
model.summary()
test_loss = model.evaluate(x_val, y_val)
print('\\nTest Loss: {}'.format(test_loss))


Epoch 1/5


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "recommender_net"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  193280    
                                                                 
 embedding_1 (Embedding)     multiple                  6040      
                                                                 
 embedding_2 (Embedding)     multiple                  118592    
                                                                 
 embedding_3 (Embedding)     multiple                  3706      
                                                                 
Total params: 321,618
Trainable params: 321,618
Non-trainable params: 0
_________________________________________________________________
\nTest Loss: 0.6170669198036194


In [7]:
print("Testing Model with 1 user")

user_id = "new_user"
movies_watched_by_user = ratings_df.sample(5)

movies_not_watched = movies_df[
    ~movies_df["movie_id"].isin(movies_watched_by_user.movie_id.values)
]["movie_id"]

movies_not_watched = list(
    set(movies_not_watched).intersection(set(movie2movie_encoded.keys()))
)

movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]

user_movie_array = np.hstack(
    ([[0]] * len(movies_not_watched), movies_not_watched)
)

ratings = model.predict(user_movie_array).flatten()
top_ratings_indices = ratings.argsort()[-10:][::-1]

recommended_movie_ids = [
    movie_encoded2movie.get(movies_not_watched[x][0]) for x in top_ratings_indices
]



Testing Model with 1 user


In [8]:
print("Showing recommendations for user: {}".format(user_id))
print("====" * 9)
print("Movies with high ratings from user")
print("----" * 8)

top_movies_user = (
    movies_watched_by_user.sort_values(by="rating", ascending=False)
    .head(5)
    .movie_id.values
)

movie_df_rows = movies_df[movies_df["movie_id"].isin(top_movies_user)]

for row in movie_df_rows.itertuples():
    print(row.title, ":", row.genres)



Showing recommendations for user: new_user
Movies with high ratings from user
--------------------------------
City of Lost Children, The (1995) : Adventure|Sci-Fi
His Girl Friday (1940) : Comedy
Gentleman's Agreement (1947) : Drama
Lethal Weapon 2 (1989) : Action|Comedy|Crime|Drama
Loser (2000) : Comedy|Romance


In [9]:
print("----" * 8)
print("Top 10 movie recommendations")
print("----" * 8)
recommended_movies = movies_df[movies_df["movie_id"].isin(recommended_movie_ids)]
for row in recommended_movies.itertuples():
    print(row.title, ":", row.genres)


--------------------------------
Top 10 movie recommendations
--------------------------------
Silence of the Lambs, The (1991) : Drama|Thriller
Wallace & Gromit: The Best of Aardman Animation (1996) : Animation
Close Shave, A (1995) : Animation|Comedy|Thriller
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963) : Sci-Fi|War
African Queen, The (1951) : Action|Adventure|Romance|War
Paths of Glory (1957) : Drama|War
Kolya (1996) : Comedy
Waiting for Guffman (1996) : Comedy
Sixth Sense, The (1999) : Thriller
Requiem for a Dream (2000) : Drama


In [10]:
print("==="* 9)
print("Saving Model")
print("==="* 9)
MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join('C:\\Temp', f"ai-model\\model\\{version}")
print('export_path = {}\\n'.format(export_path))
tf.keras.models.save_model(
    model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None
)

Saving Model
export_path = C:\Temp\ai-model\model\1\n
INFO:tensorflow:Assets written to: C:\Temp\ai-model\model\1\assets


#### TODO - Inspect the files Saved

- How to build an application to make recommendations using the model?
- How to serve Tensorflow models? 
- Is there an extensiÃ³n of Tensorflow that allows to run our model using HTTP requests?
- How to package the model?