In [32]:
# install Tensorflow
!pip3 install tensorflow



In [33]:
# automatically restart kernel
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [1]:
# load libs
import numpy as np
import tensorflow as tf

print(tf.__version__)

2.13.0


In [2]:
# initialize database for further modeling
users = ['User1', 'User2', 'User3', 'User4']
movies = ['Movie1', 'Movie2', 'Movie3', 'Movie4', 'Movie5', 'Movie6']
features = ['Genre1', 'Genre2', 'Genre3', 'Genre4', 'Genre5']

In [3]:
# the users_movies matrix: each row represents a user's rating (scale - [1,10] ,0 - not seen) for the different movies
users_movies = tf.constant([
                [3,  7,  9,  0, 0, 0],
                [0,  0, 9,  0, 7, 4],
                [0,  7,  0,  0, 4, 8],
                [9, 8,  0,  4, 0, 0]], dtype=tf.float32)

In [4]:
# features of the movies one-hot encoded: columns - features (['Genre1', 'Genre2', 'Genre3', 'Genre4', 'Genre5']), 
# rows - movies
movies_feats = tf.constant([
                [1, 0, 0, 0, 1],
                [1, 0, 1, 1, 0],
                [1, 1, 0, 0, 1],
                [0, 0, 1, 1, 0],
                [0, 0, 0, 0, 1],
                [1, 1, 0, 0, 0],
                ], dtype=tf.float32)

In [5]:
# computing the user_feature (genres) matrix,
# a matrix containing each user's embedding in the five-dimensional feature space
# the matrix multiplication of the users_movies tensor with the movies_feats tensor
users_feats = tf.matmul(users_movies, movies_feats)
users_feats

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[19.,  9.,  7.,  7., 12.],
       [13., 13.,  0.,  0., 16.],
       [15.,  8.,  7.,  7.,  4.],
       [17.,  0., 12., 12.,  9.]], dtype=float32)>

In [6]:
# normalizing each user feature vector to sum to 1
users_feats = users_feats / tf.reduce_sum(users_feats, axis=1, keepdims=True)
users_feats

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[0.35185185, 0.16666667, 0.12962963, 0.12962963, 0.22222222],
       [0.30952382, 0.30952382, 0.        , 0.        , 0.3809524 ],
       [0.36585367, 0.19512194, 0.17073171, 0.17073171, 0.09756097],
       [0.34      , 0.        , 0.24      , 0.24      , 0.18      ]],
      dtype=float32)>

In [7]:
# ranking feature relevance for each user
top_users_features = tf.nn.top_k(users_feats, len(features))[1]
top_users_features

<tf.Tensor: shape=(4, 5), dtype=int32, numpy=
array([[0, 4, 1, 2, 3],
       [4, 0, 1, 2, 3],
       [0, 1, 2, 3, 4],
       [0, 2, 3, 4, 1]])>

In [8]:
# printing genres preference by user
for i in range(len(users)):
    feature_names = [features[int(index)] for index in top_users_features[i]]
    print('{}: {}'.format(users[i], feature_names))

User1: ['Genre1', 'Genre5', 'Genre2', 'Genre3', 'Genre4']
User2: ['Genre5', 'Genre1', 'Genre2', 'Genre3', 'Genre4']
User3: ['Genre1', 'Genre2', 'Genre3', 'Genre4', 'Genre5']
User4: ['Genre1', 'Genre3', 'Genre4', 'Genre5', 'Genre2']


In [9]:
# determining movie recommendations (the dot product as the similarity measure)
# the matrix multiplication of the users_feats tensor with the transposed movies_feats tensor
users_ratings = tf.matmul(users_feats, tf.transpose(movies_feats))
users_ratings

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[0.5740741 , 0.6111111 , 0.7407407 , 0.25925925, 0.22222222,
        0.5185185 ],
       [0.6904762 , 0.30952382, 1.        , 0.        , 0.3809524 ,
        0.61904764],
       [0.46341464, 0.7073171 , 0.65853655, 0.34146342, 0.09756097,
        0.5609756 ],
       [0.52      , 0.82      , 0.52      , 0.48      , 0.18      ,
        0.34      ]], dtype=float32)>

In [10]:
# applying a mask to the all_users_ratings matrix to focus only on the ratings for new movies 
# (excluding those already seen before)
users_unseen_movies = tf.equal(users_movies, tf.zeros_like(users_movies))
ignore_matrix = tf.zeros_like(tf.cast(users_movies, tf.float32))

users_ratings_new = tf.where(
    users_unseen_movies,
    users_ratings,
    ignore_matrix)

users_ratings_new

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[0.        , 0.        , 0.        , 0.25925925, 0.22222222,
        0.5185185 ],
       [0.6904762 , 0.30952382, 0.        , 0.        , 0.        ,
        0.        ],
       [0.46341464, 0.        , 0.65853655, 0.34146342, 0.        ,
        0.        ],
       [0.        , 0.        , 0.52      , 0.        , 0.18      ,
        0.34      ]], dtype=float32)>

In [12]:
# printing top 3 rated movies for each user
top_movies = tf.nn.top_k(users_ratings_new, 3)[1]
top_movies

<tf.Tensor: shape=(4, 3), dtype=int32, numpy=
array([[5, 3, 4],
       [0, 1, 2],
       [2, 0, 3],
       [2, 5, 4]])>

In [16]:
# printing recommended movies for each user
for i in range(len(users)):
    movie_names = [movies[index] for index in top_movies[i]]
    print('{}: {}'.format(users[i], movie_names))

User1: ['Movie6', 'Movie4', 'Movie5']
User2: ['Movie1', 'Movie2', 'Movie3']
User3: ['Movie3', 'Movie1', 'Movie4']
User4: ['Movie3', 'Movie6', 'Movie5']
