In [3]:
import os
# Hide every GPU from CUDA. This assignment is deliberately placed before the
# TensorFlow import below so the variable is already set when TensorFlow loads.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
# Sanity check: list the physical devices TensorFlow reports (expected: CPU only).
print("Devices detected:", tf.config.list_physical_devices())

2026-02-07 13:24:09.400207: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-07 13:24:09.838222: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-07 13:24:11.238763: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Devices detected: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


2026-02-07 13:24:12.627014: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [4]:
# import the library
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
# Load the two MovieLens CSV tables: movie metadata first, then the ratings log.
# NOTE: despite its name, `users` holds ratings.csv
# (columns: userId, movieId, rating, timestamp).
movies, users = (
    pd.read_csv(csv_path)
    for csv_path in ("ml-latest-small/movies.csv", "ml-latest-small/ratings.csv")
)

In [6]:
# Peek at the first three rows of each table to see which columns are available.
for frame in (movies, users):
    print(frame.head(3))

   movieId                    title  \
0        1         Toy Story (1995)   
1        2           Jumanji (1995)   
2        3  Grumpier Old Men (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224


In [7]:
# Attach each rating's movie metadata (title, genres) with a left join on
# movieId, so every ratings row is kept.
data = users.merge(movies, how="left", on="movieId")

In [8]:
# Preview the first rows of the merged ratings + movie metadata frame.
data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


In [9]:
# Model inputs for the two towers, plus the regression target (the rating).
X_userId = data["userId"].values
X_movieId = data["movieId"].values
y_rating = data["rating"].values

# Genre indicator matrix: one 0/1 column per distinct '|'-separated genre label.
genre_indicators = data["genres"].str.get_dummies(sep="|")
X_genres = genre_indicators.values

# Hold out 20% of the rows. train_test_split returns a (train, test) pair for
# each array, in the same order the arrays were passed in.
split_arrays = train_test_split(
    X_userId,
    X_movieId,
    X_genres,
    y_rating,
    random_state=42,
    test_size=0.2,
)
(
    X_uid_train, X_uid_test,
    X_mid_train, X_mid_test,
    X_gen_train, X_gen_test,
    y_train, y_test,
) = split_arrays

In [10]:
# build the two towers using tensorflow
# (tensorflow is already imported in the first cell; this re-import is harmless)
import tensorflow as tf
# Reset Keras' global state before (re)building the models in the next cells.
tf.keras.backend.clear_session()

In [11]:

# --- User tower: user id -> 32-d user vector ---
# Raw ids are used directly as embedding indices, so the table needs max_id + 1
# rows. The size is taken from the FULL id array rather than the training split:
# an id that occurs only in held-out rows would otherwise index past the end of
# the table during evaluation / inference.
num_user = int(X_userId.max()) + 1
user_id_input = tf.keras.layers.Input(shape=(1,), name="user_id")
user_emb = tf.keras.layers.Embedding(num_user, 32)(user_id_input)
user_vec = tf.keras.layers.Flatten()(user_emb)  # (batch, 1, 32) -> (batch, 32)
user_vec = tf.keras.layers.Dense(64, activation="relu")(user_vec)
user_output = tf.keras.layers.Dense(32, activation="linear")(user_vec)

user_tower_model = tf.keras.Model(inputs=user_id_input, outputs=user_output)

# --- Movie tower: (movie id, genre indicators) -> 32-d movie vector ---
# Same sizing rule as above; the movie tower is later run on every distinct
# movie, not only on those that landed in the training split.
num_movie = int(X_movieId.max()) + 1
# Width of the genre indicator matrix, read from the data instead of hard-coding it.
num_genre = X_genres.shape[1]
movie_id_input = tf.keras.layers.Input(shape=(1,), name="movie_id")
movie_genre_input = tf.keras.layers.Input(shape=(num_genre,), name="movie_genre")
movie_id_emb = tf.keras.layers.Embedding(num_movie, 32)(movie_id_input)  # embed the movie id
movie_id_vec = tf.keras.layers.Flatten()(movie_id_emb)
# Concatenate the learned id embedding with the fixed genre indicators.
merge_data = tf.keras.layers.concatenate([movie_id_vec, movie_genre_input])
movie_vec = tf.keras.layers.Dense(64, activation="relu")(merge_data)
movie_output = tf.keras.layers.Dense(32, activation="linear")(movie_vec)

movie_tower_model = tf.keras.Model(inputs=[movie_id_input, movie_genre_input], outputs=movie_output)

In [12]:
# Run each tower on its own symbolic inputs to obtain the user and movie vectors.
user_vector = user_tower_model(user_id_input)
movie_vector = movie_tower_model([movie_id_input, movie_genre_input])

# Predicted rating = dot product of the two tower outputs (one scalar per row).
dot_product = tf.keras.layers.Dot(axes=1)([user_vector, movie_vector])

# Assemble the end-to-end model. The order of `inputs` here is the order in
# which the input arrays are passed as a list to fit() / evaluate().
model = tf.keras.Model(
    inputs=[user_id_input, movie_id_input, movie_genre_input],
    outputs=dot_product
)
# Regression on the rating value, optimised with mean squared error.
model.compile(optimizer="Adam", loss="mse")

In [13]:
# Train the combined model. The input arrays follow the order of the model's
# inputs: user ids, movie ids, genre indicators.
train_inputs = [X_uid_train, X_mid_train, X_gen_train]
model.fit(x=train_inputs, y=y_train, batch_size=32, epochs=10)

Epoch 1/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.9859
Epoch 2/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.7011
Epoch 3/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.6344
Epoch 4/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.5806
Epoch 5/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 13ms/step - loss: 0.5262
Epoch 6/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.4773
Epoch 7/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - loss: 0.4351
Epoch 8/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 13ms/step - loss: 0.4013
Epoch 9/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 14ms/step - loss: 0.3737
Epoch 10/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7bb778d1ac00>

In [14]:
# Evaluate on the held-out split. The model is compiled with a loss only, so
# the value printed below is the test MSE; its square root is the RMSE.
test_inputs = [X_uid_test, X_mid_test, X_gen_test]
loss = model.evaluate(x=test_inputs, y=y_test)
print(f"Final Test MSE: {loss:.4f}")
print(f"RMSE: {loss**0.5:.4f}")

[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 950us/step - loss: 0.8253
Final Test MSE: 0.8253
RMSE: 0.9085


In [15]:
# Quick look at the first three rows of the merged frame again.
data.head(3)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller


In [29]:
# Top-K recommendation, retrieval step: pre-compute one vector per distinct
# movie with the movie tower so a user vector can be scored against all of them.
# Keep one row per movieId (its first occurrence in `data`) and only the movie columns.
unique_movies = data.drop_duplicates(subset="movieId")[["movieId", "title", "genres"]]

# Movie-tower inputs, row-aligned with `unique_movies`.
MovieIds = unique_movies["movieId"].values
genres = unique_movies["genres"].str.get_dummies(sep="|").values

# Movie matrix: row i is the tower output for MovieIds[i].
movie_vectors = movie_tower_model.predict([MovieIds, genres], verbose=1)

print(f"Movie Vector Shape: {movie_vectors.shape}")

[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 558us/step
Movie Vector Shape: (9724, 32)


In [30]:
# Check the container type returned by predict() for the movie matrix.
type(movie_vectors)

numpy.ndarray

In [36]:
# Score one user against every pre-computed movie vector and show the first
# five scores.
user_id = 1
user_vec = user_tower_model.predict(np.array([user_id]), verbose=0)
scores = user_vec @ movie_vectors.T  # a single row: one score per movie
print(scores[0, :5])

[4.6952124 4.2724767 3.7920017 4.3589573 4.332677 ]


In [23]:
# select userId and recommend
def recommend_top_k(user_id, k = 5):
    """Return the k highest-scoring movies for a user, best first.

    A movie's score is the dot product between the user's tower output and
    that movie's row in the pre-computed ``movie_vectors`` matrix.

    Args:
        user_id: Id of the user to recommend for.
        k: Number of movies to return; must be >= 0. A value larger than the
            number of movies returns every movie.

    Returns:
        ``(recommended_titles, recommended_ids)``: two arrays of equal length,
        ordered from highest to lowest score; position ``i`` of both arrays
        refers to the same movie.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    # Build the user vector with predict(), as the ad-hoc scoring cell does:
    # `verbose` is a predict() option, not an argument of calling the model.
    user_vec = user_tower_model.predict(np.array([user_id]), verbose=0)

    # One score per movie (row 0 of the 1 x n_movies product).
    scores = np.dot(user_vec, movie_vectors.T)[0]

    # argsort is ascending: reverse it, then keep the first k. Slicing after the
    # reversal also makes k == 0 return nothing (a `[-0:]` slice returns everything).
    top_indices = np.argsort(scores)[::-1][:k]

    # MovieIds was taken from unique_movies["movieId"], so both are row-aligned:
    # index them by position to keep ids and titles in ranking order. (A boolean
    # `isin` filter would return the titles in table order instead.)
    recommended_ids = MovieIds[top_indices]
    recommended_titles = unique_movies["title"].values[top_indices]

    return recommended_titles, recommended_ids

In [41]:
# Show the top-5 recommendations for user 2 as a numbered list.
recommended_movies, recommended_ids = recommend_top_k(2, k=5)
numbered_lines = [f"{i + 1}. {title}" for i, title in enumerate(recommended_movies)]
for line in numbered_lines:
    print(line)

1. The Jinx: The Life and Deaths of Robert Durst (2015)
2. I Am a Sex Addict (2005)
3. Connections (1978)
4. Blue Planet II (2017)
5. De platte jungle (1978)


In [48]:
# For comparison with the recommendations: user 2's own ratings, highest first.
is_user_2 = data["userId"] == 2
top_movies = data.loc[is_user_2].sort_values("rating", ascending=False)
print(top_movies.loc[:, ["title", "rating", "genres"]].head(10))

                                                 title  rating  \
241                               Step Brothers (2008)     5.0   
254                    Wolf of Wall Street, The (2013)     5.0   
260  The Jinx: The Life and Deaths of Robert Durst ...     5.0   
250                                     Warrior (2011)     5.0   
248                                  Inside Job (2010)     5.0   
259                          Mad Max: Fury Road (2015)     5.0   
240                            Dark Knight, The (2008)     4.5   
247                                   Town, The (2010)     4.5   
242                        Inglourious Basterds (2009)     4.5   
234                           Good Will Hunting (1997)     4.5   

                               genres  
241                            Comedy  
254                Comedy|Crime|Drama  
260                       Documentary  
250                             Drama  
248                       Documentary  
259  Action|Adventure|Sci-Fi|Thri