In [28]:
# TASK 4: RECOMMENDATION SYSTEM
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, Input, Dot, Flatten, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [29]:
# Read the user/movie ratings table and preview its first rows.
RATINGS_CSV = "/content/drive/MyDrive/Data/ratings.csv"
ratings = pd.read_csv(RATINGS_CSV)
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [30]:
# Map raw userId / movieId values onto contiguous 0-based indices so they can
# be used directly as rows of an embedding table. Indices follow the order in
# which each id first appears in `ratings`.
user_ids = ratings['userId'].unique().tolist()
movie_ids = ratings['movieId'].unique().tolist()

user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
movie2movie_encoded = dict(zip(movie_ids, range(len(movie_ids))))

num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)

# Attach the encoded indices as new columns next to the raw ids.
ratings['user'] = ratings['userId'].map(user2user_encoded)
ratings['movie'] = ratings['movieId'].map(movie2movie_encoded)

# Features are (user index, movie index) pairs; the target is the rating.
X = ratings[['user', 'movie']].to_numpy()
y = ratings['rating'].to_numpy()

# Hold out 20% of the ratings; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [31]:
embedding_size = 50

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created. The train/test split is already seeded; without this the embedding
# initialisation and batch shuffling differ on every run, so the reported
# losses, RMSE and recommendations are not reproducible.
tf.keras.utils.set_random_seed(42)

# User branch: one integer index per sample -> embedding_size-dim vector.
user_input = Input(shape=(1,))
user_embedding = Embedding(num_users, embedding_size)(user_input)
user_vec = Flatten()(user_embedding)

# Movie branch: same structure, with its own embedding table.
movie_input = Input(shape=(1,))
movie_embedding = Embedding(num_movies, embedding_size)(movie_input)
movie_vec = Flatten()(movie_embedding)

# The predicted rating is the raw dot product of the two vectors
# (no bias terms and no output activation).
dot = Dot(axes=1)([user_vec, movie_vec])
model = Model(inputs=[user_input, movie_input], outputs=dot)
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

In [32]:
# Train for 10 epochs; the held-out split is scored after each epoch and
# reported as val_loss.
train_inputs = [X_train[:, 0], X_train[:, 1]]
holdout_inputs = [X_test[:, 0], X_test[:, 1]]

history = model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(holdout_inputs, y_test),
    epochs=10,
)

Epoch 1/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 7ms/step - loss: 11.7749 - val_loss: 2.8117
Epoch 2/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - loss: 2.0222 - val_loss: 1.5843
Epoch 3/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 7ms/step - loss: 1.0875 - val_loss: 1.3642
Epoch 4/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - loss: 0.8159 - val_loss: 1.2922
Epoch 5/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step - loss: 0.6869 - val_loss: 1.2665
Epoch 6/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 7ms/step - loss: 0.5809 - val_loss: 1.2587
Epoch 7/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - loss: 0.5054 - val_loss: 1.2498
Epoch 8/10
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step - loss: 0.4285 - val_loss: 1.2627
Epoch 9/10
[1m

In [33]:
# Score the held-out ratings and report root-mean-squared error.
test_inputs = [X_test[:, 0], X_test[:, 1]]
y_pred = model.predict(test_inputs)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Test RMSE: {rmse:.4f}")

[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Test RMSE: 1.1295


In [35]:
# Recommend top 5 movies for a user
user_id = 5  # Any valid user
top_n = 5    # Number of recommendations to show
encoded_user_id = user2user_encoded[user_id]  # KeyError if the user has no ratings

# Predict ratings for all movies the user hasn't seen
seen_movies = ratings[ratings['userId'] == user_id]['movieId'].map(movie2movie_encoded).values
# Use a set for membership tests; checking `in` against the NumPy array
# rescans it for every candidate movie.
seen_lookup = set(seen_movies.tolist())
unseen_movies = [m for m in range(num_movies) if m not in seen_lookup]

user_array = np.full(len(unseen_movies), encoded_user_id)
movie_array = np.array(unseen_movies)

preds = model.predict([user_array, movie_array])
# argsort is ascending: take the last top_n entries and reverse them so the
# highest predicted rating comes first.
top_indices = preds.flatten().argsort()[-top_n:][::-1]
top_movie_encoded_ids = [unseen_movies[i] for i in top_indices]

# Map back to original movieId
inv_movie_map = {v: k for k, v in movie2movie_encoded.items()}
recommended_ids = [inv_movie_map[x] for x in top_movie_encoded_ids]

# Load movie names
movies = pd.read_csv("/content/drive/MyDrive/Data/movies.csv")
# `isin` returns rows in the file's own order, which loses the ranking.
# Re-sort the selected rows by each movie's position in `recommended_ids`
# so the best recommendation is printed first.
rank_by_id = {movie_id: rank for rank, movie_id in enumerate(recommended_ids)}
recommended_movies = (
    movies[movies['movieId'].isin(recommended_ids)]
    .sort_values('movieId', key=lambda ids: ids.map(rank_by_id))
)
print(recommended_movies[['title']])

[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
                                     title
2665                     Bossa Nova (2000)
2880        I'm the One That I Want (2000)
3281                  Game of Death (1978)
3505        Phantom of the Paradise (1974)
5454  Hush... Hush, Sweet Charlotte (1964)
