In [27]:
# Download and extract dataset
!curl https://files.grouplens.org/datasets/movielens/ml-latest-small.zip -o ml-latest-small.zip

import zipfile
with zipfile.ZipFile('ml-latest-small.zip', 'r') as zip_ref:
    zip_ref.extractall('data')

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  955k  100  955k    0     0   451k      0  0:00:02  0:00:02 --:--:--  453k


In [28]:
import pandas as pd
import sys
import os

# Add project root directory to Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In [35]:
# Normal imports for functions
from src.data.preprocessing import preprocess_data
from src.models.neural_cf import MovieRecommender
from src.utils.evaluation import calculate_metrics

# Import the modules for reloading
import src.data.preprocessing as preprocess_data_module
import src.models.neural_cf as MovieRecommender_module
import src.utils.evaluation as calculate_metrics_module

# Reload the modules after making changes to them
import importlib
importlib.reload(preprocess_data_module)
importlib.reload(MovieRecommender_module)
importlib.reload(calculate_metrics_module)



<module 'src.utils.evaluation' from '/Users/abowkis/Documents/movie_recommender/movie-recs/src/utils/evaluation.py'>

In [30]:
# Load and preprocess data
processed_data = preprocess_data('data/ml-latest-small/ratings.csv', 
                                         'data/ml-latest-small/movies.csv', 0.1)

In [31]:
# Extract user IDs, movie IDs and ratings for training and testing data split
print(processed_data['train_data'].head(4))
user_ids_train = processed_data['train_data']['userId'].values
movie_ids_train = processed_data['train_data']['movieId'].values
ratings_train = processed_data['train_data']['scaled_rating'].values
user_ids_test = processed_data['test_data']['userId'].values
movie_ids_test = processed_data['test_data']['movieId'].values
ratings_test = processed_data['test_data']['scaled_rating'].values

# Make training input and target
X_train = (user_ids_train, movie_ids_train)  # Tuple of user and movie IDs to pass to model
y_train = ratings_train  # Target ratings (scaled)

     userId  movieId  rating   timestamp  scaled_rating  \
114     104     3054     5.0  1446573558       1.000000   
319     554       85     3.0  1491094544       0.555556   
596     578     2810     4.5  1529899650       0.888889   
105     217     1597     3.5  1230061175       0.666667   

                               title    genres  
114  Pokémon: The First Movie (1998)   Fantasy  
319        Angels and Insects (1995)   Romance  
596              Perfect Blue (1997)   Mystery  
105         Conspiracy Theory (1997)  Thriller  


In [32]:
# Initialize model
recommender = MovieRecommender(processed_data['num_users'], processed_data['num_movies'])
# Compile model
recommender.compile_model(learning_rate=0.001)
# Train model
history = recommender.train(X_train=X_train, y_train=y_train, validation_split=0.2, batch_size=64, epochs=100, patience=3)

Epoch 1/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 91ms/step - loss: 0.0757 - mae: 0.2307 - mse: 0.0757 - val_loss: 0.0612 - val_mae: 0.2000 - val_mse: 0.0612
Epoch 2/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0504 - mae: 0.1855 - mse: 0.0504 - val_loss: 0.0446 - val_mae: 0.1681 - val_mse: 0.0446
Epoch 3/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0356 - mae: 0.1485 - mse: 0.0356 - val_loss: 0.0362 - val_mae: 0.1437 - val_mse: 0.0362
Epoch 4/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0243 - mae: 0.1184 - mse: 0.0243 - val_loss: 0.0318 - val_mae: 0.1322 - val_mse: 0.0318
Epoch 5/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0199 - mae: 0.1039 - mse: 0.0199 - val_loss: 0.0293 - val_mae: 0.1258 - val_mse: 0.0293
Epoch 6/100
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms

In [33]:
# Evaluate model
brief_metrics = recommender.evaluate(X_train, y_train)
print("Test metrics:", brief_metrics)
# Make predictions on test set
predicted_ratings = recommender.predict(user_ids_test, movie_ids_test)
print("Predicted ratings:", predicted_ratings[:5])

Test metrics: {'loss': 0.010240898467600346, 'compile_metrics': 0.06360569596290588}
Predicted ratings: [[0.15407795]
 [0.7414754 ]
 [0.52134883]
 [0.7243876 ]
 [0.83748215]]


In [36]:
# Create dataframe with actual and predicted ratings for test set to pass to calculate_metrics function
df_predictions = pd.DataFrame({
    'userId': user_ids_test,        
    'movieId': movie_ids_test,      
    'actual_rating': ratings_test,  
    'predicted_rating': predicted_ratings.flatten()
})
full_metrics = calculate_metrics(df_predictions)
print("Full metrics:", full_metrics)

Full metrics: {'debug! rmse': 0.1568717024227837, 'mae': 0.10487136238200427, 'precision@k': 0.12835051546391754, 'recall@k': 0.7474226804123711}
