In [None]:
# 1. Uninstall NumPy and related packages
!pip uninstall -y numpy pandas scikit-surprise

# 2. Install compatible versions
!pip install numpy==1.26.4 pandas scikit-surprise

import os
os.kill(os.getpid(), 9)

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl
Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.

In [2]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import GridSearchCV, train_test_split


ratings_df = pd.read_csv('ratings.csv')

# Step 3: Prepare the Data for Surprise
reader = Reader(rating_scale=(ratings_df.rating.min(), ratings_df.rating.max()))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Hyperparameter Tuning with GridSearchCV
param_grid = {
    'n_epochs': [10, 20, 30],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.05, 0.1]
}

gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3, joblib_verbose=2, n_jobs=-1)
gs.fit(data)

print("Best RMSE score attained: ", gs.best_score['rmse'])
print("Best hyperparameters: ", gs.best_params['rmse'])

# Train Model with Best Hyperparameters
best_params = gs.best_params['rmse']
final_model = SVD(**best_params)
final_model.fit(trainset)

#Evaluate
from surprise import accuracy

predictions = final_model.test(testset)
rmse = accuracy.rmse(predictions)
print(f'Final Model RMSE on test set: {rmse:.4f}')

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   25.7s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:  1.2min finished


Best RMSE score attained:  0.8636980629963467
Best hyperparameters:  {'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}
RMSE: 0.8626
Final Model RMSE on test set: 0.8626


Hyperparameter tuning is critical before deploying a machine learning model because it helps find the best combination of settings that maximize the model's performance on your data. Using default parameters can lead to suboptimal results, that may result in poor accuracy, overfitting, or underfitting.