# In [None]:
import itertools

import numpy as np
import pandas as pd
from surprise import SVD, KNNBasic, accuracy
from surprise import Reader
from surprise import Dataset, Reader, SVD, KNNBasic, accuracy
from surprise.model_selection import train_test_split

# Read the ratings that the models are fitted on
recommendation_df = pd.read_csv('recommendation_exclusive.csv')

# The rating scale spans the lowest and highest rating found in the file
reader = Reader(
    rating_scale=(
        recommendation_df['Rating'].min(),
        recommendation_df['Rating'].max(),
    )
)

# Hand the (user, item, rating) columns to Surprise in that order
data = Dataset.load_from_df(
    recommendation_df.loc[:, ['User_ID', 'ISBN', 'Rating']],
    reader,
)

# Hold out 20% of the ratings; the fixed seed makes the split repeatable
trainset, testset = train_test_split(data, random_state=42, test_size=0.2)

# Load the test dataset
test_df = pd.read_csv('goodbooks_exclusive.csv')

print(test_df.head())

# Calculate the number of unique users and books
num_users = test_df['User_ID'].nunique()
num_books = test_df['ISBN'].nunique()

print(f"\nTotal Number of Users: {num_users}")
print(f"Total Number of Books: {num_books}\n")

# Build the (user, item, rating) 3-tuples that are passed to the models'
# test() calls. Zipping the columns avoids the per-row Series objects that
# iterrows() creates (slow on large files) and keeps each column's own dtype.
test_data = list(zip(test_df['User_ID'], test_df['ISBN'], test_df['Rating']))

# Candidate hyperparameter values for the grid searches
svd_params = dict(
    n_factors=[5, 10, 20],  # number of latent factors
    lr_all=[0.005, 0.01],  # learning rate applied to all parameters
    reg_all=[0.02, 0.05],  # regularization term applied to all parameters
)

knn_params = dict(
    k=[5, 10, 20],  # number of neighbors
    min_k=[1, 5],  # minimum number of neighbors to consider
    similarity=['cosine', 'pearson'],  # similarity measure
)

# Function to train and evaluate SVD with different parameters
def train_evaluate_svd(params):
    """Grid-search SVD hyperparameters and print the RMSE of each combination.

    Every combination of ``params['n_factors']``, ``params['lr_all']`` and
    ``params['reg_all']`` is fitted on the module-level ``trainset`` and
    scored on the module-level ``test_data``. The combination with the lowest
    RMSE is printed once the search is done.

    Args:
        params: Mapping with the keys ``'n_factors'``, ``'lr_all'`` and
            ``'reg_all'``, each holding the candidate values to try.

    Returns:
        None. Results are reported through ``print`` only.
    """
    best_rmse = float('inf')
    best_params = None

    # product() walks the grid in the same order as three nested loops would:
    # n_factors outermost, reg_all innermost.
    grid = itertools.product(params['n_factors'], params['lr_all'], params['reg_all'])
    for n_factors, lr_all, reg_all in grid:
        # Initialize and train the SVD model (seed fixed at 42 for every run)
        svd = SVD(n_factors=n_factors, lr_all=lr_all, reg_all=reg_all, random_state=42)
        svd.fit(trainset)

        # Score the fitted model on the test data
        predictions = svd.test(test_data)
        rmse = accuracy.rmse(predictions, verbose=False)

        print(f"SVD Params: n_factors={n_factors}, lr_all={lr_all}, reg_all={reg_all} -> RMSE: {rmse}")
        if rmse < best_rmse:
            best_rmse = rmse
            best_params = (n_factors, lr_all, reg_all)

    if best_params is None:
        # Reached when the grid is empty or no RMSE compared below infinity
        # (e.g. NaN); indexing best_params here would raise TypeError.
        print("No SVD parameter combination produced a usable RMSE.")
        return

    print(f"Best SVD Params: n_factors={best_params[0]}, lr_all={best_params[1]}, reg_all={best_params[2]} with RMSE: {best_rmse}")

# Function to train and evaluate KNNBasic (User and Item-based CF) with different parameters
def train_evaluate_knn(params, user_based):
    """Grid-search KNNBasic hyperparameters and print the RMSE of each combination.

    Every combination of ``params['k']``, ``params['min_k']`` and
    ``params['similarity']`` is fitted on the module-level ``trainset`` and
    scored on the module-level ``test_data``. The combination with the lowest
    RMSE is printed once the search is done.

    Args:
        params: Mapping with the keys ``'k'``, ``'min_k'`` and
            ``'similarity'``, each holding the candidate values to try.
        user_based: Forwarded as ``sim_options['user_based']``; truthy runs
            are labelled "User-Based", falsy runs "Item-Based" in the output.

    Returns:
        None. Results are reported through ``print`` only.
    """
    # The label depends only on user_based, so compute it once up front.
    cf_type = "User-Based" if user_based else "Item-Based"
    best_rmse = float('inf')
    best_params = None

    # product() walks the grid in the same order as three nested loops would:
    # k outermost, similarity innermost.
    grid = itertools.product(params['k'], params['min_k'], params['similarity'])
    for k, min_k, similarity in grid:
        # Initialize and train the KNNBasic model with the current settings
        sim_options = {'name': similarity, 'user_based': user_based}
        knn = KNNBasic(k=k, min_k=min_k, sim_options=sim_options, verbose=False)
        knn.fit(trainset)

        # Score the fitted model on the test data
        predictions = knn.test(test_data)
        rmse = accuracy.rmse(predictions, verbose=False)

        print(f"{cf_type} CF Params: k={k}, min_k={min_k}, similarity={similarity} -> RMSE: {rmse}")
        if rmse < best_rmse:
            best_rmse = rmse
            best_params = (k, min_k, similarity)

    if best_params is None:
        # Reached when the grid is empty or no RMSE compared below infinity
        # (e.g. NaN); indexing best_params here would raise TypeError.
        print(f"No {cf_type} CF parameter combination produced a usable RMSE.")
        return

    print(f"Best {cf_type} CF Params: k={best_params[0]}, min_k={best_params[1]}, similarity={best_params[2]} with RMSE: {best_rmse}")

# Grid search over the SVD hyperparameters
print("Evaluating SVD...")
train_evaluate_svd(svd_params)

# Grid search over the KNNBasic hyperparameters, first for user-based and
# then for item-based collaborative filtering
for _heading, _user_based in (
    ("\nEvaluating User-Based CF...", True),
    ("\nEvaluating Item-Based CF...", False),
):
    print(_heading)
    train_evaluate_knn(knn_params, user_based=_user_based)
