# User Based Collaborative Filtering

The User-Based Collaborative Filtering Recommender is a system that suggests items to a target user based on the preferences of other users who are similar to them.

In other words, users who agreed in the past tend to agree again in the future.

We analyse user ratings of courses to identify users with similar 'tastes' and leverage their past opinions to predict what the current user might like.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from coursemate.dataset import Dataset
from coursemate.model import UserBasedCF

# Raise pandas' display width and per-column character limit to 1000
# so wide frames and long text cells are shown in full.
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000


In [2]:
# Build the project Dataset from the three Coursera CSV files (paths are relative to the notebook's working directory).
dataset = Dataset('data/Coursera_courses.csv', 'data/Coursera.csv', 'data/Coursera_reviews.csv')
# Per the log line printed by this cell, this segments out students with
# fewer than 3 or more than 50 reviews.
dataset.set_interaction_counts(3, 50)
# Print the summary shown below: student/course/review counts, sparsity and duplicate rate.
dataset.show_dataset_details()

Loading Coursera courses...
Loading Coursera reviews...
Segmenting out students with less than 3 or more than 50 reviews...
30719 students, 468 courses, 174219 reviews
Sparsity: 1.21%
Duplicates: 4.54%


In [3]:
# Create the train/test split by user.
# NOTE(review): what exactly is held out per user is decided inside Dataset — confirm there.
dataset.set_train_test_split_by_user()

# Instantiate the UserBasedCF model
user_cf_model = UserBasedCF()

# Fit the model with the training data
# Ensure that dataset.train_ratings is available and properly set up before this step
user_cf_model.fit(dataset.train_ratings)

Setting the train-test split by user...


In [4]:
# Get recommendations for a particular user
user_id = 'By Kelvin k'
recommendations = user_cf_model.recommend(user_id, k=5)

In [5]:
# Display the list of recommended course ids as the cell output.
recommendations

['aws-fundamentals-going-cloud-native',
 'information-security-data',
 'sql-for-data-science',
 'python-basics',
 'intro-sql']

In [3]:
# Replace the current train/test split with a random one.
# NOTE(review): 0.999 is presumably the training fraction (the evaluation run below
# iterates over only 175 test users) — confirm against Dataset.
# NOTE(review): no seed is passed here; unless Dataset seeds internally, the split
# and the metrics below may differ between runs — confirm.
dataset.set_train_test_split_random(0.999)
# Instantiate the UserBasedCF model
user_cf_model = UserBasedCF()

Setting the train-test split randomly...


In [4]:
# Fit the model with the training data
# Ensure that dataset.train_ratings is available and properly set up before this step
# (a split must have been set on `dataset` and `user_cf_model` instantiated in an earlier cell).
user_cf_model.fit(dataset.train_ratings)

### Evaluation

In [5]:
def hit_rate(model, test_set, k=5):
    """Fraction of evaluated test users who receive at least one relevant recommendation.

    For every user in ``test_set`` that is present in
    ``model.user_similarity_matrix.index``, the user's ``k`` highest-rated
    test courses are compared with ``model.recommend(user_id, k)``. The user
    counts as a hit when the two sets share at least one course.

    Args:
        model: Recommender exposing ``user_similarity_matrix`` (only its index
            is consulted, for membership) and ``recommend(user_id, k)``.
        test_set: DataFrame with ``reviewers``, ``course_id`` and ``rating``
            columns.
        k: Number of courses taken from both the actual and the recommended
            lists.

    Returns:
        ``hits / evaluated_users``, or 0 when no test user is known to the model.
    """
    known_users = model.user_similarity_matrix.index
    hits = 0
    total = 0

    # Group the ratings once instead of re-filtering the whole frame with a
    # boolean mask for every user. sort=False keeps users in first-appearance
    # order, matching iteration over .unique().
    ratings_by_user = test_set.groupby('reviewers', sort=False)

    for user_id, user_actual in tqdm(ratings_by_user, total=ratings_by_user.ngroups):
        # Users absent from the similarity matrix cannot be scored by the model.
        if user_id not in known_users:
            continue

        user_actual_top_k = user_actual.nlargest(k, 'rating')['course_id']
        user_predicted_top_k = model.recommend(user_id, k)

        # A single shared course is enough for a hit.
        if not set(user_predicted_top_k).isdisjoint(user_actual_top_k):
            hits += 1
        total += 1

    return hits / total if total > 0 else 0

In [6]:
# Hit rate at k=10 of the fitted model on the held-out test ratings.
hit_rate(user_cf_model, dataset.test_ratings, k=10)

100%|██████████| 175/175 [00:06<00:00, 25.26it/s]


0.22857142857142856

In [None]:
# Work in progress

def precision_at_k(model, test_set, k=5, verbose=True):
    """Precision@k of the model's recommendations against held-out ratings.

    For every user in ``test_set`` that is present in
    ``model.user_similarity_matrix.index``, the number of courses shared
    between ``model.recommend(user_id, k)`` and the user's ``k`` highest-rated
    test courses is accumulated, and the total is divided by ``k`` times the
    number of evaluated users.

    Args:
        model: Recommender exposing ``user_similarity_matrix`` (only its index
            is consulted, for membership) and ``recommend(user_id, k)``.
        test_set: DataFrame with ``reviewers``, ``course_id`` and ``rating``
            columns.
        k: Number of courses taken from both the actual and the recommended
            lists; also the per-user denominator.
        verbose: When True (default), print one line per skipped user. Pass
            False to keep only the final summary line, which avoids flooding
            the notebook output when many users are skipped.

    Returns:
        ``hits / (k * evaluated_users)``, or 0 when no user was evaluated.
    """
    known_users = model.user_similarity_matrix.index
    hits = 0
    total = 0
    skipped_users = 0
    processed_users = 0

    # Group the ratings once instead of re-filtering the whole frame with a
    # boolean mask for every user. sort=False keeps users in first-appearance
    # order, matching iteration over .unique().
    ratings_by_user = test_set.groupby('reviewers', sort=False)

    for user_id, user_actual in ratings_by_user:
        # Users absent from the similarity matrix cannot be scored by the model.
        if user_id not in known_users:
            if verbose:
                print(f"Skipping usr not found in training set: {user_id}")
            skipped_users += 1
            continue

        user_actual_top_k = user_actual.nlargest(k, 'rating')['course_id']
        user_predicted_top_k = model.recommend(user_id, k)

        hits += len(set(user_predicted_top_k) & set(user_actual_top_k))
        # The denominator is always k, even if fewer than k courses were
        # recommended or rated.
        total += k
        processed_users += 1

    print(f"Processed {processed_users} users, Skipped {skipped_users} users")
    return hits / total if total > 0 else 0

# Test the model
# precision = precision_at_k(user_cf_model, dataset.test_ratings, k=5)
# print(f"Precision@5: {precision}")


Skipping usr not found in training set: By Maggie P
Skipping usr not found in training set: By Ricardo I
Skipping usr not found in training set: By Rithvika L
Skipping usr not found in training set: By VIJAY N


KeyboardInterrupt: 