# Item-Based Collaborative Filtering

Item-based collaborative filtering suggests items by identifying relationships between different items based on user ratings.

This approach calculates the similarity between items using user ratings. Here, we assume that if users liked a particular item, they are more likely to enjoy items that are similar to it.

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from coursemate.dataset import Dataset
from coursemate.model import ItemBasedCF

# Widen pandas' console rendering so long text columns are not truncated.
DISPLAY_CHARS = 1000
for _display_option in ('display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, DISPLAY_CHARS)


In [7]:
# Load the three Coursera CSV files, in the positional order Dataset is given them.
csv_paths = (
    'data/Coursera_courses.csv',
    'data/Coursera.csv',
    'data/Coursera_reviews.csv',
)
dataset = Dataset(*csv_paths)

# Review-count bounds per student; the log printed below reports students with
# fewer than 3 or more than 50 reviews being segmented out.
min_reviews, max_reviews = 3, 50
dataset.set_interaction_counts(min_reviews, max_reviews)
dataset.show_dataset_details()

Loading Coursera courses...
Loading Coursera reviews...
Segmenting out students with less than 3 or more than 50 reviews...
30719 students, 468 courses, 174219 reviews
Sparsity: 1.21%
Duplicates: 4.54%


In [9]:
# Split the ratings into train and test portions (by user, per the method name).
dataset.set_train_test_split_by_user()

# Build the item-based CF model over the dataset's course set ...
item_cf_model = ItemBasedCF(dataset.course_set)

# ... and fit it on the training ratings only.
train_ratings = dataset.train_ratings
item_cf_model.fit(train_ratings)

Setting the train-test split by user...


In [14]:
# Query the fitted model with one reviewer's training-split courses as history.
target_reviewer = 'By Robert S'
is_target_reviewer = dataset.train_ratings['reviewers'] == target_reviewer
user_previous_courses = tuple(dataset.train_ratings.loc[is_target_reviewer, 'course_id'])
recommendations = item_cf_model.recommend(user_previous_courses, k=5)

recommendations

['learning-how-to-learn',
 'python',
 'neural-networks-deep-learning',
 'technical-support-fundamentals',
 'python-data']

### Evaluation

In [None]:
def hit_rate(model, test_set, k=5, train_set=None):
    """Compute the hit rate of ``model`` over the users present in ``test_set``.

    A user counts as a hit when at least one of the ``k`` courses returned by
    ``model.recommend`` is among that user's ``k`` highest-rated courses in
    ``test_set``. The final rate is also printed.

    Parameters
    ----------
    model
        Object exposing ``recommend(previous_courses, k)`` that returns an
        iterable of course ids.
    test_set : pandas.DataFrame
        Must contain the columns ``'reviewers'``, ``'course_id'`` and
        ``'rating'`` (``'rating'`` must be sortable by ``nlargest``).
    k : int, default 5
        Number of recommendations requested and number of top-rated test
        courses used as ground truth.
    train_set : pandas.DataFrame, optional
        Frame with ``'reviewers'`` and ``'course_id'`` columns. When given,
        each user's history passed to ``recommend`` is taken from here, and
        test users without any row in it are skipped. When ``None`` (the
        default, matching the previous behaviour) the history is the user's
        courses in ``test_set`` itself, meaning the same rows act as both
        model input and ground truth.

    Returns
    -------
    float
        ``hits / evaluated_users``; ``0.0`` when no user could be evaluated.
    """
    history_by_user = None
    if train_set is not None:
        # One pass over the training frame; dict.fromkeys de-duplicates while
        # keeping first-seen order so the history is deterministic.
        history_by_user = {
            user_id: tuple(dict.fromkeys(courses))
            for user_id, courses in train_set.groupby('reviewers', sort=False)['course_id']
        }

    hits = 0
    total = 0

    # A single groupby replaces re-filtering the whole frame twice per user.
    # Rows with a missing reviewer are dropped by groupby; the per-user mask
    # used previously never matched them either.
    for user_id, user_rows in test_set.groupby('reviewers', sort=False):
        if history_by_user is None:
            prev_courses = tuple(dict.fromkeys(user_rows['course_id']))
        else:
            prev_courses = history_by_user.get(user_id, ())

        if not prev_courses:
            continue

        # A tuple is hashable and has a stable order, unlike list(set(...)).
        # NOTE(review): presumably recommend() may cache on its arguments — confirm.
        predicted_top_k = set(model.recommend(prev_courses, k))
        actual_top_k = set(user_rows.nlargest(k, 'rating')['course_id'])

        if predicted_top_k & actual_top_k:
            hits += 1
        total += 1

    # Named ``rate`` so the local does not shadow the function itself.
    rate = hits / total if total > 0 else 0.0
    print(f"Final Hit Rate: {rate}")
    return rate

In [None]:
# Fit a separate model instance on the training ratings for evaluation.
cf_model = ItemBasedCF(dataset.course_set)
cf_model.fit(dataset.train_ratings)

# Hit rate over the test ratings, using the top 10 recommendations per user.
eval_k = 10
hit_rate(cf_model, dataset.test_ratings, k=eval_k)