In [3]:
import pandas as pd
import numpy as np

data = {
    'user': ['user1', 'user1', 'user1', 'user2', 'user2', 'user2', 'user3', 'user3'],
    'item': ['item1', 'item2', 'item3', 'item1', 'item2', 'item4', 'item2', 'item4'],
    'rating': [5, 3, 4, 4, 2, 5, 5, 4]
}

df = pd.DataFrame(data)

## User-Based Collaborative Filtering

In [4]:
user_item_matrix = df.pivot(index='user', columns='item', values='rating')
user_item_matrix = user_item_matrix.fillna(0)
print(user_item_matrix)

item   item1  item2  item3  item4
user                             
user1    5.0    3.0    4.0    0.0
user2    4.0    2.0    0.0    5.0
user3    0.0    5.0    0.0    4.0


In [5]:
# calculate similarity between users

from sklearn.metrics.pairwise import cosine_similarity

# calculate cosine similarities between users
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)
print(user_similarity_df)

user      user1     user2     user3
user                               
user1  1.000000  0.548128  0.331295
user2  0.548128  1.000000  0.698430
user3  0.331295  0.698430  1.000000


In [6]:
# recommend items to user by aggregating the ratings of the items that similar users have liked
def get_user_recommendations(target_user, user_similarity_df, user_item_matrix, top_n=2):
    # find similar users
    similar_users = user_similarity_df[target_user].sort_values(ascending=False)
    # Weighted sum of ratings from similar users
    weighted_sum = np.dot(similar_users, user_item_matrix)
    # get target user's rating history
    user_ratings = user_item_matrix.loc[target_user]
    # items the user hasn't rated yet
    unrated_items = user_ratings[user_ratings == 0]
    # filter the weighted sum of unrated items
    recommendations = weighted_sum[user_ratings == 0]
    # sort recommendations and return top_n items
    recommendations = pd.Series(recommendations, index=unrated_items.index)
    return recommendations.sort_values(ascending=False).head(top_n)

In [9]:
recommendations = get_user_recommendations('user2', user_similarity_df, user_item_matrix)
print(f"Recommendations for user2: {recommendations}")

Recommendations for user2: item
item3    4.0
dtype: float64


## Item-based collaborative filtering

This is done by calculating similarities between items instead of users

In [10]:
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

print(item_similarity_df)

item      item1     item2     item3     item4
item                                         
item1  1.000000  0.582699  0.780869  0.487805
item2  0.582699  1.000000  0.486664  0.760042
item3  0.780869  0.486664  1.000000  0.000000
item4  0.487805  0.760042  0.000000  1.000000


In [11]:
def get_item_recommendations(target_user, item_similarity_df, user_item_matrix, top_n=2):
    # get user ratings
    user_ratings = user_item_matrix.loc[target_user]
    # calculated weighted sum of item similarities
    weighted_sum = np.dot(user_ratings, item_similarity_df)
    # items the user hasn't rated yet
    unrated_items = user_ratings[user_ratings == 0]
    # filter the weighted sum for unrated items
    recommendations = weighted_sum[user_ratings == 0]
    # sort recommendations & return top_n items
    recommendations = pd.Series(recommendations, index=unrated_items.index)
    return recommendations.sort_values(ascending=False).head(top_n)

recommendations = get_item_recommendations('user3', item_similarity_df, user_item_matrix)
print(f"Item based recommendations for user3: {recommendations}")

Item based recommendations for user3: item
item1    4.864713
item3    2.433321
dtype: float64
