In [1]:
import numpy as np
import pandas as pd

In [2]:
# User-item rating matrix: one row per user, one column per item.
# The functions further down treat a 0 entry as "not rated".
data = np.array([
    [2, 3, 0, 1, 3, 8],
    [0, 3, 1, 4, 6, 7],
    [3, 0, 0, 3, 4, 6],
    [9, 5, 1, 5, 0, 7],
    [3, 4, 6, 7, 9, 9],
    [4, 0, 1, 4, 8, 0],
    [2, 4, 0, 0, 0, 8],
])

# Label rows U1..U7 and columns I1..I6, derived from the matrix shape.
df = pd.DataFrame(
    data,
    index=[f"U{row}" for row in range(1, data.shape[0] + 1)],
    columns=[f"I{col}" for col in range(1, data.shape[1] + 1)],
)
df

Unnamed: 0,I1,I2,I3,I4,I5,I6
U1,2,3,0,1,3,8
U2,0,3,1,4,6,7
U3,3,0,0,3,4,6
U4,9,5,1,5,0,7
U5,3,4,6,7,9,9
U6,4,0,1,4,8,0
U7,2,4,0,0,0,8


In [3]:
def calculate_cosine_similarity(vector1, vector2):
    """Cosine similarity of two rating vectors over their co-rated positions.

    Only positions where *both* vectors are non-zero take part in the
    computation, i.e. a zero is treated as "no rating" rather than as a
    rating of zero.

    Parameters
    ----------
    vector1, vector2 : array-like
        One-dimensional sequences of equal length (NumPy arrays, pandas
        Series, lists, ...). They are compared position by position.

    Returns
    -------
    float
        The cosine of the angle between the two masked vectors, or ``0.0``
        when the vectors have no position at which both are non-zero.
    """
    first = np.asarray(vector1)
    second = np.asarray(vector2)

    co_rated = (first != 0) & (second != 0)
    if not co_rated.any():
        # Without this guard the expression below is 0 / (0 * 0): NumPy would
        # return NaN (with a RuntimeWarning), and NaN breaks any later sorting
        # of similarity scores.
        return 0.0

    first_common = first[co_rated]
    second_common = second[co_rated]
    # Every retained entry is non-zero, so both norms are strictly positive.
    return np.dot(first_common, second_common) / (
        np.linalg.norm(first_common) * np.linalg.norm(second_common)
    )

def predict_rating(df, target_user, target_item, method='user', top_n_users_or_items=3):
    """Predict ``target_user``'s rating of ``target_item`` from similar neighbours.

    The prediction is the similarity-weighted average of the ratings supplied
    by the most similar neighbours. Similarity is computed with
    ``calculate_cosine_similarity``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rating matrix with users as the index and items as the columns.
        A value of 0 is treated as "not rated".
    target_user : label
        Index label of the user to predict for.
    target_item : label
        Column label of the item to predict.
    method : {'user', 'item'}, default 'user'
        ``'user'``: neighbours are the other users, each contributing its own
        rating of ``target_item``.
        ``'item'``: neighbours are the other items, each contributing the
        rating ``target_user`` gave to it.
    top_n_users_or_items : int, default 3
        Number of most similar neighbours to keep. Neighbours are selected by
        similarity first; those among them with a 0 rating are then left out
        of the average.

    Returns
    -------
    float
        The predicted rating, or ``0.0`` when no prediction can be made
        (none of the selected neighbours has a rating, or their similarities
        sum to zero).

    Raises
    ------
    ValueError
        If ``method`` is neither ``'user'`` nor ``'item'``.
    """
    if method == 'user':
        target_vector = df.loc[target_user]
        neighbours = [
            (df.loc[user], df.loc[user, target_item])
            for user in df.index
            if user != target_user
        ]
    elif method == 'item':
        target_vector = df[target_item]
        neighbours = [
            (df[item], df.loc[target_user, item])
            for item in df.columns
            if item != target_item
        ]
    else:
        # Previously an unknown method fell through and returned None.
        raise ValueError(f"method must be 'user' or 'item', got {method!r}")

    scored = []
    for neighbour_vector, neighbour_rating in neighbours:
        similarity = calculate_cosine_similarity(target_vector, neighbour_vector)
        # An undefined similarity (NaN) cannot be ranked; skip that neighbour.
        if not np.isnan(similarity):
            scored.append((similarity, neighbour_rating))

    # list.sort is stable, so equally similar neighbours keep their df order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # Among the top-N neighbours, only those with an actual (non-zero) rating count.
    contributing = [
        (similarity, rating)
        for similarity, rating in scored[:top_n_users_or_items]
        if rating
    ]
    similarity_total = sum(similarity for similarity, _ in contributing)
    if not contributing or similarity_total == 0:
        # Avoids the 0 / 0 division the unguarded formula would hit.
        return 0.0

    weighted_total = sum(similarity * rating for similarity, rating in contributing)
    return weighted_total / similarity_total


In [4]:
# Predict U1's missing rating for I3, first user-based, then item-based.
print(predict_rating(df, target_user='U1', target_item='I3', method='user'))
print(predict_rating(df, target_user='U1', target_item='I3', method='item'))

1.0
3.9647254783365797


In [5]:
# BONUS: recommend to each user the unrated items whose predicted rating exceeds a threshold
def get_recommendations(df, recommendation_threshold=5, method='user', top_n_users_or_items=5):
    """Recommend, for every user, the unrated items with a high predicted rating.

    Parameters
    ----------
    df : pandas.DataFrame
        Rating matrix with users as the index and items as the columns.
        A value of 0 marks an item the user has not rated yet.
    recommendation_threshold : number, default 5
        An item is recommended only if its predicted rating is strictly
        greater than this value.
    method : str, default 'user'
        Passed through to ``predict_rating`` to choose the prediction strategy.
    top_n_users_or_items : int, default 5
        Neighbourhood size passed through to ``predict_rating``. This used to
        be a literal 5 inside the loop; the default keeps that behaviour.

    Returns
    -------
    dict
        Maps each user label to a list of ``(item, predicted_rating)`` tuples,
        sorted by predicted rating in descending order. Users with nothing to
        recommend map to an empty list.
    """
    recommendations = {}

    for user in df.index:
        candidates = []
        for item in df.columns:
            if df.loc[user, item] != 0:
                continue  # already rated by this user, nothing to recommend
            predicted_rating = predict_rating(df, user, item, method, top_n_users_or_items)
            if predicted_rating > recommendation_threshold:
                candidates.append((item, predicted_rating))
        # Best predictions first.
        recommendations[user] = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    return recommendations

In [6]:
# Defaults spelled out: user-based predictions, keep items predicted strictly above 5.
get_recommendations(df, recommendation_threshold=5, method='user')

{'U1': [],
 'U2': [],
 'U3': [],
 'U4': [('I5', 6.686058101498526)],
 'U5': [],
 'U6': [('I6', 7.203885650649584)],
 'U7': [('I5', 6.010464489531713)]}