In [3]:
import pandas as pd
import numpy as np
import math
from scipy.spatial import distance

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression statement in a cell instead of only
# the last one; the demo cell further down relies on this to show each result
# right after its printed label.
InteractiveShell.ast_node_interactivity = "all"

In [4]:
#Calculate the cosine similarity score between all user ratings of book one and all user ratings of book two
def calculate_sim_score_books(b1_user_ratings, b2_user_ratings):
    """Return the cosine similarity of two rating vectors, rounded to 2 decimals.

    Args:
        b1_user_ratings: Sequence of numeric ratings for the first book.
        b2_user_ratings: Sequence of numeric ratings for the second book; it must
            have the same length as ``b1_user_ratings`` (entries are expected to
            refer to the same users in the same order).

    Returns:
        ``dot(b1, b2) / (|b1| * |b2|)`` rounded to two decimal places, or ``0.0``
        when either vector has zero norm, where the cosine is undefined.

    Raises:
        ValueError: If the two sequences differ in length (raised by ``np.dot``).
    """
    numerator = np.dot(b1_user_ratings, b2_user_ratings)
    denominator = math.sqrt(np.dot(b1_user_ratings, b1_user_ratings)) * math.sqrt(np.dot(b2_user_ratings, b2_user_ratings))

    # An all-zero vector (e.g. a book with no ratings) would otherwise produce
    # NaN plus a RuntimeWarning; report "no similarity" instead so callers can
    # still sort and sum the scores.
    if denominator == 0:
        return 0.0

    return round(numerator / denominator, 2)


In [5]:
#Predict the rating for the target book as the weighted average of ratings from the most similar books, weighted
#by their similarity scores
def calculate_weighted_average(sim_scores, ratings):
    """Return the average of ``ratings`` weighted by ``sim_scores``.

    Args:
        sim_scores: Sequence of numeric weights (similarity scores).
        ratings: Sequence of numeric ratings, same length as ``sim_scores``.

    Returns:
        ``dot(sim_scores, ratings) / sum(sim_scores)``, or ``0.0`` when the
        weights sum to zero (including empty input), because no prediction can
        be formed in that case.

    Raises:
        ValueError: If the two sequences differ in length (raised by ``np.dot``).
    """
    total_weight = sum(sim_scores)

    # With no usable neighbours the division would give NaN and a
    # RuntimeWarning; return 0.0 as the "no prediction" value instead.
    if total_weight == 0:
        return 0.0

    return np.dot(sim_scores, ratings) / total_weight

In [6]:
#Item-Item Collaborative Filtering
def CFItemToItem(csv_file_name, target_u, target_b, k_books=3):
    """Predict ``target_u``'s rating of ``target_b`` from the most similar rated books.

    The CSV is read with one row per book: the first column must be named
    "books" and hold the book label, and every following column holds one
    user's ratings. A rating of 0 is treated as "not rated".

    Args:
        csv_file_name: Path (or anything ``pd.read_csv`` accepts) of the ratings file.
        target_u: Name of the target user's column, e.g. ``"u5"``.
        target_b: Label of the target book as it appears in the "books" column,
            e.g. ``"b2"``.
        k_books: Number of most similar books used for the prediction.

    Returns:
        A 5-tuple ``(sim_scores, ratings, dic_book_sim, sorted_dic_book_sim,
        weighted_average)``:
        - ``sim_scores``: similarity scores of the selected books, highest first.
        - ``ratings``: the target user's ratings of those same books.
        - ``dic_book_sim``: similarity of every book to the target book, keyed
          by ``"b<row number>"`` (1-based row position in the file); books the
          user has not rated and the target book itself get 0.
        - ``sorted_dic_book_sim``: the ``k_books`` highest entries of
          ``dic_book_sim``.
        - ``weighted_average``: the predicted rating, rounded to 2 decimals.

    Raises:
        KeyError: If the "books" column or the ``target_u`` column is missing.
        IndexError: If ``target_b`` does not occur in the "books" column.
    """
    #read the book ratings csv
    book_ratings = pd.read_csv(csv_file_name)

    #locate the target book's row by its label rather than by parsing digits out
    #of the label, so files with ten or more books are handled correctly
    book_labels = book_ratings["books"].tolist()
    if target_b not in book_labels:
        # IndexError is kept because that is what the previous row lookup raised.
        raise IndexError(
            f"target book {target_b!r} not found in the 'books' column of {csv_file_name!r}"
        )
    target_b_index = book_labels.index(target_b)

    #ratings of the target book by every user (first column is the book label);
    #computed once because it does not change inside the loop
    target_book_user_scores = book_ratings.iloc[target_b_index, 1:].tolist()

    #the target user's rating of every book, in row order
    target_u_all_rated_books = book_ratings[target_u].tolist()

    #similarity of each book to the target book, in row order
    dic_book_sim = {}
    for index, user_rating in enumerate(target_u_all_rated_books):
        book_key = "b" + str(index + 1)
        if user_rating == 0 or index == target_b_index:
            #unrated books and the target book itself cannot support the prediction
            dic_book_sim[book_key] = 0
        else:
            #cosine similarity over the full rating rows: unrated (0) entries stay in
            #the vectors, so only users who rated both books add to the dot product
            #while each norm still covers everyone who rated that book
            candidate_book_user_scores = book_ratings.iloc[index, 1:].tolist()
            dic_book_sim[book_key] = calculate_sim_score_books(
                target_book_user_scores, candidate_book_user_scores
            )

    #Extract the top k_books most similar books. Row positions are carried through
    #the ranking so the matching user ratings are looked up directly instead of
    #being re-parsed from the key strings. The sort is stable, so ties keep row order.
    book_keys = list(dic_book_sim.keys())
    top_positions = sorted(
        range(len(book_keys)),
        key=lambda position: dic_book_sim[book_keys[position]],
        reverse=True,
    )[:k_books]

    sorted_dic_book_sim = {book_keys[position]: dic_book_sim[book_keys[position]] for position in top_positions}
    sim_scores = list(sorted_dic_book_sim.values())
    ratings = [target_u_all_rated_books[position] for position in top_positions]
    weighted_average = round(calculate_weighted_average(sim_scores, ratings), 2)

    return sim_scores,ratings,dic_book_sim, sorted_dic_book_sim, weighted_average

In [8]:
# Example run: predict the rating user "u5" would give book "b2", using the
# 3 books most similar to it among those the user has rated.
target_u, target_b = "u5", "b2" 
sim_scores,ratings,dic_book_sim, sorted_dic_book_sim, weighted_average = CFItemToItem("book_ratings.csv", target_u, target_b, k_books=3)

# Each print() emits a label; the bare name after it is echoed as a cell result
# (this depends on ast_node_interactivity being set to "all" in the imports cell).

# similarity scores of the selected books, highest first
print("sim_scores")
sim_scores

# the target user's ratings of those selected books
print("\n ratings")
ratings

# similarity of every book to the target book (0 for unrated books and the target itself)
print("\n dic_book_sim")
dic_book_sim

# the k_books highest-similarity entries
print("\n sorted_dic_book_sim")
sorted_dic_book_sim

# predicted rating for the target book
print("\n weighted_average")
weighted_average


sim_scores


[0.85, 0.66, 0.64]


 ratings


[3, 8, 1]


 dic_book_sim


{'b1': 0,
 'b2': 0,
 'b3': 0.48,
 'b4': 0.53,
 'b5': 0.64,
 'b6': 0.66,
 'b7': 0.85,
 'b8': 0.56}


 sorted_dic_book_sim


{'b7': 0.85, 'b6': 0.66, 'b5': 0.64}


 weighted_average


3.94