In [6]:
#Importing required libraries
import pandas as pd
import numpy as np

#Class definition
class InMemoryRecommender:
    """Small in-memory recommender that precomputes similarity matrices.

    The ratings CSV must provide the columns ``Id`` (user identifier),
    ``book_id`` (item identifier) and ``Rating``. The ratings are pivoted
    into a dense user x item matrix in which missing ratings are stored as 0,
    so a stored 0 means "no rating" to every similarity method below (an
    explicit rating of 0 is indistinguishable from a missing one).

    All similarity matrices are positional: row/column ``k`` refers to the
    ``k``-th label of ``self.user_item_matrix.index`` (users) or
    ``self.user_item_matrix.columns`` (items). That order may differ from the
    order of ``self.users`` / ``self.items``, which come from ``unique()`` on
    the raw data.

    Attributes:
        data: The raw ratings frame as read from the CSV.
        users: Unique values of the ``Id`` column.
        items: Unique values of the ``book_id`` column.
        user_item_matrix: User x item rating matrix with missing entries as 0.
        item_jaccard: User-user Jaccard similarity (the historical attribute
            name is kept for backward compatibility).
        user_jaccard: Alias of ``item_jaccard`` under an accurate name.
        item_cosine: Item-item cosine similarity.
        item_pearson: Item-item Pearson correlation over co-rated users.
    """

    # Columns the constructor reads from the CSV.
    _REQUIRED_COLUMNS = ('Id', 'book_id', 'Rating')

    def __init__(self, data_file, nrows=30):
        """Load the ratings and precompute every similarity matrix.

        Args:
            data_file: Path or open file-like object accepted by
                ``pandas.read_csv``.
            nrows: Number of data rows to read. Defaults to 30, the value
                that used to be hard-coded; pass ``None`` to read the
                whole file.

        Raises:
            KeyError: If one of the required columns is missing.
        """
        self.data = pd.read_csv(data_file, nrows=nrows)

        missing = [col for col in self._REQUIRED_COLUMNS if col not in self.data.columns]
        if missing:
            raise KeyError(f"ratings data is missing required column(s): {missing}")

        # Unique users and items in order of first appearance in the data.
        self.users = self.data['Id'].unique()
        self.items = self.data['book_id'].unique()

        # User x item matrix; duplicate (user, item) pairs are averaged by
        # pivot_table's default aggregation, missing pairs become 0.
        self.user_item_matrix = pd.pivot_table(
            self.data, values='Rating', index='Id', columns='book_id'
        ).fillna(0)

        # NOTE: ``item_jaccard`` has always held the *user-user* Jaccard
        # matrix; the name is preserved so existing callers keep working.
        self.item_jaccard = self.compute_jaccard_users_similarity()
        self.user_jaccard = self.item_jaccard
        self.item_cosine = self.compute_cosine_similarity()
        self.item_pearson = self.compute_pearson_similarity()

    def compute_jaccard_users_similarity(self):
        """Compute the user-user Jaccard similarity matrix.

        Each user is represented by the set of items with a non-zero entry
        in ``self.user_item_matrix``; similarity is ``|A & B| / |A | B|``.

        Returns:
            A square ``numpy.ndarray`` with one row/column per row of
            ``self.user_item_matrix``. The diagonal is left at 0 and pairs
            whose union is empty score 0. Returns ``None`` when the matrix is
            empty or contains NaN.
        """
        matrix = self.user_item_matrix
        if matrix.empty or np.isnan(matrix.values).any():
            return None

        # Work from the matrix itself rather than self.users / self.items so
        # the sizes always agree with what pivot_table actually produced.
        rated = (matrix.values != 0).astype(np.int64)

        # intersection[i, j] = number of items rated by both user i and j.
        intersection = rated @ rated.T
        per_user = rated.sum(axis=1)
        union = per_user[:, None] + per_user[None, :] - intersection

        user_jaccard = np.zeros(intersection.shape, dtype=float)
        np.divide(intersection, union, out=user_jaccard, where=union > 0)

        # Self-similarity was never filled in by the original pairwise loop.
        np.fill_diagonal(user_jaccard, 0)
        return user_jaccard

    def compute_cosine_similarity(self):
        """Compute the item-item cosine similarity matrix.

        Returns:
            A square ``numpy.ndarray`` with one row/column per column of
            ``self.user_item_matrix``. Items whose rating column is all zeros
            have no direction, so every similarity involving them is 0
            instead of NaN.
        """
        ratings = self.user_item_matrix.values.astype(float)

        dot_products = ratings.T @ ratings
        norms = np.sqrt(np.square(ratings).sum(axis=0))
        denominators = np.outer(norms, norms)

        item_cosine = np.zeros(dot_products.shape, dtype=float)
        # Skip zero denominators to avoid 0/0 -> NaN and runtime warnings.
        np.divide(dot_products, denominators, out=item_cosine, where=denominators > 0)
        return item_cosine

    def compute_pearson_similarity(self):
        """Compute the item-item Pearson correlation matrix.

        For each item pair only users with a non-zero entry for both items
        are used, and each item is centred on its mean over those users.

        Returns:
            A square, symmetric ``numpy.ndarray`` with one row/column per
            column of ``self.user_item_matrix``. The diagonal is left at 0.
            Pairs with no co-rating users, or whose co-rated ratings have
            zero variance (for example a single shared user), score 0
            instead of NaN.
        """
        ratings = self.user_item_matrix.values.astype(float)
        rated = ratings != 0
        n_items = ratings.shape[1]
        item_pearson = np.zeros((n_items, n_items))

        for i in range(n_items):
            for j in range(i + 1, n_items):
                co_rated = rated[:, i] & rated[:, j]
                if not co_rated.any():
                    continue

                centred_i = ratings[co_rated, i] - ratings[co_rated, i].mean()
                centred_j = ratings[co_rated, j] - ratings[co_rated, j].mean()
                denominator = (
                    np.sqrt(np.sum(np.square(centred_i)))
                    * np.sqrt(np.sum(np.square(centred_j)))
                )
                if denominator == 0:
                    # Correlation is undefined without variance; treat as 0.
                    continue

                pearson = np.dot(centred_i, centred_j) / denominator
                item_pearson[i, j] = pearson
                item_pearson[j, i] = pearson

        return item_pearson


In [7]:
# # read the data from the CSV file
# path = r'C:\Users\ENG.TEDOM\PycharmProjects\ML\book_data\book1-100k.csv'
# data = open(path, encoding='cp855')
# data = pd.read_csv(data)

# # create a new column with unique IDs for each book
# data['book_id'] = data['Name'].astype('category').cat.codes

# # save the modified data to a new CSV file
# data.to_csv('book1-100k-with-ids.csv', index=False)

In [8]:
# Location of the ratings CSV that already contains the `book_id` column.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run elsewhere without editing it.
path = r'C:\Users\ENG.TEDOM\PycharmProjects\ML\book_data\book1-100k-with-ids.csv'
# Open the file as text, decoded as cp855. The handle is stored in `data`
# and handed to the recommender in a later cell; it is not closed anywhere
# in the visible notebook.
data = open(path, encoding='cp855')

In [9]:
# `data` is an open file handle shared across cells. Reading it advances the
# position, so rewind first; otherwise re-running this cell without
# re-opening the file would start parsing from the middle of the CSV.
if hasattr(data, 'seek'):
    data.seek(0)
recommendation = InMemoryRecommender(data)

In [10]:
# The constructor already computed and stored all three matrices, so print
# the cached results instead of recomputing them (the Pearson pass in
# particular is a quadratic Python loop over item pairs).
print(recommendation.item_jaccard)   # user-user Jaccard (attribute name is historical)
print('-------------------------------------------------')
print(recommendation.item_cosine)    # item-item cosine
print('--------------------------------------------------')
print(recommendation.item_pearson)   # item-item Pearson

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 