In [5]:
%config IPCompleter.greedy=True

In [6]:
import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Function to calculate similarity between two items
def calculate_similarity(item1, item2, method='cosine'):
    """Return the similarity of two item rating vectors over co-rated entries.

    Only positions where both vectors hold a rating (neither value is NaN)
    take part in the computation.

    Args:
        item1: 1-D sequence of ratings for the first item; NaN marks a
            missing rating.
        item2: 1-D sequence of ratings for the second item, aligned
            position-by-position with ``item1``.
        method: ``'cosine'`` or ``'pearson'``.

    Returns:
        The similarity rounded to 4 decimals, or ``np.nan`` when it is
        undefined: no co-rated entries, an all-zero vector (cosine), or a
        constant vector (Pearson).

    Raises:
        ValueError: If ``method`` is not one of the supported measures.
    """
    # Fail loudly on a misspelled method instead of silently returning None.
    if method not in ('cosine', 'pearson'):
        raise ValueError(
            f"Unknown similarity method {method!r}; expected 'cosine' or 'pearson'"
        )

    # Accept plain lists as well as arrays; boolean-mask indexing needs arrays.
    item1 = np.asarray(item1, dtype=float)
    item2 = np.asarray(item2, dtype=float)

    common = ~np.isnan(item1) & ~np.isnan(item2)
    if not np.any(common):
        return np.nan

    first, second = item1[common], item2[common]

    if method == 'cosine':
        # Cosine similarity is undefined for a zero-length vector; report NaN
        # explicitly rather than relying on a division by zero inside SciPy.
        if not np.any(first) or not np.any(second):
            return np.nan
        return round(1 - cosine(first, second), 4)

    # Pearson correlation is undefined when either vector has no variance.
    if np.std(first) == 0 or np.std(second) == 0:
        return np.nan
    return round(pearsonr(first, second)[0], 4)

# Function to generate the item-item similarity matrix
def generate_similarity_matrix(ratings, method='cosine'):
    """Build the symmetric item-item similarity matrix for a ratings table.

    Args:
        ratings: 2-D array whose columns are items; each pair of columns is
            compared with ``calculate_similarity``.
        method: Similarity measure name passed through to
            ``calculate_similarity``.

    Returns:
        A square array with one row and column per item, rounded to
        4 decimals, where entry ``[i, j]`` equals entry ``[j, i]``.
    """
    item_count = ratings.shape[1]
    matrix = np.zeros((item_count, item_count))

    # Visit the upper triangle (diagonal included) in row-major order and
    # write each score to both mirrored cells.
    upper_rows, upper_cols = np.triu_indices(item_count)
    for row, col in zip(upper_rows, upper_cols):
        score = calculate_similarity(ratings[:, row], ratings[:, col], method)
        matrix[row, col] = score
        matrix[col, row] = score

    return np.round(matrix, 4)

# Predict a user's rating for an item via item-based collaborative filtering
def predict_rating(user_index, item_index, ratings_matrix, num_neighbors, method='cosine'):
    """Predict a user's rating for an item from the most similar items.

    The prediction is the target item's mean rating plus the
    similarity-weighted average of the user's deviations from each
    neighbour item's own mean rating.

    Args:
        user_index: Row index of the user in ``ratings_matrix`` (0-based).
        item_index: Column index of the item to predict (0-based).
        ratings_matrix: 2-D float array with users in rows and items in
            columns; NaN marks a missing rating.
        num_neighbors: Maximum number of most-similar items to use.
        method: Similarity measure forwarded to
            ``generate_similarity_matrix``.

    Returns:
        A tuple ``(predicted, top_indices, similarities, item_averages)``:
        the prediction rounded to 4 decimals (``np.nan`` when the user rated
        none of the selected neighbours), the indices of the selected
        neighbour items, the target item's similarity to every item, and the
        mean rating of every item (both rounded to 4 decimals).
    """
    similarity_matrix = generate_similarity_matrix(ratings_matrix, method)
    similarities = similarity_matrix[item_index]
    item_averages = np.nanmean(ratings_matrix, axis=0)

    # Rank items by descending similarity; a stable sort keeps ties in a
    # deterministic (index) order.
    ranked = np.argsort(-similarities, kind='stable')

    # Remove the target item by index instead of assuming it sorts first:
    # another item can tie at similarity 1.0, and the self-similarity can be
    # NaN (which sorts last). Items with an undefined similarity are dropped
    # too, since a NaN weight would turn the whole prediction into NaN.
    usable = (ranked != item_index) & ~np.isnan(similarities[ranked])
    ranked = ranked[usable]
    top_indices = ranked[:max(num_neighbors, 0)]

    numerator, denominator = 0.0, 0.0
    for idx in top_indices:
        user_rating = ratings_matrix[user_index, idx]
        if np.isnan(user_rating):
            # The user has not rated this neighbour; it carries no signal.
            continue
        numerator += similarities[idx] * (user_rating - item_averages[idx])
        denominator += abs(similarities[idx])

    if denominator:
        predicted = item_averages[item_index] + numerator / denominator
    else:
        predicted = np.nan
    return round(predicted, 4), top_indices, np.round(similarities, 4), np.round(item_averages, 4)

# Define the ratings matrix
# Ratings table: one row per user, one column per item; np.nan = not rated
ratings_matrix = np.array(
    [
        [2, np.nan, 4, 2, 4],
        [4, 5, 3, 4, 2],
        [5, 3, 4, 2, 1],
        [5, 4, 2, 1, 3],
        [5, 2, 3, 4, 2],
    ],
    dtype=float,
)

# Demo settings (indices are zero-based: 0 is the first user / first item)
user_index = 0
item_index = 1
num_neighbors = 2
method = 'cosine'

predicted_rating, top_indices, similarities, item_averages = predict_rating(
    user_index,
    item_index,
    ratings_matrix,
    num_neighbors=num_neighbors,
    method=method,
)

# Report with one-based labels so the output reads naturally
print(f"Predicted rating for User {user_index + 1} on Item {item_index + 1}: {predicted_rating}")

print("\nTop similar items and their similarities:")
for idx in top_indices:
    print(f"Item {idx + 1} Similarity: {similarities[idx]}")

print("\nAverage ratings for all items:")
for index, avg_rating in enumerate(item_averages):
    print(f"Item {index + 1}: Average Rating = {avg_rating}")


Predicted rating for User 1 on Item 2: 3.2031

Top similar items and their similarities:
Item 5 Similarity: 0.9302
Item 1 Similarity: 0.9272

Average ratings for all items:
Item 1: Average Rating = 4.2
Item 2: Average Rating = 3.5
Item 3: Average Rating = 3.2
Item 4: Average Rating = 2.6
Item 5: Average Rating = 2.4
