### 1. ITEM-BASED COLLABORATIVE FILTERING, 2. USER-BASED COLLABORATIVE FILTERING

#### 1. ITEM BASED COLLABORATIVE FILTERING
MAKE SURE YOU DEFINE - IN THE "INPUT YOUR DATA HERE" SECTION, BELOW THE MIDDLE OF THE SCRIPT THAT FOLLOWS - THE 
RATINGS MATRIX, USER INDEX AND ITEM INDEX (REMEMBER THAT INDEXING IS 0-BASED, SO 0 IS THE FIRST IN BOTH CASES), 
AS WELL AS THE NUMBER OF NEIGHBOURS AND THE SIMILARITY METRIC (PEARSON OR COSINE)

In [1]:
import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Function to calculate similarity between two items
def calculate_similarity(item1, item2, method='cosine'):
    """Return the similarity of two item rating vectors over their co-rated entries.

    Only positions where both vectors hold a rating (neither value is NaN)
    take part in the comparison.

    Args:
        item1: 1-D float array of ratings for the first item (NaN = missing).
        item2: 1-D float array of ratings for the second item, same length.
        method: 'cosine' or 'pearson'.

    Returns:
        The similarity rounded to 4 decimals, or NaN when it is undefined:
        no co-rated entries, an all-zero vector (cosine), or a constant
        vector (pearson).

    Raises:
        ValueError: If ``method`` is neither 'cosine' nor 'pearson'.
    """
    # Fail loudly on a misspelled metric instead of silently returning None.
    if method not in ('cosine', 'pearson'):
        raise ValueError(
            f"Unknown similarity method {method!r}; expected 'cosine' or 'pearson'"
        )

    common = ~np.isnan(item1) & ~np.isnan(item2)
    if not np.any(common):
        return np.nan

    first, second = item1[common], item2[common]
    if method == 'cosine':
        # Cosine similarity divides by both norms, so it is undefined for an
        # all-zero vector; report NaN explicitly rather than dividing by zero.
        if not np.any(first) or not np.any(second):
            return np.nan
        return round(1 - cosine(first, second), 4)

    # Pearson correlation is undefined when either vector has no variance.
    if np.std(first) == 0 or np.std(second) == 0:
        return np.nan
    return round(pearsonr(first, second)[0], 4)

# Function to generate the item-item similarity matrix
def generate_similarity_matrix(ratings, method='cosine'):
    """Build the symmetric item-item similarity matrix for a ratings array.

    Args:
        ratings: 2-D array whose columns are the item rating vectors.
        method: Similarity metric forwarded to ``calculate_similarity``.

    Returns:
        A square array with one row/column per item, rounded to 4 decimals.
    """
    item_count = ratings.shape[1]
    sims = np.zeros((item_count, item_count))

    # Visit each unordered pair once (upper triangle, diagonal included) and
    # write the value to both symmetric positions.
    upper_rows, upper_cols = np.triu_indices(item_count)
    for a, b in zip(upper_rows, upper_cols):
        value = calculate_similarity(ratings[:, a], ratings[:, b], method)
        sims[a, b] = value
        sims[b, a] = value

    return np.round(sims, 4)

# Predict rating with a comprehensive approach
def predict_rating(user_index, item_index, ratings_matrix, num_neighbors, method='cosine'):
    """Predict a user's rating for an item with item-based collaborative filtering.

    The prediction is the target item's mean rating plus the
    similarity-weighted average of the user's mean-centred ratings on the
    items most similar to the target.

    Args:
        user_index: Row index of the user (0-based).
        item_index: Column index of the item to predict (0-based).
        ratings_matrix: 2-D float array with users in rows and items in
            columns; NaN marks a missing rating.
        num_neighbors: Maximum number of most-similar items to consider.
        method: Similarity metric forwarded to ``generate_similarity_matrix``.

    Returns:
        A 4-tuple of:
        - the predicted rating rounded to 4 decimals, or NaN when none of the
          selected neighbours was rated by the user (or their weights sum to 0);
        - the indices of the selected neighbour items, most similar first;
        - the similarities of every item to the target item (4 decimals);
        - the mean rating of every item (4 decimals).
    """
    similarity_matrix = generate_similarity_matrix(ratings_matrix, method)
    similarities = similarity_matrix[item_index]
    item_averages = np.nanmean(ratings_matrix, axis=0)

    # Rank all items by descending similarity, then drop the target item by
    # index (it is not guaranteed to sort first when another item ties with
    # it) and drop items whose similarity is undefined (NaN), which would
    # otherwise turn the whole prediction into NaN.
    ranked = np.argsort(-similarities)
    usable = (ranked != item_index) & ~np.isnan(similarities[ranked])
    top_indices = ranked[usable][:max(num_neighbors, 0)]

    numerator, denominator = 0, 0
    for idx in top_indices:
        rating = ratings_matrix[user_index, idx]
        # Neighbours the user has not rated contribute nothing.
        if not np.isnan(rating):
            numerator += similarities[idx] * (rating - item_averages[idx])
            denominator += abs(similarities[idx])

    if denominator:
        predicted = item_averages[item_index] + numerator / denominator
    else:
        predicted = np.nan
    return round(predicted, 4), top_indices, np.round(similarities, 4), np.round(item_averages, 4)

#....................................... INPUT YOUR DATA HERE.........................................
# Set the five inputs below: ratings matrix, user index, item index,
# number of neighbours and similarity metric.

# Ratings matrix: one row per user, one column per item.
# np.nan marks a missing rating.
ratings_matrix = np.array(
    [
        [3, 0, 3, 3],
        [5, 4, 0, 2],
        [1, 2, 4, 2],
        [3, np.nan, 1, 0],
        [2, 2, 0, 1],
    ],
    dtype=float,
)

# Both indices are 0-based: the first user / first item is 0.
user_index = 3
item_index = 1
num_neighbors = 2
method = 'cosine'
#.....................................................................................................................


# Run the item-based prediction with the inputs defined above.
predicted_rating, top_indices, similarities, item_averages = predict_rating(
    user_index=user_index,
    item_index=item_index,
    ratings_matrix=ratings_matrix,
    num_neighbors=num_neighbors,
    method=method,
)

# Report using 1-based user/item numbers.
print(f"Predicted rating for User {user_index + 1} on Item {item_index + 1}: {predicted_rating}")

print("\nTop similar items and their similarities:")
for idx in top_indices:
    print(f"Item {idx + 1} Similarity: {similarities[idx]}")

print("\nAverage ratings for all items:")
for index, avg_rating in enumerate(item_averages):
    print(f"Item {index + 1}: Average Rating = {avg_rating}")


Predicted rating for User 4 on Item 2: 1.4041

Top similar items and their similarities:
Item 1 Similarity: 0.8498
Item 4 Similarity: 0.6736

Average ratings for all items:
Item 1: Average Rating = 2.8
Item 2: Average Rating = 2.0
Item 3: Average Rating = 1.6
Item 4: Average Rating = 1.6


#### 2. USER BASED COLLABORATIVE FILTERING
MAKE SURE YOU DEFINE - SOMEWHERE BELOW THE MIDDLE OF THE SCRIPT THAT FOLLOWS: RATINGS MATRIX, USER INDEX AND ITEM INDEX (REMEMBER 0 IS THE FIRST IN BOTH CASES), AS WELL AS NUMBER OF NEIGHBOURS AND SIMILARITY METRIC (PEARSON OR COSINE)


In [2]:
import numpy as np
from scipy.spatial.distance import cosine
from scipy.stats import pearsonr

# Function to calculate similarity between two users
def calculate_similarity(user1, user2, method='cosine'):
    """Return the similarity of two user rating vectors over their co-rated items.

    Only positions where both users hold a rating (neither value is NaN)
    take part in the comparison.

    Args:
        user1: 1-D float array of ratings by the first user (NaN = missing).
        user2: 1-D float array of ratings by the second user, same length.
        method: 'cosine' or 'pearson'.

    Returns:
        The similarity rounded to 4 decimals, or NaN when it is undefined:
        no co-rated items, an all-zero vector (cosine), or a constant
        vector (pearson).

    Raises:
        ValueError: If ``method`` is neither 'cosine' nor 'pearson'.
    """
    # Fail loudly on a misspelled metric instead of silently returning None.
    if method not in ('cosine', 'pearson'):
        raise ValueError(
            f"Unknown similarity method {method!r}; expected 'cosine' or 'pearson'"
        )

    common = ~np.isnan(user1) & ~np.isnan(user2)
    if not np.any(common):
        return np.nan

    first, second = user1[common], user2[common]
    if method == 'cosine':
        # Cosine similarity divides by both norms, so it is undefined for an
        # all-zero vector; report NaN explicitly rather than dividing by zero.
        if not np.any(first) or not np.any(second):
            return np.nan
        return round(1 - cosine(first, second), 4)

    # Pearson correlation is undefined when either vector has no variance.
    if np.std(first) == 0 or np.std(second) == 0:
        return np.nan
    return round(pearsonr(first, second)[0], 4)

# Function to generate the user-user similarity matrix
def generate_similarity_matrix(ratings, method='cosine'):
    """Build the symmetric user-user similarity matrix for a ratings array.

    Args:
        ratings: 2-D array whose rows are the user rating vectors.
        method: Similarity metric forwarded to ``calculate_similarity``.

    Returns:
        A square array with one row/column per user, rounded to 4 decimals.
    """
    user_count = ratings.shape[0]
    sims = np.zeros((user_count, user_count))

    # Visit each unordered pair once (upper triangle, diagonal included) and
    # write the value to both symmetric positions.
    upper_rows, upper_cols = np.triu_indices(user_count)
    for a, b in zip(upper_rows, upper_cols):
        value = calculate_similarity(ratings[a], ratings[b], method)
        sims[a, b] = value
        sims[b, a] = value

    return np.round(sims, 4)

# Predict rating using user-based CF
def predict_rating(user_index, item_index, ratings_matrix, num_neighbors, method='cosine'):
    """Predict a user's rating for an item with user-based collaborative filtering.

    The prediction is the target user's mean rating plus the
    similarity-weighted average of the mean-centred ratings that the most
    similar users gave to the item.

    Args:
        user_index: Row index of the user to predict for (0-based).
        item_index: Column index of the item (0-based).
        ratings_matrix: 2-D float array with users in rows and items in
            columns; NaN marks a missing rating.
        num_neighbors: Maximum number of most-similar users to consider.
        method: Similarity metric forwarded to ``generate_similarity_matrix``.

    Returns:
        A 4-tuple of:
        - the predicted rating rounded to 4 decimals, or NaN when none of the
          selected neighbours rated the item (or their weights sum to 0);
        - the indices of the selected neighbour users, most similar first;
        - the similarities of every user to the target user (4 decimals);
        - the mean rating of every user (4 decimals).
    """
    similarity_matrix = generate_similarity_matrix(ratings_matrix, method)
    similarities = similarity_matrix[user_index]
    user_averages = np.nanmean(ratings_matrix, axis=1)

    # Rank all users by descending similarity, then drop the target user by
    # index (it is not guaranteed to sort first when another user ties with
    # it) and drop users whose similarity is undefined (NaN), which would
    # otherwise turn the whole prediction into NaN.
    ranked = np.argsort(-similarities)
    usable = (ranked != user_index) & ~np.isnan(similarities[ranked])
    top_indices = ranked[usable][:max(num_neighbors, 0)]

    numerator, denominator = 0, 0
    for idx in top_indices:
        rating = ratings_matrix[idx, item_index]
        # Neighbours who have not rated the item contribute nothing.
        if not np.isnan(rating):
            numerator += similarities[idx] * (rating - user_averages[idx])
            denominator += abs(similarities[idx])

    if denominator:
        predicted = user_averages[user_index] + numerator / denominator
    else:
        predicted = np.nan
    return round(predicted, 4), top_indices, np.round(similarities, 4), np.round(user_averages, 4)



#....................................... INPUT YOUR DATA HERE.........................................
# Set the five inputs below: ratings matrix, user index, item index,
# number of neighbours and similarity metric.

# Ratings matrix: one row per user, one column per item.
# np.nan marks a missing rating.
ratings_matrix = np.array(
    [
        [3, 0, 3, 3],
        [5, 4, 0, 2],
        [1, 2, 4, 2],
        [3, np.nan, 1, 0],
        [2, 2, 0, 1],
    ],
    dtype=float,
)

# Both indices are 0-based: the first user / first item is 0.
user_index = 3
item_index = 1
num_neighbors = 2
method = 'cosine'
#.....................................................................................................................


# Run the user-based prediction with the inputs defined above.
predicted_rating, top_indices, similarities, user_averages = predict_rating(
    user_index=user_index,
    item_index=item_index,
    ratings_matrix=ratings_matrix,
    num_neighbors=num_neighbors,
    method=method,
)

# Report using 1-based user/item numbers.
print(f"Predicted rating for User {user_index + 1} on Item {item_index + 1}: {predicted_rating}")

print("\nTop similar users and their similarities:")
for idx in top_indices:
    print(f"User {idx + 1} Similarity: {similarities[idx]}")

print("\nAverage ratings for all users:")
for index, avg_rating in enumerate(user_averages):
    print(f"User {index + 1}: Average Rating = {avg_rating}")


Predicted rating for User 4 on Item 2: 2.338

Top similar users and their similarities:
User 2 Similarity: 0.8808
User 5 Similarity: 0.8485

Average ratings for all users:
User 1: Average Rating = 2.25
User 2: Average Rating = 2.75
User 3: Average Rating = 2.25
User 4: Average Rating = 1.3333
User 5: Average Rating = 1.25
