In [4]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# 1. Load Data
def load_data(file_path):
    """
    Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV, with pandas' default parsing options.
    """
    return pd.read_csv(file_path)

# 2. Create a user-item matrix
def create_user_item_matrix(data):
    """
    Pivot long-format ratings into a wide user-by-item table.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'user_id', 'item_id' and 'rating'.

    Returns
    -------
    pandas.DataFrame
        One row per 'user_id', one column per 'item_id'. Repeated
        (user, item) pairs are combined by pivot_table's default
        aggregation (the mean); pairs with no rating are filled with 0.
    """
    pivoted = pd.pivot_table(
        data,
        values='rating',
        index='user_id',
        columns='item_id',
    )
    # NaN marks a (user, item) pair with no rating; encode it as 0.
    return pivoted.fillna(0)

# 3. Fit KNN model on the item matrix
def fit_knn_model(user_item_matrix, n_neighbors=5):
    """
    Fit a brute-force, cosine-distance KNN model on the item vectors.

    The user-item matrix is transposed before fitting, so every fitted
    sample is one item described by the ratings it received from each user.

    Parameters
    ----------
    user_item_matrix : pandas.DataFrame
        Rows are users, columns are items.
    n_neighbors : int, default 5
        Desired number of neighbours per item. It is capped at
        ``n_items - 1`` (an item has at most that many neighbours other
        than itself) and never drops below 1.

    Returns
    -------
    sklearn.neighbors.NearestNeighbors
        The fitted model.

    Raises
    ------
    ValueError
        If the matrix has no item columns, or ``n_neighbors`` is below 1.
    """
    n_items = len(user_item_matrix.columns)
    if n_items == 0:
        raise ValueError("user_item_matrix has no item columns to fit on")
    if n_neighbors < 1:
        raise ValueError(f"n_neighbors must be at least 1, got {n_neighbors}")

    # Cap at n_items - 1, but keep at least 1: with a single item the cap
    # alone would yield 0, which NearestNeighbors rejects.
    n_neighbors = max(1, min(n_neighbors, n_items - 1))

    knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=n_neighbors)
    knn.fit(user_item_matrix.T)  # Transpose so items are rows
    return knn

# 4. Generate Predictions using KNN
def generate_predictions(user_item_matrix, knn_model, user_id, n_recommendations=5):
    """
    Recommend unrated items to a user from item-item cosine similarity.

    For every item the user has rated (rating > 0), the nearest item
    neighbours are looked up in ``knn_model``. Each neighbour the user has
    not rated accumulates ``1 - distance`` as its score, summed over all
    rated items that list it as a neighbour.

    Parameters
    ----------
    user_item_matrix : pandas.DataFrame
        Rows are users, columns are items, 0 meaning "not rated".
    knn_model : object
        Fitted neighbour model exposing ``n_neighbors`` and
        ``kneighbors(X, n_neighbors=...)`` returning ``(distances, indices)``.
        Assumed to be fitted on the transpose of ``user_item_matrix`` so that
        returned indices are column positions of that matrix.
    user_id : hashable
        Row label of the user in ``user_item_matrix``.
    n_recommendations : int or None, default 5
        Maximum number of rows to return; ``None`` returns every candidate.

    Returns
    -------
    pandas.DataFrame
        Columns 'item_id' and 'similarity_score', sorted by descending score.
        Empty when the user has no rated items or no unrated neighbours.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row of ``user_item_matrix``.
    """
    result_columns = ['item_id', 'similarity_score']

    if user_id not in user_item_matrix.index:
        raise KeyError(f"user_id {user_id!r} not found in user_item_matrix")

    user_ratings = user_item_matrix.loc[user_id]
    rated_mask = (user_ratings > 0).to_numpy()
    rated_positions = np.flatnonzero(rated_mask)

    # Nothing rated means nothing to look neighbours up for.
    if rated_positions.size == 0:
        return pd.DataFrame([], columns=result_columns)

    item_ids = user_item_matrix.columns
    # Transpose once instead of on every loop iteration: one row per item.
    item_vectors = user_item_matrix.to_numpy().T

    # Ask for one extra neighbour because each query item matches itself,
    # but never for more neighbours than there are items.
    n_query = min(knn_model.n_neighbors + 1, len(item_ids))
    distances, indices = knn_model.kneighbors(
        item_vectors[rated_positions], n_neighbors=n_query
    )

    recommendations = {}
    for row_distances, row_indices in zip(distances, indices):
        for distance, neighbor_pos in zip(row_distances, row_indices):
            # Skips the query item itself as well as anything already rated.
            if rated_mask[neighbor_pos]:
                continue
            similar_item = item_ids[neighbor_pos]
            score = 1 - distance  # Convert distance to similarity score
            recommendations[similar_item] = recommendations.get(similar_item, 0) + score

    # Sort recommendations by aggregated similarity scores
    ranked = sorted(recommendations.items(), key=lambda pair: pair[1], reverse=True)
    if n_recommendations is not None:
        ranked = ranked[:n_recommendations]

    return pd.DataFrame(ranked, columns=result_columns)

# 5. Main function to run the recommendation system
def main(file_path, user_id):
    """
    Run the full pipeline: load ratings, pivot, fit KNN, recommend.

    Prints the loaded data and the pivoted user-item matrix along the way.

    Parameters
    ----------
    file_path : str
        Path of the ratings CSV passed to ``load_data``.
    user_id : hashable
        User to generate recommendations for.

    Returns
    -------
    The value returned by ``generate_predictions`` for ``user_id``.
    """
    data = load_data(file_path)
    print(f"data=\n{data}\n")

    user_item_matrix = create_user_item_matrix(data)
    print(f"user_item_matrix=\n{user_item_matrix}\n")

    # Fit with the default neighbour count and hand the model straight on.
    return generate_predictions(
        user_item_matrix,
        fit_knn_model(user_item_matrix),
        user_id,
    )


In [7]:

if __name__ == "__main__":
    # Example file path (replace with actual CSV path).
    # The CSV needs 'user_id', 'item_id' and 'rating' columns, since
    # create_user_item_matrix pivots on exactly those names.
    file_path = 'ratingsKNN.csv'
    
    # User to recommend for; must appear in the CSV's 'user_id' column.
    user_id = 1
    recommendations = main(file_path, user_id)
    print(f"Recommendations for User {user_id}:")
    # Show at most the first 10 rows of the ranked recommendations.
    print(recommendations.head(10))


data=
   user_id  item_id  rating
0        1      101       5
1        1      102       4
2        1      103       5
3        2      101       4
4        2      104       3
5        3      102       3
6        3      105       4
7        4      104       5
8        5      103       4
9        5      105       3

user_item_matrix=
item_id  101  102  103  104  105
user_id                         
1        5.0  4.0  5.0  0.0  0.0
2        4.0  0.0  0.0  3.0  0.0
3        0.0  3.0  0.0  0.0  4.0
4        0.0  0.0  0.0  5.0  0.0
5        0.0  0.0  4.0  0.0  3.0

Recommendations for User 1:
   item_id  similarity_score
0      105          0.854817
1      104          0.321403
