# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

def recommend_products(user_id, products, ratings, n=5, *, exclude_rated=False):
    """
    Return up to n recommended products for the given user_id.

    Neighbours are the (at most) n other users whose rating rows have the
    smallest correlation distance to the target user's row, restricted to
    users who rated at least one product the target user also rated.
    Products are then ranked by the neighbours' mean rating, best first.

    Args:
        user_id: Row index of the target user in ``ratings``.
        products: Sequence of product labels, indexed like the columns of
            ``ratings``.
        ratings: 2-D user x product array-like; entries ``<= 0`` are treated
            as "not rated" when deciding what a user has rated, but still
            count as numeric values in the correlation and the mean.
        n: Maximum number of neighbours to use and of products to return.
        exclude_rated: If True, products the target user has already rated
            are left out of the result (which may then be shorter than n).

    Returns:
        A list of at most n product labels; empty when n <= 0 or when no
        other user shares a rated product with the target user.

    Raises:
        ValueError: If ``ratings`` is not two-dimensional.
        IndexError: If ``user_id`` is not a valid row index.
    """
    ratings = np.asarray(ratings, dtype=float)
    if ratings.ndim != 2:
        raise ValueError("ratings must be a 2-D user x product matrix")

    n_users = ratings.shape[0]
    if not -n_users <= user_id < n_users:
        raise IndexError(
            "user_id {} is out of range for {} users".format(user_id, n_users)
        )
    # Normalise negative indices so the self-exclusion below compares correctly.
    user_id %= n_users

    if n <= 0:
        return []

    # Products the target user has rated.
    rated_mask = ratings[user_id] > 0

    # Candidate neighbours: other users with at least one rated product in
    # common with the target user.
    shares_product = (ratings[:, rated_mask] > 0).any(axis=1)
    candidates = np.flatnonzero(shares_product)
    candidates = candidates[candidates != user_id]
    if candidates.size == 0:
        return []

    # Correlation distance = 1 - Pearson correlation between rating rows.
    centered = ratings - ratings.mean(axis=1, keepdims=True)
    norms = np.linalg.norm(centered, axis=1)
    dots = centered[candidates] @ centered[user_id]
    denom = norms[candidates] * norms[user_id]
    # A constant row has zero variance and no defined correlation; treat it
    # as uncorrelated (distance 1) instead of propagating NaN into the sort.
    corr = np.zeros(candidates.size)
    np.divide(dots, denom, out=corr, where=denom > 0)
    dists = 1.0 - corr

    # Map positions in the candidate subset back to real user row indices.
    closest_users = candidates[np.argsort(dists, kind="stable")[:n]]

    # Mean neighbour rating per product.
    avg_ratings = ratings[closest_users].mean(axis=0)

    # Best first; the stable sort breaks ties by lower product index.
    ranked = np.argsort(-avg_ratings, kind="stable")
    if exclude_rated:
        ranked = ranked[~rated_mask[ranked]]

    return [products[i] for i in ranked[:n]]

# Example usage: a toy rating matrix with one row per user and one column
# per entry of `products`.
products = ['product{}'.format(number) for number in range(1, 6)]
ratings = np.array([
    [5, 4, 0, 2, 3],
    [3, 4, 4, 5, 5],
    [4, 3, 2, 4, 5],
    [4, 5, 3, 5, 5],
    [5, 5, 5, 4, 4],
])
user_id = 0

# Ask for recommendations for the first user and report them.
recommended_products = recommend_products(user_id, products, ratings)
message = "Recommended products for user {}: {}".format(user_id, recommended_products)
print(message)

# Load the ratings dataset into a pandas DataFrame.
# The code below reads the columns "user_id", "item_id" and "rating".
df = pd.read_csv("data.csv")

# Exploratory Data Analysis (EDA)
# Count the distinct users and items present in the data.
n_users = df["user_id"].nunique()
n_items = df["item_id"].nunique()
print("Number of unique users: ", n_users)
print("Number of unique items: ", n_items)

# Build the user-item interaction matrix: one row per user, one column per
# item, with 0 filled in for pairs that have no rating.
# NOTE: DataFrame.pivot raises ValueError if a (user_id, item_id) pair occurs
# more than once in the data.
user_item_matrix = df.pivot(index="user_id", columns="item_id", values="rating").fillna(0)

# Sparsity: fraction of user-item pairs that have no rating row in df.
sparsity = round(1.0 - len(df) / float(n_users * n_items), 3)
print("Sparsity: ", sparsity)

# Visualize the distribution of ratings.
# Pass the column as `x=` explicitly: seaborn's plotting functions take their
# variables as keyword arguments, and a bare positional Series is bound to
# `data` rather than `x` in current releases.
sns.countplot(x=df["rating"])
plt.show()

# Split the dataset into training and test sets (75% / 25% of the rows).
train_data, test_data = train_test_split(df, test_size=0.25)

# User-item matrix for the training data (0 = no rating in this split).
train_user_item_matrix = train_data.pivot(index="user_id", columns="item_id", values="rating").fillna(0)

# User-item matrix for the test data.
# NOTE: its rows/columns cover only the users/items present in the test
# split, so it is not aligned with the training matrix.
test_user_item_matrix = test_data.pivot(index="user_id", columns="item_id", values="rating").fillna(0)

# Fit a brute-force cosine-distance nearest-neighbour index on the training users.
model_knn = NearestNeighbors(metric="cosine", algorithm="brute")
model_knn.fit(train_user_item_matrix)

# Number of neighbours to use for kNN.
k = 5

# Work on the plain array with positional indices: `kneighbors` returns row
# positions, and indexing the DataFrame with `[]` would select columns by
# label instead of rows.
train_values = train_user_item_matrix.to_numpy()
# Size everything from the training matrix; the split can contain fewer
# users/items than the full dataset's n_users / n_items.
n_train_users, n_train_items = train_values.shape

# Top-k nearest neighbours of every training user. Calling `kneighbors`
# without a query matrix returns neighbours of the fitted points and does not
# count a point as its own neighbour, so a user's own ratings are not used to
# predict that same user's ratings.
n_neighbors_used = min(k, n_train_users - 1)
if n_neighbors_used > 0:
    neighbors = model_knn.kneighbors(n_neighbors=n_neighbors_used, return_distance=False)
else:
    # A single training user has no neighbours at all.
    neighbors = np.empty((n_train_users, 0), dtype=int)

# Average rating of each user's neighbours, per item.
neighbor_ratings = np.zeros((n_train_users, n_train_items))
if n_neighbors_used > 0:
    for user, neighbor_rows in enumerate(neighbors):
        neighbor_ratings[user, :] = train_values[neighbor_rows].mean(axis=0)

# Predicted rating for (user, item): the mean rating that the user's nearest
# neighbours gave to that item. Rows and columns follow the order of
# train_user_item_matrix.index and train_user_item_matrix.columns.
pred_ratings = neighbor_ratings.copy()



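# Captured cell output: FileNotFoundError: ignored
# (presumably raised by pd.read_csv("data.csv") because the file was not found)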