In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Read the interaction records from CSV
data = pd.read_csv('user_item_interactions.csv')

# Pivot into a users x items table: every (user, item) pair present in the
# data becomes 1, every absent pair is filled with 0. No `values=` is given,
# so pandas aggregates every remaining column of `data`.
user_item_matrix = data.pivot_table(
    index='User_ID',
    columns='Item_ID',
    aggfunc=lambda _group: 1,
    fill_value=0,
)

# Store the (mostly zero) table in CSR form for the neighbour search
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Brute-force cosine nearest-neighbour index over user rows, 3 neighbours per query
model_knn = NearestNeighbors(
    n_neighbors=3,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
)
model_knn.fit(user_item_matrix_sparse)

def get_user_recommendations(user_id, user_item_matrix, model, top_n=3):
    """Recommend items to ``user_id`` based on what its nearest neighbours used.

    Parameters
    ----------
    user_id
        Label of the target user in ``user_item_matrix.index``.
    user_item_matrix : pandas.DataFrame
        One row per user; the columns must carry a level named ``'Item_ID'``.
        A cell equal to 1 marks an interaction.
    model
        Fitted neighbours model exposing ``n_neighbors`` and
        ``kneighbors(X, n_neighbors=...)``. The row positions it returns are
        looked up in ``user_item_matrix``, so it must have been fitted on the
        rows of that same matrix, in the same order.
    top_n : int, default 3
        Maximum number of items to return.

    Returns
    -------
    list or str
        Up to ``top_n`` item ids the target user has not interacted with,
        ordered by how many neighbours interacted with them (ties broken by
        the string form of the item id so the result is reproducible).
        If ``user_id`` is unknown, the string ``"User <id> not found."``.
    """
    if user_id not in user_item_matrix.index:
        return f"User {user_id} not found."

    item_ids = user_item_matrix.columns.get_level_values('Item_ID')

    def _items_of(user):
        # Item ids whose cell equals 1 in the given user's row.
        mask = (user_item_matrix.loc[user] == 1).to_numpy()
        return set(item_ids[mask])

    # Loop-invariant: the target user's items are needed for every neighbour.
    target_user_items = _items_of(user_id)

    user_row = user_item_matrix.loc[user_id].values.reshape(1, -1)
    user_row_sparse = csr_matrix(user_row)

    # Ask for one extra neighbour because the user itself is part of the
    # fitted data, but never more than the model holds (kneighbors raises
    # ValueError otherwise).
    n_query = model.n_neighbors + 1
    n_fitted = getattr(model, 'n_samples_fit_', None)
    if n_fitted is not None:
        n_query = min(n_query, n_fitted)

    _, indices = model.kneighbors(user_row_sparse, n_neighbors=n_query)

    # Drop the user's own row by position instead of assuming it is returned
    # first: an identical row may be ranked ahead of it when distances tie.
    own_position = user_item_matrix.index.get_loc(user_id)
    neighbor_positions = [pos for pos in indices.flatten() if pos != own_position]
    neighbor_positions = neighbor_positions[:model.n_neighbors]
    similar_users = [user_item_matrix.index[pos] for pos in neighbor_positions]

    # Count, per candidate item, how many neighbours interacted with it.
    recommendations = {}
    for user in similar_users:
        for item in _items_of(user) - target_user_items:
            recommendations[item] = recommendations.get(item, 0) + 1

    # Highest count first. The secondary key makes ties deterministic; without
    # it the order would follow set iteration order, which is not stable
    # across interpreter runs for string ids.
    sorted_recommendations = sorted(
        recommendations.items(),
        key=lambda pair: (-pair[1], str(pair[0])),
    )

    return [item for item, _ in sorted_recommendations[:top_n]]

# Demo: ask the fitted neighbour model for recommendations for user U3
user_id_to_recommend = 'U3'
recommendations = get_user_recommendations(
    user_id=user_id_to_recommend,
    user_item_matrix=user_item_matrix,
    model=model_knn,
)
print(f"Recommendations for User {user_id_to_recommend}: {recommendations}")



Recommendations for User U3: ['I4', 'I5']


In [2]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, make_scorer
from sklearn.model_selection import GridSearchCV

# Read the interaction records from CSV
data = pd.read_csv('user_item_interactions.csv')

# Users x items table: 1 where a (user, item) pair occurs in the data, 0 elsewhere
user_item_matrix = data.pivot_table(
    index='User_ID',
    columns='Item_ID',
    aggfunc=lambda _group: 1,
    fill_value=0,
)
user_item_matrix_sparse = csr_matrix(user_item_matrix.to_numpy())

# Hold out 20% of the rows for validation. The rows of the table are users,
# so the two parts contain disjoint sets of users; the seed fixes the split.
train_matrix, val_matrix = train_test_split(
    user_item_matrix,
    test_size=0.2,
    random_state=42,
)
train_matrix_sparse = csr_matrix(train_matrix.to_numpy())
val_matrix_sparse = csr_matrix(val_matrix.to_numpy())

def precision_at_k(y_true, y_pred, k=3):
    """Return precision@k: the share of the top-``k`` predictions that are relevant.

    Parameters
    ----------
    y_true
        Container of relevant items (anything supporting ``in``; a set is
        the efficient choice).
    y_pred
        Ranked sequence of predicted items, best first. Only the first ``k``
        entries are inspected.
    k : int, default 3
        Cut-off rank. Must be positive.

    Returns
    -------
    float
        ``hits / k``. The denominator is always ``k``, so a list shorter than
        ``k`` is still credited for the hits it contains instead of being
        scored 0.

    Raises
    ------
    ValueError
        If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k}")
    true_positives = sum(1 for item in y_pred[:k] if item in y_true)
    return true_positives / k

# Define a scorer based on our recommendation task
def recommendation_precision_scorer(estimator, X, y=None):
    """Mean precision@3 of neighbour-based recommendations on hidden items.

    The scorer is self-contained: it uses only ``estimator`` and ``X``, so it
    stays correct when a cross-validation routine fits the estimator on one
    subset of rows and scores it on another. It does not look users up in
    any module-level matrix, because fold-local row positions do not line up
    with a global index.

    For every user row in ``X``:

    1. The interacted items (nonzero columns, in column order) are split:
       every second item is hidden and becomes the ground truth, the rest
       stay visible and form the query.
    2. The ``estimator.n_neighbors`` nearest fitted rows to the visible part
       are retrieved.
    3. Items of those neighbours that are not visible are ranked by how many
       neighbours have them (ties broken by column position).
    4. The top 3 are scored with ``precision_at_k`` against the hidden items.

    Users with fewer than two interactions have nothing to hide and
    contribute 0; the sum is divided by the total number of rows in ``X``.

    Parameters
    ----------
    estimator
        Fitted neighbours model exposing ``n_neighbors`` and ``kneighbors``.
        The rows it was fitted on are read from ``estimator._fit_X``.
        NOTE(review): this is a private scikit-learn attribute; there is no
        public accessor for the fitted data.
    X
        Users x items interaction matrix (sparse or dense) with the same
        columns as the fitted data. It should hold users that were NOT part
        of the fitted data; otherwise a user's own full row can be returned
        as a neighbour and leak the hidden items.
    y
        Ignored; present for scorer-signature compatibility.

    Returns
    -------
    float
        Mean precision@3 over the rows of ``X`` (0 when ``X`` has no rows).

    Raises
    ------
    ValueError
        If the estimator does not expose its fitted rows.
    """
    k = 3  # precision@3, matching the cut-off used throughout the notebook

    n_users = X.shape[0]
    if n_users == 0:
        return 0

    fitted_rows = getattr(estimator, '_fit_X', None)
    if fitted_rows is None:
        raise ValueError("estimator must be fitted before it can be scored")
    fitted_rows = csr_matrix(fitted_rows)
    X = csr_matrix(X)
    n_items = X.shape[1]

    # Never request more neighbours than there are fitted rows.
    n_query = min(estimator.n_neighbors, fitted_rows.shape[0])

    total_precision = 0.0
    for user_idx in range(n_users):
        item_positions = sorted(int(pos) for pos in X[user_idx].nonzero()[1])
        if len(item_positions) < 2:
            continue  # nothing can be hidden while still leaving a query

        visible = item_positions[0::2]
        hidden = set(item_positions[1::2])
        visible_set = set(visible)

        # Query row containing only the visible interactions.
        query = csr_matrix(
            ([1.0] * len(visible), ([0] * len(visible), visible)),
            shape=(1, n_items),
        )
        _, indices = estimator.kneighbors(query, n_neighbors=n_query)

        # Vote: one point per neighbour that interacted with the item.
        votes = {}
        for neighbor_idx in indices.flatten():
            for pos in fitted_rows[neighbor_idx].nonzero()[1]:
                pos = int(pos)
                if pos not in visible_set:
                    votes[pos] = votes.get(pos, 0) + 1

        ranked = sorted(votes, key=lambda pos: (-votes[pos], pos))
        total_precision += precision_at_k(hidden, ranked[:k], k=k)

    return total_precision / n_users

# Candidate neighbourhood sizes: k = 2, 3, ..., 10
param_grid = {'n_neighbors': [k for k in range(2, 11)]}

# Unfitted template estimator; GridSearchCV sets n_neighbors from the grid
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_jobs=-1)

# Score every candidate k with the custom precision scorer using 2-fold CV
# (a small number of folds, chosen for demonstration)
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring=recommendation_precision_scorer,
    cv=2,
)
grid_search.fit(train_matrix_sparse)

# Keep the winning k and report it
best_k = grid_search.best_params_['n_neighbors']
print(f"Best k value: {grid_search.best_params_['n_neighbors']}")

# Fit the final neighbour model on the full training matrix with the selected k
final_model_knn = NearestNeighbors(
    n_neighbors=best_k,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
)
final_model_knn.fit(train_matrix_sparse)

# final_model_knn is now ready to be used for making recommendations

Best k value: 2
