In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the raw interaction records
df = pd.read_csv('data.csv')

# Keep one row per (user_id, product_id) pair; DataFrame.pivot below
# requires each index/column combination to be unique
df = df.drop_duplicates(subset=['user_id', 'product_id'])

# Replace the text-valued columns with their integer category codes
df = df.assign(**{
    name: df[name].astype('category').cat.codes
    for name in ('search_history', 'browse_history', 'occupation')
})

# Build the user x product rating matrix; cells with no rating become 0
user_item_matrix = (
    df.pivot(index='user_id', columns='product_id', values='product_rating')
      .fillna(0)
)

# Product-to-product cosine similarity (transpose so products are the rows)
product_similarity = cosine_similarity(user_item_matrix.T)

# Define a function to get product recommendations
def get_recommendations(user_id, num_recommendations=2):
    """Return product names recommended for ``user_id``.

    Each product's score is the dot product of its row in
    ``product_similarity`` with the user's rating vector, multiplied by the
    user's ``num_searches`` for that product (0 when the user has no row for
    it), plus the user's ``salary``.

    Parameters
    ----------
    user_id
        A label present in ``user_item_matrix.index``.
    num_recommendations : int, default 2
        Maximum number of product names to return. Values <= 0 yield an
        empty list.

    Returns
    -------
    list
        Up to ``num_recommendations`` product names, ordered from lowest to
        highest score (the best-scoring product is last).

    Raises
    ------
    KeyError
        If ``user_id`` is not a label of ``user_item_matrix``.
    """
    # A slice of [-0:] would return *every* product and negative counts slice
    # arbitrarily, so handle non-positive requests explicitly.
    if num_recommendations <= 0:
        return []

    user_ratings = user_item_matrix.loc[user_id]
    similar_scores = product_similarity.dot(user_ratings)

    # Filter the user's rows once and reuse them for both lookups
    user_rows = df[df['user_id'] == user_id]
    user_searches = user_rows.set_index('product_id')['num_searches']
    user_salary = user_rows['salary'].values[0]

    # Align search counts to the matrix's product order; products the user
    # has no row for get a count of 0.
    product_ids = user_item_matrix.columns
    search_counts = user_searches.reindex(product_ids, fill_value=0).values

    # The salary is the same for every product of this user, so it shifts all
    # scores by the same amount.
    weighted_scores = similar_scores * search_counts + user_salary

    # argsort is ascending, so the tail holds the highest-scoring products
    recommended_product_indices = np.argsort(weighted_scores)[-num_recommendations:]
    recommended_products = [
        df.loc[df['product_id'] == product_ids[idx], 'product_name'].values[0]
        for idx in recommended_product_indices
    ]
    return recommended_products

import time
from tabulate import tabulate

# Take input for the number of recommendations for all users
num_recommendations = int(input("Enter the number of recommendations for all users: "))

# Display a loading message for 2-3 seconds
print("Loading...")
time.sleep(2)  # Sleep for 2 seconds

# Map each user_id to the username on its first row, built once rather than
# re-filtering the whole frame for every user
usernames = df.drop_duplicates(subset='user_id').set_index('user_id')['username']

# Create a list to store user recommendations
user_recommendations = []

# Walk the user IDs that actually exist in the matrix; the IDs are not
# assumed to be contiguous or to start at any particular value
for user_id in user_item_matrix.index:
    recommendations = get_recommendations(user_id, num_recommendations)
    username = usernames.loc[user_id]
    recommended_products = ', '.join(recommendations)  # Join recommended products with a comma
    user_recommendations.append([username, recommended_products])

# Print the table with headers
headers = ["Username", "Recommended Products"]
print(tabulate(user_recommendations, headers, tablefmt="fancy_grid"))


Loading...
╒════════════╤════════════════════════╕
│ Username   │ Recommended Products   │
╞════════════╪════════════════════════╡
│ user1      │ Laptop, Headphones     │
├────────────┼────────────────────────┤
│ user2      │ Pen, Book              │
├────────────┼────────────────────────┤
│ user3      │ Phone, Laptop          │
├────────────┼────────────────────────┤
│ user4      │ Tablet, Phone          │
├────────────┼────────────────────────┤
│ user5      │ Shirt, Water Bottle    │
├────────────┼────────────────────────┤
│ user6      │ Phone, Laptop          │
├────────────┼────────────────────────┤
│ user7      │ Laptop, Phone          │
├────────────┼────────────────────────┤
│ user8      │ Mouse, Tablet          │
├────────────┼────────────────────────┤
│ user9      │ Shoes, Water Bottle    │
├────────────┼────────────────────────┤
│ user10     │ Keyboard, Mouse        │
├────────────┼────────────────────────┤
│ user11     │ Water Bottle, Umbrella │
├────────────┼───────────────

In [2]:
# Initialize variables for evaluation
total_users = len(user_item_matrix)
correct_recommendations = 0
total_recommendations = 0  # Track the total number of recommendations made

# NOTE: recommendations are compared against interactions taken from the same
# `df` that the user-item matrix was built from (no held-out data), so this
# figure only shows how often recommended products are ones the user already
# rated.

# Evaluate the model for every user present in the matrix; IDs are not assumed
# to be contiguous or to start at any particular value
for user_id in user_item_matrix.index:
    recommendations = get_recommendations(user_id, num_recommendations=2)

    # Names of the products this user has rated above 0 (set for fast lookup)
    rated_mask = (df['user_id'] == user_id) & (df['product_rating'] > 0)
    actual_interactions = set(df.loc[rated_mask, 'product_name'])

    # Count how many of the recommended products the user actually rated
    total_recommendations += len(recommendations)
    correct_recommendations += sum(product in actual_interactions for product in recommendations)

# Calculate accuracy; report 0 rather than dividing by zero when nothing was recommended
accuracy = correct_recommendations / total_recommendations if total_recommendations else 0.0
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 100.00%
