In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Toy interaction log used to build the recommender below.
# One dict per user: profile fields (user_id, username, salary, occupation) plus an
# 'interactions' list with one dict per product the user rated/searched.
#
# NOTE(review): product_id does not identify a single product across users:
#   - 105 is 'Headphones' for user 1 but 'Hat' for user 2
#   - 106 is 'Sneakers' for user 2 but 'Sunglasses' for user 3
# Anything keyed on product_id alone treats these as the same item.
# NOTE(review): product 104 ('Watch') is tagged 'clothing'/'watches' for users 1 and 3
# but 'electronics'/'smartphones' for user 4 - confirm whether that is intended.
data = [
    {
        'user_id': 1,
        'username': 'user1',
        'salary': 80000,
        'interactions': [
            {'product_id': 101, 'product_name': 'Laptop', 'search_history': 'electronics', 'browse_history': 'laptops', 'product_rating': 4, 'num_searches': 10, 'price': 1000},
            {'product_id': 102, 'product_name': 'Shirt', 'search_history': 'clothing', 'browse_history': 'shoes', 'product_rating': 5, 'num_searches': 5, 'price': 30},
            {'product_id': 103, 'product_name': 'Phone', 'search_history': 'electronics', 'browse_history': 'smartphones', 'product_rating': 3, 'num_searches': 8, 'price': 500},
            {'product_id': 104, 'product_name': 'Watch', 'search_history': 'clothing', 'browse_history': 'watches', 'product_rating': 4, 'num_searches': 3, 'price': 150},
            {'product_id': 105, 'product_name': 'Headphones', 'search_history': 'electronics', 'browse_history': 'audio', 'product_rating': 4, 'num_searches': 12, 'price': 80},
        ],
        'occupation': 'engineer'
    },
    {
        'user_id': 2,
        'username': 'user2',
        'salary': 55000,
        'interactions': [
            {'product_id': 102, 'product_name': 'Shirt', 'search_history': 'clothing', 'browse_history': 'shoes', 'product_rating': 5, 'num_searches': 7, 'price': 25},
            {'product_id': 103, 'product_name': 'Phone', 'search_history': 'electronics', 'browse_history': 'smartphones', 'product_rating': 4, 'num_searches': 6, 'price': 450},
            {'product_id': 105, 'product_name': 'Hat', 'search_history': 'clothing', 'browse_history': 'hats', 'product_rating': 3, 'num_searches': 4, 'price': 15},
            {'product_id': 106, 'product_name': 'Sneakers', 'search_history': 'clothing', 'browse_history': 'shoes', 'product_rating': 4, 'num_searches': 9, 'price': 60},
            {'product_id': 107, 'product_name': 'Backpack', 'search_history': 'travel', 'browse_history': 'backpacks', 'product_rating': 5, 'num_searches': 2, 'price': 40},
        ],
        'occupation': 'teacher'
    },
    {
        'user_id': 3,
        'username': 'user3',
        'salary': 35000,
        'interactions': [
            {'product_id': 101, 'product_name': 'Laptop', 'search_history': 'electronics', 'browse_history': 'laptops', 'product_rating': 4, 'num_searches': 10, 'price': 900},
            {'product_id': 103, 'product_name': 'Phone', 'search_history': 'electronics', 'browse_history': 'smartphones', 'product_rating': 3, 'num_searches': 8, 'price': 550},
            {'product_id': 104, 'product_name': 'Watch', 'search_history': 'clothing', 'browse_history': 'watches', 'product_rating': 5, 'num_searches': 3, 'price': 120},
            {'product_id': 106, 'product_name': 'Sunglasses', 'search_history': 'accessories', 'browse_history': 'sunglasses', 'product_rating': 4, 'num_searches': 5, 'price': 30},
            {'product_id': 108, 'product_name': 'Wallet', 'search_history': 'accessories', 'browse_history': 'wallets', 'product_rating': 3, 'num_searches': 2, 'price': 20},
        ],
        'occupation': 'student'
    },
    {
        'user_id': 4,
        'username': 'user4',
        'salary': 95000,
        'interactions': [
            {'product_id': 101, 'product_name': 'Laptop', 'search_history': 'electronics', 'browse_history': 'laptops', 'product_rating': 4, 'num_searches': 10, 'price': 1100},
            {'product_id': 104, 'product_name': 'Watch', 'search_history': 'electronics', 'browse_history': 'smartphones', 'product_rating': 4, 'num_searches': 3, 'price': 200},
            {'product_id': 107, 'product_name': 'Backpack', 'search_history': 'travel', 'browse_history': 'backpacks', 'product_rating': 5, 'num_searches': 6, 'price': 70},
            {'product_id': 109, 'product_name': 'Water Bottle', 'search_history': 'fitness', 'browse_history': 'hydration', 'product_rating': 4, 'num_searches': 7, 'price': 200},
            {'product_id': 110, 'product_name': 'Umbrella', 'search_history': 'accessories', 'browse_history': 'umbrellas', 'product_rating': 3, 'num_searches': 2, 'price': 500},
        ],
        'occupation': 'doctor'
    },
    # ... (add more users)
]


# Flatten the nested per-user records into one row per (user, product) interaction.
# Each row is a NEW dict, so the nested interaction dicts inside `data` are not
# modified as a side effect of building the table.
interactions = [
    {
        **interaction,
        'user_id': user_data['user_id'],
        'occupation': user_data['occupation'],
        'salary': user_data['salary'],
    }
    for user_data in data
    for interaction in user_data['interactions']
]

df = pd.DataFrame(interactions)

# Keep a single row per (user_id, product_id) pair; DataFrame.pivot below raises
# if the same pair appears more than once.
df = df.drop_duplicates(subset=['user_id', 'product_id'])

# Replace the text columns with integer category codes.
df['search_history'] = df['search_history'].astype('category').cat.codes
df['browse_history'] = df['browse_history'].astype('category').cat.codes
df['occupation'] = df['occupation'].astype('category').cat.codes

# User-item matrix: one row per user, one column per product_id, cells hold the
# product_rating (0 where the user has no interaction with that product).
user_item_matrix = df.pivot(index='user_id', columns='product_id', values='product_rating').fillna(0)

# Item-item cosine similarity, computed between the product columns of the matrix.
product_similarity = cosine_similarity(user_item_matrix.T)

# Define a function to get product recommendations
def get_recommendations(user_id, num_recommendations=2):
    """Return the names of the highest-scoring products for ``user_id``.

    Each product column of the module-level ``user_item_matrix`` is scored as
    ``similarity_score * num_searches + salary``, where ``similarity_score`` is
    the dot product of ``product_similarity`` with the user's rating vector and
    ``num_searches`` is the user's own search count for that product (0 when the
    user has no row for it in ``df``).

    NOTE(review): ``salary`` is added identically to every product, so it shifts
    all scores equally and does not influence the ranking. Multiplying by
    ``num_searches`` zeroes the similarity term for products the user has not
    interacted with.

    Args:
        user_id: Label of the user in ``user_item_matrix.index``.
        num_recommendations: How many product names to return.

    Returns:
        A list of product names ordered from lower to higher score (the
        strongest recommendation is last). Names are taken from the first row
        of ``df`` carrying the product_id. Empty when ``num_recommendations``
        is zero or negative.

    Raises:
        KeyError: If ``user_id`` is not present in ``user_item_matrix``.
    """
    user_ratings = user_item_matrix.loc[user_id]

    # ``seq[-0:]`` is the whole sequence, so asking for zero items must be
    # handled explicitly instead of falling through to the slice below.
    if num_recommendations <= 0:
        return []

    similar_scores = product_similarity.dot(user_ratings.to_numpy())

    # Filter the interaction table once and reuse it for searches and salary.
    user_rows = df[df['user_id'] == user_id]
    user_salary = user_rows['salary'].values[0]

    # Align the user's search counts with the matrix columns; unseen products get 0.
    searches = (
        user_rows.set_index('product_id')['num_searches']
        .reindex(user_item_matrix.columns, fill_value=0)
        .to_numpy()
    )

    weighted_scores = similar_scores * searches + user_salary

    # argsort is ascending, so the tail holds the best-scoring column positions.
    top_positions = np.argsort(weighted_scores)[-num_recommendations:]
    top_product_ids = user_item_matrix.columns[top_positions]

    # Build the id -> name lookup once (first occurrence wins) rather than
    # rescanning df for every recommended product.
    product_names = df.drop_duplicates(subset='product_id').set_index('product_id')['product_name']
    return product_names.loc[top_product_ids].tolist()

# Print the top recommendations for every user in `data`.
# Iterating over the records themselves (instead of range(1, N + 1) plus
# data[user_id - 1]) keeps the id/username pairing correct even when user_ids
# are not the contiguous sequence 1..N or `data` is not sorted by id.
for user_record in data:
    recommendations = get_recommendations(user_record['user_id'], num_recommendations=2)
    username = user_record['username']
    print(f"Recommended products for {username} : {recommendations}")

Recommended products for user1 : ['Laptop', 'Headphones']
Recommended products for user2 : ['Shirt', 'Sneakers']
Recommended products for user3 : ['Phone', 'Laptop']
Recommended products for user4 : ['Water Bottle', 'Laptop']


In [2]:
# Evaluate the recommender: share of recommended product names that appear among
# the products each user has already rated.
# NOTE(review): get_recommendations multiplies every similarity score by the
# user's own num_searches (0 for products they never interacted with), so its top
# picks come from the user's existing interactions and this figure is ~100% by
# construction. Hold out some interactions to measure real predictive quality.
EVAL_TOP_K = 2  # recommendations requested per user

total_users = len(user_item_matrix)
total_recommendations = 0
correct_recommendations = 0

# Walk the user ids actually present in the matrix instead of assuming 1..N.
for user_id in user_item_matrix.index:
    recommendations = get_recommendations(user_id, num_recommendations=EVAL_TOP_K)

    # Names of the products this user has interacted with (rating > 0).
    rated_mask = (df['user_id'] == user_id) & (df['product_rating'] > 0)
    actual_interactions = set(df.loc[rated_mask, 'product_name'])

    # Count what was actually returned, so the denominator stays correct even if
    # fewer than EVAL_TOP_K products are available.
    total_recommendations += len(recommendations)
    correct_recommendations += sum(product in actual_interactions for product in recommendations)

# Avoid a ZeroDivisionError when there are no users / no recommendations.
accuracy = correct_recommendations / total_recommendations if total_recommendations else 0.0
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 100.00%
