In [60]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import sklearn

# Load the dataset
file_path = 'synthetic_products.csv'
product_data = pd.read_csv(file_path)

# Select relevant features for recommendation: 'category', 'brand', 'price', 'rating'
features = product_data[['category', 'brand', 'price', 'rating']]

# One-hot encode the categorical columns: 'category' and 'brand'.
# The dense-output keyword is `sparse_output` in scikit-learn >= 1.2 and
# `sparse` in older releases. Comparing `sklearn.__version__` as a string is
# unreliable (lexicographic order, e.g. '0.9' > '0.22'), so try the new
# keyword first and fall back when the constructor rejects it.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(features[['category', 'brand']])

# Normalize the 'price' and 'rating' columns for better comparison
scaler = MinMaxScaler()
price_rating_normalized = scaler.fit_transform(features[['price', 'rating']])

# Combine the encoded categorical features with the normalized numerical features
# (one row per product, in the same row order as `product_data`)
combined_features = np.hstack((encoded_features, price_rating_normalized))

# Calculate the cosine similarity between all products
similarity_matrix = cosine_similarity(combined_features)

# Function to get recommendations
def get_recommendations(product_id, num_recommendations=5):
    """Return the products most similar to ``product_id``.

    Reads the module-level ``product_data`` DataFrame and the module-level
    ``similarity_matrix``, whose row ``i`` holds the similarity scores of the
    ``i``-th row of ``product_data`` against every row.

    Parameters
    ----------
    product_id
        Value to look up in ``product_data['product_id']``. If several rows
        match, the first one is used.
    num_recommendations : int, default 5
        Maximum number of products to return. Values below zero are treated
        as zero.

    Returns
    -------
    pandas.DataFrame or None
        The recommended rows (columns ``product_id``, ``name``, ``category``,
        ``brand``, ``price``, ``rating``) ordered from most to least similar,
        or ``None`` (after printing a message) when ``product_id`` is absent.
    """
    # Locate the product by row *position* rather than by index label, so the
    # lookup stays correct when `product_data` does not have a default
    # 0..n-1 index (both `similarity_matrix` and `.iloc` are positional).
    matches = np.flatnonzero((product_data['product_id'] == product_id).to_numpy())

    if matches.size == 0:
        print(f"Product ID {product_id} not found.")
        return None

    idx = int(matches[0])

    # Similarity scores of this product against every product
    scores = np.asarray(similarity_matrix[idx])

    # Rank from most to least similar; the stable sort keeps the original row
    # order among ties.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the product itself explicitly. Discarding "whatever ranks first"
    # is wrong when another product ties with it at the top score.
    limit = max(num_recommendations, 0)
    recommended_indices = ranked[ranked != idx][:limit]

    # Return the recommended products
    return product_data.iloc[recommended_indices][['product_id', 'name', 'category', 'brand', 'price', 'rating']]

# Try the recommender on one known product ID
sample_product_id = 'P1924'
recommendations = get_recommendations(sample_product_id)

# `get_recommendations` returns None when the ID is not in the catalogue
if recommendations is None:
    print("No recommendations available.")
else:
    print(f"Recommendations for product {sample_product_id}:")
    print(recommendations)

Recommendations for product P1924:
    product_id              name  category    brand   price  rating
205      P6624     Premium Dress  Clothing  PlayFun  148.04     3.9
874      P9495    Incredible Hat  Clothing  PlayFun   32.26     3.7
258      P4135       Amazing Hat  Clothing  PlayFun   33.68     4.3
803      P4981     Awesome Jeans  Clothing  PlayFun  160.89     4.5
86       P8640  Incredible Shoes  Clothing  PlayFun   46.06     4.5


# 1. Import Required Libraries

In [64]:
# Cell 1: Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import sklearn


# 2. Load the Dataset

In [67]:
# Cell 2: Read the product catalogue into a DataFrame
file_path = 'synthetic_products.csv'
product_data = pd.read_csv(filepath_or_buffer=file_path)
product_data.head(n=5)  # Preview the first five rows


Unnamed: 0,product_id,name,category,brand,description,price,rating,num_reviews
0,P1226,Incredible Pillow,Home & Garden,BookWorm,A high-quality home & garden product. This Inc...,476.17,3.2,885
1,P3436,Premium T-shirt,Clothing,BookWorm,A high-quality clothing product. This Premium ...,66.35,1.4,556
2,P7441,Incredible Smartphone,Electronics,FashionStyle,A high-quality electronics product. This Incre...,1870.37,1.4,754
3,P2272,Awesome Smartwatch,Electronics,SportsMaster,A high-quality electronics product. This Aweso...,899.32,3.5,539
4,P2687,Premium Smartphone,Electronics,HomeComfort,A high-quality electronics product. This Premi...,1587.55,4.1,536


# 3. Select Relevant Features

In [70]:
# Cell 3: Keep only the columns used to compare products
features = product_data.loc[:, ['category', 'brand', 'price', 'rating']]


# 4. One-hot Encoding and Normalization

In [73]:


# Cell 4: One-hot encode the categorical columns and normalize the numeric ones

# One-hot encode 'category' and 'brand'. This step must live in this cell:
# without it `encoded_features` is undefined on a fresh kernel.
# The dense-output keyword is `sparse_output` in scikit-learn >= 1.2 and
# `sparse` in older releases, so try the new keyword first and fall back
# when the constructor rejects it.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(features[['category', 'brand']])

# Normalize the 'price' and 'rating' columns for better comparison
scaler = MinMaxScaler()
price_rating_normalized = scaler.fit_transform(features[['price', 'rating']])

# Combine the encoded categorical features with the normalized numerical features
# (one row per product, in the same row order as `features`)
combined_features = np.hstack((encoded_features, price_rating_normalized))

# The similarity matrix is computed from `combined_features` in Cell 5.

# 5. Calculate Cosine Similarity

In [76]:
# Cell 5: Pairwise cosine similarity between every pair of product feature rows
similarity_matrix = cosine_similarity(X=combined_features)


# 6. Define Recommendation Function

In [79]:
# Cell 6: Define function to get recommendations based on similarity
def get_recommendations(product_id, num_recommendations=5):
    """Return the products most similar to ``product_id``.

    Reads the module-level ``product_data`` DataFrame and the module-level
    ``similarity_matrix``, whose row ``i`` holds the similarity scores of the
    ``i``-th row of ``product_data`` against every row.

    Parameters
    ----------
    product_id
        Value to look up in ``product_data['product_id']``. If several rows
        match, the first one is used.
    num_recommendations : int, default 5
        Maximum number of products to return. Values below zero are treated
        as zero.

    Returns
    -------
    pandas.DataFrame or None
        The recommended rows (columns ``product_id``, ``name``, ``category``,
        ``brand``, ``price``, ``rating``) ordered from most to least similar,
        or ``None`` (after printing a message) when ``product_id`` is absent.
    """
    # Locate the product by row *position* rather than by index label, so the
    # lookup stays correct when `product_data` does not have a default
    # 0..n-1 index (both `similarity_matrix` and `.iloc` are positional).
    matches = np.flatnonzero((product_data['product_id'] == product_id).to_numpy())

    if matches.size == 0:
        print(f"Product ID {product_id} not found.")
        return None

    idx = int(matches[0])

    # Similarity scores of this product against every product
    scores = np.asarray(similarity_matrix[idx])

    # Rank from most to least similar; the stable sort keeps the original row
    # order among ties.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the product itself explicitly. Discarding "whatever ranks first"
    # is wrong when another product ties with it at the top score.
    limit = max(num_recommendations, 0)
    recommended_indices = ranked[ranked != idx][:limit]

    # Return the recommended products
    return product_data.iloc[recommended_indices][['product_id', 'name', 'category', 'brand', 'price', 'rating']]


# 7. Test the Recommendation System

In [82]:
# Cell 7: Test the recommendation system
# Strip surrounding whitespace from the typed ID: the lookup is an exact
# string match, so a stray space would otherwise report "not found".
sample_product_id = input("Enter the Product ID to get recommendations: ").strip()
recommendations = get_recommendations(sample_product_id)

# `get_recommendations` returns None when the ID is not in the catalogue
if recommendations is not None:
    print(f"Recommendations for product {sample_product_id}:")
    print(recommendations)
else:
    print("No recommendations available.")


Enter the Product ID to get recommendations:  P1226


Recommendations for product P1226:
    product_id             name       category     brand   price  rating
640      P4651     Awesome Sofa  Home & Garden  BookWorm  445.28     3.4
565       P147  Incredible Vase  Home & Garden  BookWorm  438.40     3.5
739      P5724  Fantastic Plant  Home & Garden  BookWorm  431.39     2.9
858      P8601     Amazing Sofa  Home & Garden  BookWorm  334.24     3.1
708      P9291    Amazing Plant  Home & Garden  BookWorm  423.64     2.9
