In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
file_path = "Dataset.csv"  # Change to your file path
df = pd.read_csv(file_path, encoding="latin1")

# Encode categorical variables as integer codes. The fitted encoders are kept
# in `label_encoders` so the codes can be mapped back to the original labels.
label_encoders = {}
for col in ["Age", "Gender", "Education"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Convert YES/NO in product categories to 1/0.
# Cells are normalised first (whitespace stripped, upper-cased) so spellings
# such as "Yes" or "no " are recognised. Anything that still does not map,
# including missing cells, is counted as 0 ("not purchased"): a bare .map()
# would leave NaN there, and cosine_similarity rejects NaN input.
product_columns = [col for col in df.columns if "Product_Category" in col]
df[product_columns] = df[product_columns].apply(
    lambda x: x.astype(str)
    .str.strip()
    .str.upper()
    .map({"YES": 1, "NO": 0})
    .fillna(0)
    .astype(int)
)

# Select features for recommendation (demographics + purchase history)
# NOTE(review): the demographic columns enter the similarity as integer label
# codes, so their magnitude (not just equality) affects the cosine score.
# One-hot encoding may be more appropriate - confirm before changing, since it
# would alter the recommendations.
features = ["Age", "Gender", "Education"] + product_columns
user_profiles = df[features]

# Compute similarity matrix (one row and one column per user, in df row order)
similarity_matrix = cosine_similarity(user_profiles)

# Function to get product recommendations for a given user index
def recommend_products(user_index, top_n=3, *, threshold=0.5,
                       similarity=None, purchases=None):
    """Recommend the products bought most often by a user's nearest neighbours.

    Args:
        user_index: Positional row index of the target user (negative values
            index from the end, as with normal sequence indexing).
        top_n: Number of most-similar *other* users to consult.
        threshold: A product is recommended when the share of consulted
            neighbours who bought it is strictly greater than this value.
        similarity: Square user-by-user similarity array (NumPy ndarray).
            Defaults to the module-level ``similarity_matrix``.
        purchases: DataFrame of 1/0 purchase flags with one row per user, in
            the same row order as ``similarity``. Defaults to
            ``df[product_columns]``.

    Returns:
        A list of product column names, most frequently bought first. The list
        is empty when ``top_n`` is not positive or there are no other users.

    Raises:
        IndexError: If ``user_index`` is out of range for ``similarity``.
    """
    if similarity is None:
        similarity = similarity_matrix
    if purchases is None:
        purchases = df[product_columns]
    if top_n <= 0:
        return []

    scores = similarity[user_index]
    # Normalise a negative index so it can be compared with argsort positions.
    own_position = user_index % len(scores)

    # Rank every user by descending similarity; the stable sort keeps ties in
    # row order so the result is deterministic.
    ranked = (-scores).argsort(kind="stable")
    # Drop the target user explicitly. Assuming they are always the single
    # best match is wrong when another user has an identical profile (a tie
    # at similarity 1.0), which would keep the target as their own neighbour.
    neighbours = ranked[ranked != own_position][:top_n]
    if len(neighbours) == 0:
        return []

    purchase_share = purchases.iloc[neighbours].mean().sort_values(
        ascending=False, kind="mergesort"
    )
    # Recommend frequently bought products
    return purchase_share.index[purchase_share > threshold].tolist()

# Demo: fetch and display the recommendations for the first row of the dataset.
# `user_id` is used as the positional row index passed to recommend_products.
user_id = 0
recommended_products = recommend_products(user_index=user_id)
print(
    f"Recommended products for user {user_id}: {recommended_products}"
)




Recommended products for user 0: ['Product_Category_Appliances', 'Product_Category_Electronics', 'Product_Category_Groceries', 'Product_Category_Personal_Care', 'Product_Category_Clothing']
