In [1]:
import random
import json
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

%load_ext jupyter_black

In [3]:
num_items = 5
num_customers = 100

# Generate customer IDs and related information
customer_ids = [id for id in range(1, num_customers + 1)]
ages = [random.randint(18, 50) for _ in range(num_customers)]
genders = ["male", "female"]

# Generate unique product IDs for each category
categories = ["Electronics", "Clothing", "Home & Kitchen", "Beauty"]
unique_product_ids = {
    category: [f"{category[:3].upper()}{i:03d}" for i in range(1, num_items + 1)]
    for category in categories
}

product_description = {
    "Electronics": {
        "ELE001": "SonicBlast Bluetooth Speaker",
        "ELE002": "PowerPulse Wireless Earbuds",
        "ELE003": "RapidCharge Portable Power Bank",
        "ELE004": "SmartTech 4K Ultra HD TV",
        "ELE005": "EcoCharge Solar Charger",
    },
    "Clothing": {
        "CLO001": "Classic Comfort Cotton T-Shirt",
        "CLO002": "Athleisure Performance Leggings",
        "CLO003": "Elegance Women's Evening Dress",
        "CLO004": "Summer Breeze Floral Skirt",
        "CLO005": "Urban Style Denim Jacket",
    },
    "Home & Kitchen": {
        "HOM001": "Cozy Haven Memory Foam Mattress",
        "HOM002": "Gourmet Chef Stainless Steel Cookware Set",
        "HOM003": "Elegant Home Bamboo Storage Organizer",
        "HOM004": "Luxury Living Cotton Bath Towel Set",
        "HOM005": "Deluxe Kitchen Mixer Pro",
    },
    "Beauty": {
        "BEA001": "RadiantSkin Anti-Aging Cream",
        "BEA002": "LuxeLips Hydrating Lip Balm",
        "BEA003": "GlowBeauty Vitamin C Serum",
        "BEA004": "SilkTouch Hair Straightener",
        "BEA005": "PureZen Aromatherapy Essential Oils Set",
    },
}


# Generate purchase dates
purchase_dates = [f"2023-01-{d:02d}" for d in range(1, 32)]
random.shuffle(purchase_dates)
purchase_dates *= (num_customers // len(purchase_dates)) + 1
purchase_dates = purchase_dates[:num_customers]

# Generate data
data = []
for customer_id in customer_ids:
    product_categories = random.choice(categories)
    product_id = random.choice(unique_product_ids[product_categories])
    # Access and print the product description
    for _, products in product_description.items():
        if product_id in products:
            product_desc = products[product_id]
            continue
    purchase_date = random.choice(purchase_dates)
    price = random.randint(20, 1000)
    ratings = round(random.uniform(3.5, 5.0), 1)
    page_views = random.randint(10, 50)
    time_spent = random.randint(60, 240)
    age = random.choice(ages)
    gender = random.choice(genders)
    data.append(
        [
            purchase_date,
            customer_id,
            product_id,
            product_desc,
            product_categories,
            price,
            ratings,
            page_views,
            time_spent,
            age,
            gender,
        ]
    )

# Create DataFrame
df = pd.DataFrame(
    data,
    columns=[
        "purchase_date",
        "customer_id",
        "product_id",
        "product_description",
        "category",
        "price",
        "ratings",
        "page_views",
        "time_spent",
        "age",
        "gender",
    ],
)
df.to_csv("./dataset/dataset.csv", index=False)

In [4]:
df.head()

In [None]:
product_description = {
    "Electronics": {
        "ELE001": "SonicBlast Bluetooth Speaker",
        "ELE002": "PowerPulse Wireless Earbuds",
        "ELE003": "RapidCharge Portable Power Bank",
        "ELE004": "SmartTech 4K Ultra HD TV",
        "ELE005": "EcoCharge Solar Charger",
    },
    "Clothing": {
        "CLO001": "Classic Comfort Cotton T-Shirt",
        "CLO002": "Athleisure Performance Leggings",
        "CLO003": "Elegance Women's Evening Dress",
        "CLO004": "Summer Breeze Floral Skirt",
        "CLO005": "Urban Style Denim Jacket",
    },
    "Home & Kitchen": {
        "HOM001": "Cozy Haven Memory Foam Mattress",
        "HOM002": "Gourmet Chef Stainless Steel Cookware Set",
        "HOM003": "Elegant Home Bamboo Storage Organizer",
        "HOM004": "Luxury Living Cotton Bath Towel Set",
        "HOM005": "Deluxe Kitchen Mixer Pro",
    },
    "Beauty": {
        "BEA001": "RadiantSkin Anti-Aging Cream",
        "BEA002": "LuxeLips Hydrating Lip Balm",
        "BEA003": "GlowBeauty Vitamin C Serum",
        "BEA004": "SilkTouch Hair Straightener",
        "BEA005": "PureZen Aromatherapy Essential Oils Set",
    },
}

# Write the dictionary to a JSON file
with open('./dataset/product_description.json', 'w') as json_file:
    json.dump(product_description, json_file, indent=4)

In [5]:
df = pd.read_csv("./dataset/dataset.csv")
df["purchase_date"] = pd.to_datetime(df["purchase_date"])


def calculate_rfm(df):
    # Current date
    current_date = df["purchase_date"].max() + pd.Timedelta(days=1)

    # RFM Calculation
    rfm = (
        df.groupby("customer_id")
        .agg(
            {
                "purchase_date": lambda x: (current_date - x.max()).days,
                "product_id": "count",
                "price": "sum",
            }
        )
        .rename(
            columns={
                "purchase_date": "recency",
                "product_id": "frequency",
                "price": "monetary",
            }
        )
        .reset_index()
    )

    return rfm


def preprocess_data(data):
    # Handle missing values
    data.fillna(0, inplace=True)

    # Encode categorical variables
    data_encoded = pd.get_dummies(data, columns=["gender"], prefix="encoded", dtype=int)

    # Scale numerical features
    scaler = MinMaxScaler()
    numerical_cols = ["ratings", "page_views", "time_spent", "age"]
    data_encoded[numerical_cols] = scaler.fit_transform(data_encoded[numerical_cols])

    return data_encoded


def compute_cosine_similarity(data):
    # Calculate RFM
    rfm = calculate_rfm(data)
    rfm_matrix = rfm[["recency", "frequency", "monetary"]]

    preprocessed_data = preprocess_data(data)

    # Combine preprocessed_data with RFM
    combined_data = pd.concat([preprocessed_data, rfm_matrix], axis=1)

    used_columns = [
        "ratings",
        "page_views",
        "time_spent",
        "age",
        "encoded_female",
        "encoded_male",
        "recency",
        "frequency",
        "monetary",
    ]

    cosine_sim = cosine_similarity(combined_data[used_columns])
    cosine_sim_df = pd.DataFrame(
        cosine_sim, index=rfm["customer_id"], columns=rfm["customer_id"]
    )
    return cosine_sim_df


def recommend_products(customer_id, df, top_n=3):

    # Compute Cosine Similarity
    cosine_sim_df = compute_cosine_similarity(df)

    similar_customers = (
        cosine_sim_df[customer_id].sort_values(ascending=False).index[1:]
    )

    recommended_products = []
    for similar_customer in similar_customers[: top_n + 1]:
        products = df[df["customer_id"] == similar_customer]["product_id"].values
        recommended_products.extend(products)

    recommended_products = list(
        set(recommended_products)
        - set(df[df["customer_id"] == customer_id]["product_id"].values)
    )

    # Read the dictionary from the JSON file
    with open("./dataset/product_description.json", "r") as json_file:
        product_description = json.load(json_file)

    # Result list to store dictionaries
    result_list = []

    for product_id in recommended_products[:top_n]:
        for category, products in product_description.items():
            if product_id in products:
                result_dict = {
                    "product id": product_id,
                    "product categories": category,
                    "product description": products[product_id],
                }
                result_list.append(result_dict)
                break

    return result_list

In [6]:
# Recommend products for a given customer
customer_id = 69
top_n = 3
recommended_products = recommend_products(customer_id, df, top_n)
print(f"Recommended products for customer {customer_id}: {recommended_products}")
pd.DataFrame(recommended_products)

Recommended products for customer 69: [{'product id': 'ELE003', 'product categories': 'Electronics', 'product description': 'RapidCharge Portable Power Bank'}, {'product id': 'HOM004', 'product categories': 'Home & Kitchen', 'product description': 'Luxury Living Cotton Bath Towel Set'}, {'product id': 'HOM003', 'product categories': 'Home & Kitchen', 'product description': 'Elegant Home Bamboo Storage Organizer'}]


Unnamed: 0,product id,product categories,product description
0,ELE003,Electronics,RapidCharge Portable Power Bank
1,HOM004,Home & Kitchen,Luxury Living Cotton Bath Towel Set
2,HOM003,Home & Kitchen,Elegant Home Bamboo Storage Organizer


In [7]:
# Recommend products for a given customer
customer_id = 89
top_n = 3
recommended_products = recommend_products(customer_id, df, top_n)
print(f"Recommended products for customer {customer_id}: {recommended_products}")
pd.DataFrame(recommended_products)

Recommended products for customer 89: [{'product id': 'ELE005', 'product categories': 'Electronics', 'product description': 'EcoCharge Solar Charger'}, {'product id': 'ELE004', 'product categories': 'Electronics', 'product description': 'SmartTech 4K Ultra HD TV'}, {'product id': 'CLO002', 'product categories': 'Clothing', 'product description': 'Athleisure Performance Leggings'}]


Unnamed: 0,product id,product categories,product description
0,ELE005,Electronics,EcoCharge Solar Charger
1,ELE004,Electronics,SmartTech 4K Ultra HD TV
2,CLO002,Clothing,Athleisure Performance Leggings


In [8]:
# Recommend products for a given customer
customer_id = 28
top_n = 3
recommended_products = recommend_products(customer_id, df, top_n)
print(f"Recommended products for customer {customer_id}: {recommended_products}")
pd.DataFrame(recommended_products)

Recommended products for customer 28: [{'product id': 'ELE005', 'product categories': 'Electronics', 'product description': 'EcoCharge Solar Charger'}, {'product id': 'CLO001', 'product categories': 'Clothing', 'product description': 'Classic Comfort Cotton T-Shirt'}, {'product id': 'HOM002', 'product categories': 'Home & Kitchen', 'product description': 'Gourmet Chef Stainless Steel Cookware Set'}]


Unnamed: 0,product id,product categories,product description
0,ELE005,Electronics,EcoCharge Solar Charger
1,CLO001,Clothing,Classic Comfort Cotton T-Shirt
2,HOM002,Home & Kitchen,Gourmet Chef Stainless Steel Cookware Set
