In [1]:
import json
import random
import numpy as np

# --- Dataset configuration -------------------------------------------------
# Number of synthetic customer records to generate.
NUM_CUSTOMERS = 50_000

# Membership tiers, listed in the same order as the sampling weights that
# generate_customer_data() passes to random.choices().
MEMBERSHIP_LEVELS = [
    "Bronze",
    "Silver",
    "Gold",
    "Platinum",
]

# Product categories a customer may list as preferred.
PRODUCT_CATEGORIES = [
    "Household appliances",
    "Furniture",
    "Tools",
    "Stationery and sports equipment",
    "Jewelry and cosmetics",
    "Toys",
    "Car accessories",
    "Electrical appliances",
    "Gifts",
    "Computer and mobile phone accessories",
]

def generate_customer_data():
    """Return one synthetic customer record as a plain dict.

    Every field is drawn independently from the ``random`` module; no field
    is derived from another. The draws happen in the same order as the keys
    appear in the returned dict.

    Returns:
        dict: keys ``customer_id``, ``age``, ``gender``, ``income``,
        ``membership_level``, ``total_spent``, ``visits_per_year``,
        ``average_basket_size``, ``preferred_categories``,
        ``online_shopper``, ``promotion_response_rate`` and
        ``loyalty_points``.
    """
    profile = {}

    # Identity and demographics.
    profile["customer_id"] = random.randint(100000, 999999)
    profile["age"] = random.randint(18, 80)
    profile["gender"] = random.choice(["Male", "Female", "Other"])
    profile["income"] = round(random.uniform(15000, 120000), 2)  # annual

    # Tier weights line up with MEMBERSHIP_LEVELS, so the first tier is the
    # most likely and the last the least likely.
    tier_weights = [50, 30, 15, 5]
    (tier,) = random.choices(MEMBERSHIP_LEVELS, weights=tier_weights)
    profile["membership_level"] = tier

    # Purchasing behaviour.
    profile["total_spent"] = round(random.uniform(100, 10000), 2)
    profile["visits_per_year"] = random.randint(1, 50)
    profile["average_basket_size"] = round(random.uniform(10, 1000), 2)

    # Between one and five distinct categories, sampled without replacement.
    category_count = random.randint(1, 5)
    profile["preferred_categories"] = random.sample(
        PRODUCT_CATEGORIES, category_count
    )

    # Engagement.
    profile["online_shopper"] = random.choice([True, False])
    # Proportion of promotions the customer responded to, in [0, 1].
    profile["promotion_response_rate"] = round(random.uniform(0, 1), 2)
    profile["loyalty_points"] = random.randint(0, 5000)

    return profile

def generate_dataset(num_customers):
    """Generate ``num_customers`` customer records with unique IDs.

    ``generate_customer_data()`` draws ``customer_id`` independently for each
    record from the 900,000 six-digit values, so a large batch would contain
    duplicate IDs. To make ``customer_id`` usable as a key, distinct IDs are
    sampled here without replacement from the same six-digit range and
    written over the ID of each generated record.

    Args:
        num_customers: number of records to generate. Zero or a negative
            value yields an empty list.

    Returns:
        list[dict]: one record per customer, each with a distinct
        ``customer_id``.

    Raises:
        ValueError: if ``num_customers`` exceeds the number of available
            six-digit IDs, in which case uniqueness is impossible.
    """
    if num_customers <= 0:
        return []

    # Same inclusive 100000..999999 range that generate_customer_data() uses.
    id_space = range(100000, 1000000)
    if num_customers > len(id_space):
        raise ValueError(
            f"Cannot assign unique six-digit customer IDs to {num_customers} "
            f"customers; at most {len(id_space)} are available."
        )

    # Sampling from a range is memory-efficient and guarantees no repeats.
    unique_ids = random.sample(id_space, num_customers)

    dataset = []
    for customer_id in unique_ids:
        record = generate_customer_data()
        record["customer_id"] = customer_id
        dataset.append(record)
    return dataset

def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    The target file is opened in text write mode, so an existing file with
    the same name is overwritten.

    Args:
        data: any object the ``json`` module can serialise.
        filename: path of the file to write.
    """
    with open(filename, "w") as out:
        # Serialise inside the ``with`` block so the file is opened before
        # any serialisation error can occur, as with json.dump().
        serialized = json.dumps(data, indent=4)
        out.write(serialized)

# Build the whole synthetic dataset in memory, then write it to disk as JSON
# and report where it was saved.
data = generate_dataset(num_customers=NUM_CUSTOMERS)
save_to_json(data=data, filename="customer_segmentation_dataset.json")

print("Dataset generated and saved to 'customer_segmentation_dataset.json'")


Dataset generated and saved to 'customer_segmentation_dataset.json'
