In [6]:
import pandas as pd
import numpy as np
import random
from datetime import datetime

# Catalogue and supply-chain parameters consumed by the synthetic generator.

# Retail locations a record can be assigned to.
store_locations = [
    "Madison Avenue",
    "Soho",
    "Westfield World Trade Center",
    "Rodeo Drive",
    "South Coast Plaza",
    "San Francisco Centre",
    "Bal Harbour Shops",
    "Aventura Mall",
    "NorthPark Center",
    "The Galleria",
    "Oak Street",
    "Yorkdale Shopping Centre",
    "Pacific Centre",
    "El Palacio de Hierro (Polanco)",
    "El Palacio de Hierro (Santa Fe)",
]

# Product attributes; each record draws one value from every list.
handbag_types = [
    "Ella Tote",
    "Perry Tote",
    "Gemini Link Tote",
    "Kira Chevron Tote",
    "T Monogram Tote",
    "Robinson Tote",
    "McGraw Tote",
    "Britten Tote",
]

colors = [
    "Multi",
    "Black",
    "Blue",
    "Brown",
    "Gray",
    "Green",
    "Pink",
    "Purple",
    "Red",
    "White",
    "Yellow",
    "Beige",
    "Metallic",
]

materials = [
    "Canvas",
    "Leather",
    "Jacquard",
    "Pebbled Leather",
    "Coated Canvas",
]

bag_sizes = ["Mini", "Small", "Medium", "Large"]

# Supply-chain parameters.
manufacturing_cities = [
    "Guangzhou",
    "Shenzhen",
    "Ho Chi Minh City",
    "Hanoi",
    "Da Nang",
]

# (low, high) bounds in days; the generator unpacks these into random.randint,
# so both ends are inclusive.
lead_time_range = (30, 50)
shipping_time_range = (14, 18)

# Per-season multiplier applied to the base demand.
seasonal_demand = {
    "Winter": 0.8,
    "Spring": 1.2,
    "Summer": 1.0,
    "Fall": 1.1,
}

# Inventory-minus-forecast differences beyond these bounds are flagged
# "Overfill" / "Underfill"; anything in between counts as "Balanced".
overfill_threshold = 10
underfill_threshold = -10

# Generate synthetic data
def generate_dataset(n_samples=1000, seed=None):
    """Build a synthetic handbag inventory dataset.

    Each row combines randomly chosen product attributes (store, handbag
    type, color, material, size, manufacturing city) with randomly drawn
    lead/shipping times, a seasonally adjusted demand, an on-hand inventory
    level and a noisy demand forecast. The row is then flagged according to
    how far inventory deviates from the forecast.

    Args:
        n_samples: Number of rows to generate. Zero (or a negative value)
            yields an empty frame that still carries the full column schema.
        seed: Optional seed. When given, all randomness comes from a private
            ``random.Random(seed)`` so the same seed always produces the same
            dataset and the global ``random`` state is left untouched. When
            ``None`` (the default) the shared ``random`` module state is used.

    Returns:
        pandas.DataFrame with one row per sample and the columns listed in
        ``columns`` below.
    """
    # The `random` module exposes the same choice/randint/uniform API as a
    # Random instance, so it can stand in as the unseeded generator.
    rng = random.Random(seed) if seed is not None else random

    # Declared up front so an empty result still has the expected schema.
    columns = [
        "Store", "Handbag Type", "Color", "Material", "Size",
        "Manufacturing City", "Lead Time (Days)", "Shipping Time (Days)",
        "Total Lead Time (Days)", "Season", "Base Demand", "Adjusted Demand",
        "Inventory On Hand", "Forecast Demand", "Difference", "Flag",
    ]

    # Loop-invariant: the season names never change between rows.
    seasons = list(seasonal_demand)

    data = []
    for _ in range(n_samples):
        store = rng.choice(store_locations)
        handbag = rng.choice(handbag_types)
        color = rng.choice(colors)
        material = rng.choice(materials)
        size = rng.choice(bag_sizes)
        manufacturing_city = rng.choice(manufacturing_cities)
        lead_time = rng.randint(*lead_time_range)
        shipping_time = rng.randint(*shipping_time_range)
        total_lead_time = lead_time + shipping_time

        season = rng.choice(seasons)
        demand_multiplier = seasonal_demand[season]
        base_demand = rng.randint(50, 300)
        adjusted_demand = int(base_demand * demand_multiplier)
        inventory_on_hand = rng.randint(0, adjusted_demand + 50)

        # Forecast noise of up to +/-20%, drawn from the same generator as
        # everything else so that a single seed controls the whole dataset.
        forecast_multiplier = 1 + rng.uniform(-0.2, 0.2)
        forecast_demand = int(adjusted_demand * forecast_multiplier)
        diff = inventory_on_hand - forecast_demand

        if diff > overfill_threshold:
            flag = "Overfill"
        elif diff < underfill_threshold:
            flag = "Underfill"
        else:
            flag = "Balanced"

        data.append({
            "Store": store, "Handbag Type": handbag, "Color": color, "Material": material,
            "Size": size, "Manufacturing City": manufacturing_city, "Lead Time (Days)": lead_time,
            "Shipping Time (Days)": shipping_time, "Total Lead Time (Days)": total_lead_time,
            "Season": season, "Base Demand": base_demand, "Adjusted Demand": adjusted_demand,
            "Inventory On Hand": inventory_on_hand, "Forecast Demand": forecast_demand,
            "Difference": diff, "Flag": flag
        })
    return pd.DataFrame(data, columns=columns)

# Pool of canned comments per free-text column. The keys become column names
# in the dataset, and each row gets one entry picked at random from the
# matching list.
categories = {
    "Customer Feedback": [
        "Stylish but needs more compartments.",
        "Perfect size for travel.",
        "Could use better stitching on the straps.",
        "The material feels premium and durable.",
        "Customers appreciated the lightweight design.",
        "Wish this came in more vibrant colors.",
        "Loved the eco-friendly packaging.",
        "A bit pricey but worth it for the quality.",
        "Would love a matching wallet option.",
        "Handles started to wear out quickly.",
        "Disappointed with the zipper quality.",
        "The stitching is immaculate.",
        "More casual than expected but still lovely.",
        "Great bag, but the straps are too short.",
        "Could use a waterproof version.",
    ],
    "Operational Notes": [
        "Shipment delayed due to weather.",
        "Inventory system update caused delays.",
        "Unexpected surge in demand last week.",
        "Supplier contract renegotiated for better terms.",
        "High demand in spring; consider increasing supply.",
        "Lead time has improved significantly.",
        "Warehouse capacity exceeded during peak season.",
        "Operations team praised for efficiency.",
        "Extra stock ordered for Black Friday.",
        "Production line upgrades underway.",
        "Limited space in the warehouse.",
        "Improved coordination with logistics partners.",
        "Overstock issue at Aventura Mall.",
        "Shortage of popular color variants.",
        "Shipping rates increased this quarter.",
    ],
    "Marketing Promotions": [
        "Flash sale increased online traffic.",
        "Social media ads led to a spike in orders.",
        "Seasonal ads performed exceptionally well.",
        "Holiday promotions drove significant sales.",
        "Free shipping offer increased cart sizes.",
        "New campaign resonated well with younger audiences.",
        "Pre-order strategy worked well for new launches.",
        "Pop-up stores generated buzz.",
        "Collaboration with influencers boosted demand.",
        "Email marketing led to a 20% increase in sales.",
        "Loyalty program perks increased customer retention.",
        "Limited-time discounts drove foot traffic.",
        "TV commercials boosted brand visibility.",
        "Promotional bundles attracted more buyers.",
        "Cross-promotions with other brands saw success.",
    ],
    "Customer Preferences": [
        "Most customers preferred the Mini size.",
        "Leather handbags are the top choice this season.",
        "Bright colors are requested for summer collections.",
        "Shoppers seek lightweight yet spacious bags.",
        "Eco-conscious buyers prefer recycled materials.",
        "Metallic finishes remain popular for evening wear.",
        "Pastel colors are trending this spring.",
        "Classic designs are making a comeback.",
        "Requests for laptop-friendly sizes increased.",
        "Demand for gender-neutral designs is rising.",
        "Seasonal collections are always eagerly awaited.",
        "Minimalist designs are trending this season.",
        "More interest in bags with anti-theft features.",
        "Demand for vegan leather has grown.",
        "Customers like detachable pouches.",
    ],
}

# Generate dataset
dataset = generate_dataset(100000)

# Add comment columns: one randomly chosen comment per row for each category.
# random.choices draws the whole column in a single call; a row-wise
# DataFrame.apply would build a throwaway Series for every row only to ignore
# it, and does not return a per-row Series when the frame is empty.
for category, comments in categories.items():
    dataset[category] = random.choices(comments, k=len(dataset))

# Save the dataset (index=False keeps the row index out of the CSV)
output_path = "tory_burch_inventory_dataset_with_comments.csv"
dataset.to_csv(output_path, index=False)

print(f"Dataset saved to {output_path}")


Dataset saved to tory_burch_inventory_dataset_with_comments.csv
