In [1]:
import pandas as pd
import numpy as np


In [2]:
cities = [
    "Manchester", "Salford", "Stockport", "Bolton",
    "Wigan", "Preston", "Liverpool", "Chester",
    "Blackpool", "Lancaster"
]

regions = ["Greater Manchester", "Greater Manchester", "Greater Manchester",
           "Greater Manchester", "Greater Manchester", "Lancashire",
           "Merseyside", "Cheshire", "Lancashire", "Lancashire"]

city_df = pd.DataFrame({
    "city": cities,
    "region": regions
})


In [3]:
business_types = ["Confectionery Store", "Bakery"]
formats = ["High Street", "Mall", "Market Stall", "Transport Hub"]
position_segments = ["Value", "Premium", "Artisanal", "Vegan/Free From", "Family"]

def generate_businesses(n=50):
    records = []
    for i in range(n):
        city = np.random.choice(city_df["city"])
        btype = np.random.choice(business_types, p=[0.45, 0.55])
        fmt = np.random.choice(formats)
        pos = np.random.choice(position_segments)
        records.append({
            "business_id": f"BUS{str(i).zfill(3)}",
            "business_type": btype,
            "city": city,
            "format": fmt,
            "positioning": pos
        })
    return pd.DataFrame(records)

businesses = generate_businesses(80)


In [4]:
bakery_items = [
    ("Croissant", 2.60), ("Bread Loaf", 3.20), ("Coffee", 2.30),
    ("Cake Slice", 4.50), ("Sandwich", 5.00)
]

confectionery_items = [
    ("Handmade Chocolates", 5.20), ("Pick & Mix (100g)", 2.80),
    ("Fudge (100g)", 3.00), ("Gift Box", 12.00), ("Hot Chocolate", 3.50)
]


In [5]:
menu = []

for _, b in businesses.iterrows():
    if b["business_type"] == "Bakery":
        items = bakery_items
    else:
        items = confectionery_items

    for item, price in items:
        cost = round(price * np.random.uniform(0.4, 0.6), 2)
        gross_margin = round(price - cost, 2)
        menu.append({
            "business_id": b["business_id"],
            "item": item,
            "price": price,
            "cost": cost,
            "gross_margin": gross_margin
        })

menu_df = pd.DataFrame(menu)


In [7]:
customer_segments = ["Students", "Families", "Tourists", "Commuters", "Retirees"]

# one Dirichlet draw per business (row)
dirichlet_samples = np.random.dirichlet(
    np.ones(len(customer_segments)),
    size=len(businesses)
)

# assign each segment to its own column
for i, seg in enumerate(customer_segments):
    businesses[f"seg_{seg}"] = dirichlet_samples[:, i]


In [8]:
def simulate_sales(businesses, menu, weeks=10):
    rows = []
    for _, b in businesses.iterrows():
        biz_menu = menu_df[menu_df.business_id == b.business_id]
        avg_customers = np.random.randint(100, 500)
        
        for w in range(1, weeks+1):
            cust = int(avg_customers * np.random.uniform(0.8, 1.3))
            items = np.random.poisson(1.8, cust)
            total_items = items.sum()
            
            prices = np.random.choice(biz_menu.price, total_items)
            costs  = np.random.choice(biz_menu.cost, total_items)
            
            revenue = prices.sum()
            cogs = costs.sum()
            profit = revenue - cogs
            
            rows.append({
                "business_id": b.business_id,
                "week": w,
                "customers": cust,
                "revenue": revenue,
                "cogs": cogs,
                "profit": profit
            })
    return pd.DataFrame(rows)

weekly_sales = simulate_sales(businesses, menu_df)


In [9]:
# Weekly aggregated per business
weekly_summary = weekly_sales.groupby("business_id").agg({
    "customers": "mean",
    "revenue": "mean",
    "profit": "mean"
}).reset_index()

# Yearly projections (52 weeks)
weekly_summary["year_revenue"] = weekly_summary["revenue"] * 52
weekly_summary["year_profit"] = weekly_summary["profit"] * 52


In [10]:
compare = weekly_summary.merge(
    businesses[["business_id", "business_type"]],
    on="business_id"
)

agg_compare = compare.groupby("business_type").agg({
    "revenue": "median",
    "profit": "median",
    "year_revenue": "median",
    "year_profit": "median"
}).reset_index()

agg_compare


Unnamed: 0,business_type,revenue,profit,year_revenue,year_profit
0,Bakery,2041.74,1033.67,106170.48,53750.84
1,Confectionery Store,2683.01,1331.985,139516.52,69263.22


In [11]:
sales_with_demo = weekly_sales.merge(
    businesses[["business_id"] + [f"seg_{s}" for s in customer_segments]],
    on="business_id"
)
