In [2]:
import numpy as np
import pandas as pd

SEED = 42
rng = np.random.default_rng(SEED)


In [3]:
howard_titles = [
    {"title": "Conan: The Tower of the Elephant", "series": "Conan", "popularity": 1.35},
    {"title": "Conan: Queen of the Black Coast", "series": "Conan", "popularity": 1.30},
    {"title": "Conan: Red Nails", "series": "Conan", "popularity": 1.20},
    {"title": "Conan: The Phoenix on the Sword", "series": "Conan", "popularity": 1.25},
    {"title": "Solomon Kane: Skulls in the Stars", "series": "Solomon Kane", "popularity": 0.75},
    {"title": "Kull: The Shadow Kingdom", "series": "Kull", "popularity": 0.60},
    {"title": "Robert E. Howard: Horror & Dark Fantasy Collection", "series": "Collection", "popularity": 0.95},
]

titles_df = pd.DataFrame(howard_titles)
titles_df


Unnamed: 0,title,series,popularity
0,Conan: The Tower of the Elephant,Conan,1.35
1,Conan: Queen of the Black Coast,Conan,1.3
2,Conan: Red Nails,Conan,1.2
3,Conan: The Phoenix on the Sword,Conan,1.25
4,Solomon Kane: Skulls in the Stars,Solomon Kane,0.75
5,Kull: The Shadow Kingdom,Kull,0.6
6,Robert E. Howard: Horror & Dark Fantasy Collec...,Collection,0.95


In [4]:
retailers = [
    {
        "retailer": "Waterstones (England)",
        "stores_england": 180,
        "avg_weekly_footfall_per_store": 5200,
        "fantasy_interest_rate": 0.020,
        "conversion_rate": 0.055,
        "gross_margin": 0.43,
        "format_mix": {"Paperback": 0.70, "Hardback": 0.15, "Mass Market": 0.10, "eBook": 0.05},
    },
    {
        "retailer": "WHSmith (England)",
        "stores_england": 500,
        "avg_weekly_footfall_per_store": 2600,
        "fantasy_interest_rate": 0.010,
        "conversion_rate": 0.040,
        "gross_margin": 0.36,
        "format_mix": {"Paperback": 0.55, "Hardback": 0.10, "Mass Market": 0.30, "eBook": 0.05},
    },
]

pd.DataFrame(retailers)[["retailer","stores_england","gross_margin"]]


Unnamed: 0,retailer,stores_england,gross_margin
0,Waterstones (England),180,0.43
1,WHSmith (England),500,0.36


In [5]:
format_pricing = {
    "Mass Market": {"list_price": 8.99,  "avg_discount": 0.10},
    "Paperback":   {"list_price": 10.99, "avg_discount": 0.12},
    "Hardback":    {"list_price": 18.99, "avg_discount": 0.15},
    "eBook":       {"list_price": 6.99,  "avg_discount": 0.25},
}

def effective_unit_price(fmt):
    fp = format_pricing[fmt]
    return fp["list_price"] * (1 - fp["avg_discount"])

{fmt: round(effective_unit_price(fmt), 2) for fmt in format_pricing}


{'Mass Market': 8.09, 'Paperback': 9.67, 'Hardback': 16.14, 'eBook': 5.24}

In [6]:
age_bands = ["Under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65+"]
genders = ["Female", "Male", "Non-binary/Other"]


In [7]:
demo_profiles = {
    "Waterstones (England)": {
        "age_probs": np.array([0.04, 0.12, 0.24, 0.22, 0.18, 0.12, 0.08]),
        "gender_probs": np.array([0.46, 0.50, 0.04]),
        "avg_basket_multiplier": 1.10
    },
    "WHSmith (England)": {
        "age_probs": np.array([0.03, 0.10, 0.20, 0.22, 0.20, 0.15, 0.10]),
        "gender_probs": np.array([0.48, 0.48, 0.04]),
        "avg_basket_multiplier": 1.05
    }
}

def sample_demographics(retailer_name, n):
    prof = demo_profiles[retailer_name]
    ages = rng.choice(age_bands, size=n, p=prof["age_probs"])
    gens = rng.choice(genders, size=n, p=prof["gender_probs"])
    return ages, gens


In [8]:
weeks = np.arange(1, 53)
seasonality = np.ones(52)

seasonality[23:34] *= 1.10   # summer boost
seasonality[47:52] *= 1.35   # holiday boost
seasonality[0:3]   *= 0.90   # early-year dip

seasonality[:12]


array([0.9, 0.9, 0.9, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ])

In [9]:
def pick_format(format_mix, n):
    formats = list(format_mix.keys())
    probs = np.array([format_mix[f] for f in formats], dtype=float)
    probs = probs / probs.sum()
    return rng.choice(formats, size=n, p=probs)


In [10]:
rows = []

# Title probabilities (weighted by popularity)
weights = titles_df["popularity"].to_numpy()
weights = weights / weights.sum()

title_to_series = titles_df.set_index("title")["series"].to_dict()

for r in retailers:
    for wk in weeks:
        seasonal = seasonality[wk - 1]

        # demand model: stores * footfall * interest * conversion
        footfall = r["stores_england"] * r["avg_weekly_footfall_per_store"]
        expected_orders = footfall * r["fantasy_interest_rate"] * r["conversion_rate"] * seasonal

        # actual orders (random around expected)
        orders = int(rng.poisson(max(expected_orders, 1)))

        if orders == 0:
            continue

        chosen_titles = rng.choice(titles_df["title"].to_numpy(), size=orders, p=weights)
        chosen_series = [title_to_series[t] for t in chosen_titles]
        formats = pick_format(r["format_mix"], orders)

        ages, gens = sample_demographics(r["retailer"], orders)

        basket_base = demo_profiles[r["retailer"]]["avg_basket_multiplier"]
        basket_mult = rng.normal(loc=basket_base, scale=0.05, size=orders).clip(0.90, 1.35)

        for i in range(orders):
            fmt = formats[i]
            unit_price = effective_unit_price(fmt) * basket_mult[i]

            revenue = unit_price
            cost = revenue * (1 - r["gross_margin"])
            profit = revenue - cost

            rows.append({
                "week": wk,
                "retailer": r["retailer"],
                "title": chosen_titles[i],
                "series": chosen_series[i],
                "format": fmt,
                "customer_age_band": ages[i],
                "customer_gender": gens[i],
                "revenue_gbp": round(revenue, 2),
                "cost_gbp": round(cost, 2),
                "profit_gbp": round(profit, 2),
            })

sales = pd.DataFrame(rows)
sales.head(10)


Unnamed: 0,week,retailer,title,series,format,customer_age_band,customer_gender,revenue_gbp,cost_gbp,profit_gbp
0,1,Waterstones (England),Kull: The Shadow Kingdom,Kull,Mass Market,35-44,Male,9.27,5.28,3.99
1,1,Waterstones (England),Solomon Kane: Skulls in the Stars,Solomon Kane,Mass Market,18-24,Female,8.86,5.05,3.81
2,1,Waterstones (England),Conan: The Tower of the Elephant,Conan,Hardback,55-64,Female,18.88,10.76,8.12
3,1,Waterstones (England),Robert E. Howard: Horror & Dark Fantasy Collec...,Collection,Paperback,55-64,Male,10.75,6.13,4.62
4,1,Waterstones (England),Solomon Kane: Skulls in the Stars,Solomon Kane,Paperback,Under 18,Female,10.94,6.24,4.71
5,1,Waterstones (England),Solomon Kane: Skulls in the Stars,Solomon Kane,Paperback,25-34,Female,10.77,6.14,4.63
6,1,Waterstones (England),Conan: The Tower of the Elephant,Conan,Paperback,55-64,Female,10.76,6.13,4.63
7,1,Waterstones (England),Conan: Red Nails,Conan,Paperback,35-44,Female,10.28,5.86,4.42
8,1,Waterstones (England),Conan: Red Nails,Conan,Paperback,18-24,Female,10.98,6.26,4.72
9,1,Waterstones (England),Robert E. Howard: Horror & Dark Fantasy Collec...,Collection,Paperback,25-34,Male,12.01,6.85,5.17


In [11]:
weekly_summary = (
    sales.groupby(["retailer", "week"], as_index=False)
         .agg(units_sold=("title", "count"),
              revenue_gbp=("revenue_gbp", "sum"),
              profit_gbp=("profit_gbp", "sum"))
)

weekly_summary.head(10)


Unnamed: 0,retailer,week,units_sold,revenue_gbp,profit_gbp
0,WHSmith (England),1,466,4700.04,1691.99
1,WHSmith (England),2,473,4867.96,1752.5
2,WHSmith (England),3,482,4799.24,1727.76
3,WHSmith (England),4,538,5470.92,1969.51
4,WHSmith (England),5,505,5135.62,1848.81
5,WHSmith (England),6,500,5042.58,1815.37
6,WHSmith (England),7,553,5481.14,1973.18
7,WHSmith (England),8,538,5420.74,1951.53
8,WHSmith (England),9,511,5123.87,1844.49
9,WHSmith (England),10,525,5349.8,1925.97


In [12]:
yearly_summary = (
    sales.groupby("retailer", as_index=False)
         .agg(units_sold=("title", "count"),
              revenue_gbp=("revenue_gbp", "sum"),
              profit_gbp=("profit_gbp", "sum"))
         .sort_values("profit_gbp", ascending=False)
)

yearly_summary


Unnamed: 0,retailer,units_sold,revenue_gbp,profit_gbp
1,Waterstones (England),56027,631984.83,271752.32
0,WHSmith (England),28292,285324.36,102717.37


In [13]:
age_demo = (
    sales.groupby(["retailer", "customer_age_band"], as_index=False)
         .agg(units=("title", "count"),
              revenue_gbp=("revenue_gbp", "sum"),
              profit_gbp=("profit_gbp", "sum"))
)

gender_demo = (
    sales.groupby(["retailer", "customer_gender"], as_index=False)
         .agg(units=("title", "count"),
              revenue_gbp=("revenue_gbp", "sum"),
              profit_gbp=("profit_gbp", "sum"))
)

age_demo, gender_demo


(                 retailer customer_age_band  units  revenue_gbp  profit_gbp
 0       WHSmith (England)             18-24   2878     28900.77    10404.45
 1       WHSmith (England)             25-34   5607     56624.06    20384.82
 2       WHSmith (England)             35-44   6323     63884.79    22998.35
 3       WHSmith (England)             45-54   5585     56317.75    20274.78
 4       WHSmith (England)             55-64   4258     43001.34    15480.38
 5       WHSmith (England)               65+   2783     27775.82     9999.33
 6       WHSmith (England)          Under 18    858      8819.83     3175.26
 7   Waterstones (England)             18-24   6738     76003.73    32681.42
 8   Waterstones (England)             25-34  13354    150802.27    64844.66
 9   Waterstones (England)             35-44  12397    139717.11    60077.82
 10  Waterstones (England)             45-54  10201    115289.00    49574.24
 11  Waterstones (England)             55-64   6695     75351.38    32401.17