In [1]:
# Hugo Boss Dataset in Jupyter Notebook
# =====================================

# Step 1 — Import Libraries
import pandas as pd
import numpy as np

# Step 2 — Base financial dataset, written as one record per fiscal year
# Each record lists its values in the order given by `_financial_columns`.
_financial_columns = [
    "Year",
    "Revenue_EUR_B",          # in billion euros
    "Gross_Margin_%",         # reported around 61–62%
    "Operating_Margin_%",     # from annual reports
    "Net_Profit_Margin_%",    # approximate
]
_financial_records = [
    (2022, 4.2, 61.5, 9.2, 5.31),
    (2023, 4.54, 61.5, 9.8, 5.30),
    (2024, 4.3, 61.8, 8.4, 5.97),
]

# Pivot the per-year records into a column-name -> list-of-values mapping.
financial_data = {
    column: list(values)
    for column, values in zip(_financial_columns, zip(*_financial_records))
}

df_financial = pd.DataFrame(financial_data)
df_financial


Unnamed: 0,Year,Revenue_EUR_B,Gross_Margin_%,Operating_Margin_%,Net_Profit_Margin_%
0,2022,4.2,61.5,9.2,5.31
1,2023,4.54,61.5,9.8,5.3
2,2024,4.3,61.8,8.4,5.97


In [2]:
# Step 3 — Regional sales breakdown
# (region, share of sales in percent): EMEA ~61%, Americas ~24%, APAC ~13%,
# with the remaining 2% booked under "Other".
_region_shares = [
    ("EMEA", 61),
    ("Americas", 24),
    ("Asia/Pacific", 13),
    ("Other", 2),
]

# Unzip the pairs into the two parallel columns of the table.
region_sales = {
    "Region": [region for region, _share in _region_shares],
    "Sales_Share_%": [share for _region, share in _region_shares],
}

df_region = pd.DataFrame(region_sales)
df_region

Unnamed: 0,Region,Sales_Share_%
0,EMEA,61
1,Americas,24
2,Asia/Pacific,13
3,Other,2


In [3]:
# Step 4 — Simulate customer demographics
# (NOTE: Hugo Boss does not publish exact demographics, so every value in
#  this table is synthetic.)

np.random.seed(42)
num_customers = 1000

# Category labels and, where the draw is weighted, their probabilities.
_age_pool = range(18, 70)  # upper bound is exclusive: ages 18..69
_gender_labels = ["Male", "Female", "Non-Binary"]
_gender_probs = [0.55, 0.4, 0.05]
_income_labels = ["<50k EUR", "50–100k EUR", "100–200k EUR", ">200k EUR"]
_income_probs = [0.1, 0.3, 0.4, 0.2]
_region_labels = ["EMEA", "Americas", "Asia/Pacific", "Other"]

# All draws share the global seeded RNG, so the order below
# (age, gender, income, region) determines the generated values.
age_draws = np.random.choice(_age_pool, size=num_customers)
gender_draws = np.random.choice(_gender_labels, size=num_customers, p=_gender_probs)
income_draws = np.random.choice(_income_labels, size=num_customers, p=_income_probs)
region_draws = np.random.choice(_region_labels, size=num_customers)  # no weights: uniform

demo_data = {
    "Customer_ID": range(1, num_customers + 1),
    "Age": age_draws,
    "Gender": gender_draws,
    "Income_Bracket": income_draws,
    "Region": region_draws,
}

df_demographics = pd.DataFrame(demo_data)
df_demographics.head()

Unnamed: 0,Customer_ID,Age,Gender,Income_Bracket,Region
0,1,56,Male,50–100k EUR,EMEA
1,2,69,Male,50–100k EUR,Other
2,3,46,Male,>200k EUR,Other
3,4,32,Male,>200k EUR,Asia/Pacific
4,5,60,Male,100–200k EUR,EMEA


In [5]:
# Step 5 — Simulate estimated annual spend per customer
# (synthetic: each customer's spend is a normal draw whose mean and standard
#  deviation are chosen by the customer's income bracket)
# `np` comes from the import cell at the top of the notebook.

# Re-seed so re-running this cell reproduces the same draws.
np.random.seed(42)

# Income bracket -> (mean, standard deviation) of annual spend in EUR.
# Entry order matters: draws are taken from the global RNG one bracket at a
# time, so reordering the entries changes the generated values.
spend_params_by_bracket = {
    "<50k EUR": (500, 150),
    "50–100k EUR": (1200, 200),
    "100–200k EUR": (2000, 300),
    ">200k EUR": (3500, 500),
}

# Fail loudly if a bracket has no parameters; otherwise those customers would
# silently keep the 0.0 placeholder below and distort every later average.
unmapped_brackets = set(df_demographics["Income_Bracket"]) - set(spend_params_by_bracket)
if unmapped_brackets:
    raise ValueError(
        "No spend parameters for income bracket(s): "
        f"{sorted(map(str, unmapped_brackets))}"
    )

# Float placeholder so the column exists (as floats) before the masked writes.
df_demographics["Estimated_Annual_Spend_EUR"] = 0.0

for bracket, (mean_spend, sd_spend) in spend_params_by_bracket.items():
    in_bracket = df_demographics["Income_Bracket"] == bracket
    # One draw per customer in this bracket, written back by boolean mask.
    df_demographics.loc[in_bracket, "Estimated_Annual_Spend_EUR"] = \
        np.random.normal(mean_spend, sd_spend, in_bracket.sum())

# A normal draw can come out negative; spend cannot, so floor it at zero.
df_demographics["Estimated_Annual_Spend_EUR"] = \
    df_demographics["Estimated_Annual_Spend_EUR"].clip(lower=0)

df_demographics.head()

Unnamed: 0,Customer_ID,Age,Gender,Income_Bracket,Region,Estimated_Annual_Spend_EUR
0,1,56,Male,50–100k EUR,EMEA,1251.510078
1,2,69,Male,50–100k EUR,Other,1185.110817
2,3,46,Male,>200k EUR,Other,3548.060388
3,4,32,Male,>200k EUR,Asia/Pacific,3268.862356
4,5,60,Male,100–200k EUR,EMEA,1603.930038


In [6]:
# Step 6 — Mean estimated spend for each (Region, Income_Bracket) segment
segment_keys = ["Region", "Income_Bracket"]

agg_spend = (
    df_demographics
    .groupby(segment_keys)["Estimated_Annual_Spend_EUR"]
    .mean()
    .reset_index()  # turn the group keys back into ordinary columns
)
agg_spend

Unnamed: 0,Region,Income_Bracket,Estimated_Annual_Spend_EUR
0,Americas,100–200k EUR,1965.940492
1,Americas,50–100k EUR,1208.613354
2,Americas,<50k EUR,442.903876
3,Americas,>200k EUR,3669.637948
4,Asia/Pacific,100–200k EUR,1993.920773
5,Asia/Pacific,50–100k EUR,1239.665022
6,Asia/Pacific,<50k EUR,479.275406
7,Asia/Pacific,>200k EUR,3554.905706
8,EMEA,100–200k EUR,2003.185607
9,EMEA,50–100k EUR,1196.50745


In [7]:
# Step 7 — Save datasets
# Target file name -> frame to write; files are written in this order,
# each without the row index.
csv_outputs = {
    "hugoboss_financial_summary.csv": df_financial,
    "hugoboss_region_sales.csv": df_region,
    "hugoboss_customer_demographics.csv": df_demographics,
    "hugoboss_spend_by_demo.csv": agg_spend,
}

for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name, index=False)

print("Datasets saved successfully!")

Datasets saved successfully!
