
# 01 - Synthetic FX pricing dataset (realistic simulation)

This notebook generates a synthetic FX dataset with realistic economics:

- Daily data per country / branch / channel / currency / direction.
- Margins by segment around a typical level (e.g. 9–14%).
- Turnover driven by:
  - base demand per segment,
  - day-of-week effect,
  - controlled random noise,
  - price elasticity: turnover decreases when margin increases.
- GP in EUR as margin_rate * turnover_eur.
- ATV and transactions consistent with turnover.

The output is a CSV file used by the rest of the project.


In [48]:
# ============================================
# Imports and global configuration
# ============================================

import os
import numpy as np
import pandas as pd

from datetime import datetime

# Reproducibility
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Output path
OUTPUT_DIR = "../data"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "synthetic_pricing_daily.csv")

# Date range for the simulation (one full year)
DATE_START = "2025-01-01"
DATE_END   = "2025-12-31"
dates = pd.date_range(DATE_START, DATE_END, freq="D")

print("Dates:", dates.min().date(), "to", dates.max().date())


Dates: 2025-01-01 to 2025-12-31


In [49]:
# ============================================
# Business structure (countries, branches, channels, currencies)
# ============================================

countries = ["UK", "ES", "SG", "AU"]

# Simple branch structure: each country has 3 branches per channel
BRANCHES_PER_COUNTRY = 3

channels = ["Airport", "High Street"]
directions = ["BUY", "SELL"]
currencies = ["USD", "EUR", "GBP", "JPY", "AUD"]

# FX rates to EUR (approximate, just for scaling)
FX_TO_EUR = {
    "EUR": 1.00,
    "USD": 0.93,
    "GBP": 1.15,
    "JPY": 0.0062,
    "AUD": 0.60
}

In [50]:
# ============================================
# Margin, elasticity and base demand configuration
# ============================================

# Typical margin level per (channel, direction)
BASE_MARGIN = {
    ("Airport", "BUY"):  0.11,
    ("Airport", "SELL"): 0.09,
    ("High Street", "BUY"):  0.10,
    ("High Street", "SELL"): 0.08,
}

# Currency adjustment to the typical margin
CURRENCY_MARGIN_ADJ = {
    "EUR": 0.00,
    "USD": 0.002,
    "GBP": 0.003,
    "JPY": -0.001,
    "AUD": -0.002,
}

# Margin noise (standard deviation)
MARGIN_SIGMA = 0.007  # ~0.7 pp

# Global safety bounds for margin
MARGIN_MIN = 0.06
MARGIN_MAX = 0.18

# Elasticity exponent per channel/direction:
# turnover(m) = base_turnover * (m / base_margin) ** (-elasticity)
ELASTICITY = {
    ("Airport", "BUY"):  1.4,
    ("Airport", "SELL"): 1.3,
    ("High Street", "BUY"):  1.4,
    ("High Street", "SELL"): 1.3,
}

# Base daily turnover level in EUR per (country, channel)
# These are "typical" daily volumes, before margin adjustment and day-of-week effects.
BASE_DAILY_TURNOVER = {
    ("UK", "Airport"):       12000,
    ("UK", "High Street"):    9000,
    ("ES", "Airport"):        9000,
    ("ES", "High Street"):    7000,
    ("SG", "Airport"):       11000,
    ("SG", "High Street"):    8000,
    ("AU", "Airport"):       10000,
    ("AU", "High Street"):    7500,
}

# Currency share within each country (just relative weights)
CURRENCY_SHARE = {
    "UK": {"USD": 0.35, "EUR": 0.25, "GBP": 0.25, "JPY": 0.10, "AUD": 0.05},
    "ES": {"USD": 0.25, "EUR": 0.40, "GBP": 0.20, "JPY": 0.10, "AUD": 0.05},
    "SG": {"USD": 0.40, "EUR": 0.20, "GBP": 0.15, "JPY": 0.20, "AUD": 0.05},
    "AU": {"USD": 0.30, "EUR": 0.20, "GBP": 0.15, "JPY": 0.10, "AUD": 0.25},
}

# Average ATV in EUR per currency
AVG_ATV_EUR = {
    "USD": 550,
    "EUR": 500,
    "GBP": 520,
    "JPY": 430,
    "AUD": 480
}

ATV_SIGMA_FRAC = 0.10  # 10% noise on ATV

# Day-of-week effect for base demand
# Monday=0, Sunday=6 (pandas weekday convention)
DOW_MULTIPLIER = {
    0: 0.90,  # Monday
    1: 0.95,
    2: 1.00,
    3: 1.05,
    4: 1.10,
    5: 1.20,  # Saturday
    6: 1.15,  # Sunday
}

# Turnover noise (lognormal style)
TURNOVER_LOG_SIGMA = 0.15 # ~30% variability on top of DOW and base

In [51]:
# ============================================
# Helper function: simulate one day for a given segment
# ============================================

def simulate_day_for_segment(date, country, branch, channel, currency, direction, rng):
    """
    Simulate one daily record for a given (country, branch, channel, currency, direction).
    Returns a dict with all relevant KPIs in EUR.
    """
    # --- Base margin for this segment ---
    base_margin = BASE_MARGIN[(channel, direction)] + CURRENCY_MARGIN_ADJ[currency]
    margin = rng.normal(loc=base_margin, scale=MARGIN_SIGMA)
    margin = float(np.clip(margin, MARGIN_MIN, MARGIN_MAX))

    # --- Base turnover level for this segment ---
    base_turn = BASE_DAILY_TURNOVER[(country, channel)]
    cur_share = CURRENCY_SHARE[country][currency]
    base_turn_segment = base_turn * cur_share

    # --- Day-of-week effect + lognormal noise ---
    dow = date.weekday()
    dow_factor = DOW_MULTIPLIER[dow]

    # lognormal-like noise: exp(N(0, sigma^2))
    noise = float(np.exp(rng.normal(loc=0.0, scale=TURNOVER_LOG_SIGMA)))

    base_turnover_today = base_turn_segment * dow_factor * noise

    # --- Elasticity effect of margin ---
    elasticity = ELASTICITY[(channel, direction)]
    volume_factor = (margin / base_margin) ** (-elasticity)

    turnover_eur = base_turnover_today * volume_factor

    # --- ATV and transactions ---
    avg_atv_currency = AVG_ATV_EUR[currency]
    atv_today = avg_atv_currency * np.exp(rng.normal(loc=0.0, scale=0.08))  # small log noise
    atv_today = max(atv_today, avg_atv_currency * 0.6)  # lower bound to avoid extreme values

    transactions = turnover_eur / atv_today
    transactions = max(transactions, 1.0)

    # --- Pax and hit_rate (just for completeness) ---
    # Assume pax is slightly higher than transactions
    pax = transactions * rng.uniform(1.1, 1.6)
    pax = max(pax, transactions)

    hit_rate = transactions / pax

    # --- GP in EUR ---
    gp_eur = margin * turnover_eur
    gp_pct = gp_eur / turnover_eur if turnover_eur > 0 else np.nan

    record = {
        "date": date,
        "country": country,
        "branch": branch,
        "channel": channel,
        "currency": currency,
        "direction": direction,
        "pax": float(pax),
        "transactions": float(transactions),
        "hit_rate": float(hit_rate),
        "ATV": float(atv_today),          # legacy naming
        "ATV_eur": float(atv_today),
        "turnover": float(turnover_eur),  # legacy naming
        "turnover_eur": float(turnover_eur),
        "margin_rate": float(margin),
        "gp": float(gp_eur),              # legacy naming
        "gp_eur": float(gp_eur),
        "gp_pct": float(gp_pct),
    }
    return record

In [52]:
# ============================================
# Main simulation loop
# ============================================

records = []

for country in countries:
    for channel in channels:
        for b in range(1, BRANCHES_PER_COUNTRY + 1):
            branch_name = f"{country}_BR_{b}"
            for currency in currencies:
                for direction in directions:
                    for date in dates:
                        rec = simulate_day_for_segment(
                            date=date,
                            country=country,
                            branch=branch_name,
                            channel=channel,
                            currency=currency,
                            direction=direction,
                            rng=rng
                        )
                        records.append(rec)

df = pd.DataFrame(records)
df.head()

Unnamed: 0,date,country,branch,channel,currency,direction,pax,transactions,hit_rate,ATV,ATV_eur,turnover,turnover_eur,margin_rate,gp,gp_eur,gp_pct
0,2025-01-01,UK,UK_BR_1,Airport,USD,BUY,8.680953,5.992303,0.690282,584.031181,584.031181,3499.691705,3499.691705,0.114133,399.430382,399.430382,0.114133
1,2025-01-02,UK,UK_BR_1,Airport,USD,BUY,11.693377,7.831966,0.669778,555.65384,555.65384,4351.861892,4351.861892,0.098343,427.974082,427.974082,0.098343
2,2025-01-03,UK,UK_BR_1,Airport,USD,BUY,10.785967,6.899122,0.639639,590.087078,590.087078,4071.082724,4071.082724,0.111882,455.482473,455.482473,0.111882
3,2025-01-04,UK,UK_BR_1,Airport,USD,BUY,12.613527,10.393314,0.823982,570.959926,570.959926,5934.165825,5934.165825,0.112462,667.369432,667.369432,0.112462
4,2025-01-05,UK,UK_BR_1,Airport,USD,BUY,9.722016,6.866644,0.706298,590.042343,590.042343,4051.611003,4051.611003,0.114581,464.238676,464.238676,0.114581


In [53]:
# ============================================
# Quick sanity checks and export
# ============================================

print("Shape:", df.shape)
print("\nSample by currency (total turnover and GP in EUR):")
print(
    df.groupby("currency")[["turnover_eur", "gp_eur"]]
      .sum()
      .assign(gp_pct=lambda x: x["gp_eur"] / x["turnover_eur"])
)

print("\nSample by country / channel (avg margin):")
print(
    df.groupby(["country", "channel"])["margin_rate"]
      .mean()
      .round(4)
)

# Ensure date is in ISO format for the CSV
df["date"] = df["date"].dt.strftime("%Y-%m-%d")

df.to_csv(OUTPUT_FILE, index=False)
print(f"\n✅ Synthetic dataset exported to: {OUTPUT_FILE}")

Shape: (87600, 17)

Sample by currency (total turnover and GP in EUR):
          turnover_eur        gp_eur    gp_pct
currency                                      
AUD       1.683244e+07  1.565533e+06  0.093007
EUR       4.450670e+07  4.225271e+06  0.094936
GBP       3.264290e+07  3.199004e+06  0.098000
JPY       2.174317e+07  2.042091e+06  0.093919
USD       5.682958e+07  5.510736e+06  0.096970

Sample by country / channel (avg margin):
country  channel    
AU       Airport        0.1004
         High Street    0.0904
ES       Airport        0.1003
         High Street    0.0905
SG       Airport        0.1004
         High Street    0.0904
UK       Airport        0.1004
         High Street    0.0903
Name: margin_rate, dtype: float64

✅ Synthetic dataset exported to: ../data\synthetic_pricing_daily.csv
