In [None]:
# Notebook setup: extend the import path, load libraries, and create the BigQuery client.
# NOTE(review): hard-coded absolute path; consider making this configurable per environment.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports below — presumably the package
# lives under `project_path`; confirm its location.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import re
from google.cloud import bigquery
import plotly.graph_objects as go


# Project-local helpers: `bq` is used to run SQL against BigQuery in the cells below.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# Client is built with no arguments, so project and credentials are expected to
# come from the runtime environment.
client = bigquery.Client()

In [None]:
# Pull 2024 quarterly online spend for cardholder_origin = 'UNITED KINGDOM' at every
# destination country other than the United Kingdom.
# Only the mcc = 'All' rows are kept, so the result is broken down by quarter and
# destination country, not by individual MCC.
# NOTE(review): GROUP BY lists every selected column (including spend), so it only
# collapses exact duplicate rows — no spend is summed in SQL. Confirm that is intended.
UK_mcc_country = '''SELECT time_period_value, mcc, spend, destination_country
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value IN ("2024Q1","2024Q2","2024Q3","2024Q4")
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcc = 'All'
GROUP BY mcc, time_period_value, spend, destination_country
ORDER BY time_period_value, spend DESC'''

# Run the query through the project helper; the result is used as a DataFrame below.
mcc_country_df = bq.read_bq_table_sql(client, UK_mcc_country)

# Year is the first four characters of the quarter label, e.g. "2024" from "2024Q1".
mcc_country_df['year'] = mcc_country_df['time_period_value'].str[:4]

# Preview the first rows.
mcc_country_df.head()

In [None]:
# Map destination countries to continents, compute each continent's share of spend
# per MCC, and save the result as a stacked bar chart.
#
# Keys must match the `destination_country` values exactly (note the double space
# in the "REST OF  ..." labels).
# NOTE(review): "REST OF  C.E.M.E.A." and "UNITED ARAB EMIRATES" are bucketed under
# "Asia-Pacific" — confirm this is the intended regional grouping.
continent_map = {
    "UNITED STATES OF AMERICA": "North America",
    "CANADA": "North America",
    "REST OF  CANADA": "North America",
    "GERMANY": "Europe",
    "REPUBLIC OF IRELAND": "Europe",
    "REST OF  EUROPE": "Europe",
    "SPAIN": "Europe",
    "FRANCE": "Europe",
    "NETHERLANDS": "Europe",
    "ITALY": "Europe",
    "SWITZERLAND": "Europe",
    "POLAND": "Europe",
    "AUSTRIA": "Europe",
    "PORTUGAL": "Europe",
    "REST OF  C.E.M.E.A.": "Asia-Pacific",
    "UNITED ARAB EMIRATES": "Asia-Pacific",
    "INDIA": "Asia-Pacific",
    "AUSTRALIA": "Asia-Pacific",
    "REST OF  ASIAPAC": "Asia-Pacific",
    "REST OF  LAT.AM.": "South America",
    "SOUTH AFRICA": "Africa",
    "REST OF WORLD": "Other"
}

# Apply continent mapping; destinations missing from continent_map become NaN.
mcc_country_df["continent"] = mcc_country_df["destination_country"].map(continent_map)

# Surface unmapped destinations before dropping them: their spend is excluded from
# every share below, so a silent drop would distort the chart without any signal.
unmapped_mask = mcc_country_df["continent"].isna()
if unmapped_mask.any():
    unmapped_spend = (
        mcc_country_df.loc[unmapped_mask]
        .groupby("destination_country", dropna=False)["spend"]
        .sum()
        .sort_values(ascending=False)
    )
    print(
        f"WARNING: {int(unmapped_mask.sum())} row(s) across {len(unmapped_spend)} "
        "destination(s) are missing from continent_map and are excluded from the shares:"
    )
    print(unmapped_spend.to_string())
mcc_country_df = mcc_country_df.dropna(subset=["continent"])

# Group by MCC and continent, then calculate spend share.
# Spend is summed over all rows (i.e. all quarters returned by the query).
grouped = mcc_country_df.groupby(["mcc", "continent"])["spend"].sum().reset_index()
total_spend = grouped.groupby("mcc")["spend"].sum().reset_index().rename(columns={"spend": "total_spend"})
grouped = grouped.merge(total_spend, on="mcc")
# Share is a percentage of the MCC's total mapped spend, so each MCC row sums to 100.
grouped["share"] = (grouped["spend"] / grouped["total_spend"]) * 100

# Pivot the data for plotting: one row per MCC, one column per continent.
# Continents with no spend for an MCC are filled with 0 so the bars stack cleanly.
pivot_df = grouped.pivot(index="mcc", columns="continent", values="share").fillna(0)

# Plotting
fig, ax = plt.subplots(figsize=(16, 10))
pivot_df.plot(kind="bar", stacked=True, ax=ax)

# Customize plot
ax.set_title("Share of Spend by Continent within Each MCC Sector (2024)", fontsize=16)
ax.set_ylabel("Share of Spend (%)", fontsize=14)
ax.set_xlabel("MCC Sector", fontsize=14)
# Legend sits below the axes; bbox_inches="tight" on save keeps it inside the image.
ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.35), ncol=4)
plt.xticks(rotation=0, ha='center')  # Horizontal x-axis labels
# Save the plot
plt.tight_layout()
plt.savefig("share_of_spend_by_continent.png", dpi=300, bbox_inches="tight")

In [None]:
# Show the continent share table (percent of spend per MCC) as the cell's output,
# so the notebook renders it with its rich DataFrame display instead of plain text.
pivot_df