In [None]:
# Bubble chart: the first part of section 7

In [None]:
# Notebook setup: make the project-local `fintrans_toolbox` package importable
# and create the BigQuery client that the query cells pass to `bq.read_bq_table_sql`.
# NOTE(review): `project_path` is a hard-coded absolute path — presumably the
# home directory of the notebook VM; confirm before running anywhere else.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports below so they can be resolved.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# Client constructed with default arguments (no explicit project/credentials given here).
client = bigquery.Client()

In [None]:
# Quarterly spend rows for cardholder_origin = 'UNITED KINGDOM' at every
# destination country other than the UK, one row per merchant channel.
# Filters: time_period = 'Quarter', cardholder_origin_country = 'All', mcg = 'All'.
# The GROUP BY lists every selected column, so it only collapses exact duplicate rows.
UK_spending_by_country = '''SELECT time_period_value, destination_country, spend, merchant_channel
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcg = 'All'
GROUP BY destination_country, time_period_value, spend, merchant_channel
ORDER BY time_period_value, spend DESC'''

# Run the query and add `year`: the first four characters of `time_period_value`
# (still a string at this point, e.g. '2024' from '2024Q1').
country_year_quarter_spend_merchant_channel = (
    bq.read_bq_table_sql(client, UK_spending_by_country)
    .assign(year=lambda frame: frame['time_period_value'].str.slice(0, 4))
)

country_year_quarter_spend_merchant_channel.head()

In [None]:
# Count rows per (destination country, quarter); pairs with no rows come back as NaN.
country_quarter_counts = country_year_quarter_spend_merchant_channel.pivot_table(
    index='destination_country',
    columns='time_period_value',
    values='spend',
    aggfunc='size',
)

# Presence matrix: True where the country has at least one row in that quarter.
completeness_matrix_country = country_quarter_counts.fillna(False).astype(bool)

# Keep only the countries that are missing at least one quarter.
has_gap_country = ~completeness_matrix_country.all(axis=1)
incomplete_rows_country = completeness_matrix_country.loc[has_gap_country]
incomplete_rows_country

In [None]:
# Persist the raw query result (same file and arguments as before).
country_year_quarter_spend_merchant_channel.to_csv('yearly_country_df.csv')

# The year columns are looked up below with integer labels (2019, 2024), so
# `year` is cast to int explicitly here instead of relying on a write-then-read
# of the CSV to change its dtype. `spend` is passed through pd.to_numeric so the
# arithmetic below always runs on a numeric column.
country_year_quarter_spend_merchant_channel_series = country_year_quarter_spend_merchant_channel.assign(
    year=lambda frame: frame["year"].astype(int),
    spend=lambda frame: pd.to_numeric(frame["spend"]),
)

# Filter for Online and All channels
country_year_quarter_spend_online_series = country_year_quarter_spend_merchant_channel_series[
    country_year_quarter_spend_merchant_channel_series["merchant_channel"] == "Online"
]
country_year_quarter_spend_All_series = country_year_quarter_spend_merchant_channel_series[
    country_year_quarter_spend_merchant_channel_series["merchant_channel"] == "All"
]

# Aggregate spend by destination_country and year (rows: country, columns: year)
country_year_spend_online_series = (
    country_year_quarter_spend_online_series.groupby(["destination_country", "year"])["spend"].sum().unstack()
)
country_year_spend_All_series = (
    country_year_quarter_spend_All_series.groupby(["destination_country", "year"])["spend"].sum().unstack()
)

# Calculate percentage of online spend out of total spend for each year
online_share = (country_year_spend_online_series / country_year_spend_All_series) * 100

# Fail with an explicit message if either comparison year is absent from the data.
missing_years = [year for year in (2019, 2024) if year not in online_share.columns]
if missing_years:
    raise KeyError(f"Online share cannot be compared; no data for year(s): {missing_years}")

# Compute the change in share from 2019 to 2024 (percentage points)
online_share["diff"] = online_share[2024] - online_share[2019]

# Calculate each country's percentage of total online spend abroad in 2024
country_2024_spend_online_series = country_year_spend_online_series[2024].sum()
online_share["% of total online spend abroad (2024)"] = (
    country_year_spend_online_series[2024] / country_2024_spend_online_series
) * 100

# Prepare the final DataFrame: destination_country becomes a regular column and
# the two comparison years get descriptive names used by the bubble chart.
table_data = online_share.reset_index().rename(columns={
    2019: "2019_online_share",
    2024: "2024_online_share"
})


In [None]:
# Bubble chart: 2019 online share (x) against 2024 online share (y), one bubble
# per destination country, sized by the country's share of 2024 online spend abroad.
fig, ax = plt.subplots(figsize=(12, 8))

share_2019 = table_data["2019_online_share"]
share_2024 = table_data["2024_online_share"]

ax.scatter(
    share_2019,
    share_2024,
    s=table_data["% of total online spend abroad (2024)"] * 100,
    alpha=0.6,
    edgecolors='w'
)

# Label every bubble with its destination country, centred on the point
for x_pos, y_pos, country in zip(share_2019, share_2024, table_data["destination_country"]):
    ax.text(x_pos, y_pos, country, fontsize=8, ha='center', va='center')

# 45-degree reference line spanning the full range of both axes;
# points above it had a higher online share in 2024 than in 2019
lims = [
    min(share_2019.min(), share_2024.min()),
    max(share_2019.max(), share_2024.max())
]
ax.plot(lims, lims, 'k--', alpha=0.75)

# Axis labels and title
ax.set_xlabel("2019 Online Spend Share (%)")
ax.set_ylabel("2024 Online Spend Share (%)")
ax.set_title("Online Spend Share by Destination Country\n(2019 vs 2024, Bubble Size = % of Total Online Spend Abroad in 2024)")

# Only two guide lines, at 50% on each axis; the regular grid stays off
ax.axhline(50, color='gray', linestyle=':', linewidth=1)
ax.axvline(50, color='gray', linestyle=':', linewidth=1)
ax.grid(False)

fig.tight_layout()
plt.show()

In [None]:
# Data for Figure 6
#table_data.to_csv("Figure 6: 2019 vs 2024 Share of spend that was online, by country.csv")

In [None]:
# MCC chart: the second part of section 7

In [None]:
# Quarterly online spend per MCC for UK cardholders (international and domestic:
# there is no destination_country filter). Used to find the 6 highest-spend MCCs.
# Excludes 2025Q1, the 'All' aggregate rows for mcg and mcc, and the
# 'BUSINESS TO BUSINESS' mcg.
# The SQL text lives under its own name so that `mcc_quarter_year_spend` only
# ever refers to the resulting DataFrame.
mcc_quarter_year_spend_query = '''SELECT time_period_value, mcc, spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'  
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS'
and mcc != 'All'
GROUP BY mcc, time_period_value, spend
ORDER BY time_period_value, spend DESC'''

mcc_quarter_year_spend = bq.read_bq_table_sql(client, mcc_quarter_year_spend_query)

# `year` is the first four characters of `time_period_value` (a string, e.g. '2024').
mcc_quarter_year_spend['year'] = mcc_quarter_year_spend['time_period_value'].str[:4]

mcc_quarter_year_spend.head()

In [None]:
# Count rows per (MCC, quarter); pairs with no rows come back as NaN.
mcc_quarter_counts = mcc_quarter_year_spend.pivot_table(
    index='mcc',
    columns='time_period_value',
    values='spend',
    aggfunc='size',
)

# Presence matrix: True where the MCC has at least one row in that quarter.
completeness_matrix_mcc = mcc_quarter_counts.fillna(False).astype(bool)

# Keep only the MCCs that are missing at least one quarter.
has_gap_mcc = ~completeness_matrix_mcc.all(axis=1)
incomplete_rows_mcc = completeness_matrix_mcc.loc[has_gap_mcc]
incomplete_rows_mcc

In [None]:
# Persist the raw query result (same file and arguments as before).
mcc_quarter_year_spend.to_csv('mcc_quarter_year_spend.csv')

# Cast `year` to int explicitly (it is compared with the integer 2024 below)
# rather than depending on a CSV write-then-read to change its dtype; `spend`
# goes through pd.to_numeric so the sums below run on a numeric column.
mcc_quarter_year_spend_series = mcc_quarter_year_spend.assign(
    year=lambda frame: frame['year'].astype(int),
    spend=lambda frame: pd.to_numeric(frame['spend']),
)

# 2024 rows only, reduced to the two columns needed for the ranking.
mcc_quarter_year_spend_series_2024 = mcc_quarter_year_spend_series.loc[
    mcc_quarter_year_spend_series['year'] == 2024, ['mcc', 'spend']
]
mcc_quarter_year_spend_series_2024.to_csv('mcc_quarter_year_spend_series_2024.csv', index=False)

# Each MCC's share (%) of total 2024 online spend; a Series indexed by mcc.
mcc_2024_spend = mcc_quarter_year_spend_series_2024.groupby('mcc')['spend'].sum()
mcc_2024_perc_spend = (mcc_2024_spend / mcc_2024_spend.sum()) * 100
mcc_2024_perc_spend.to_csv('mcc_2024_perc_spend.csv')

# Build the two-column table directly. The previous `Series.columns = [...]`
# assignment had no effect on a Series, and the table was only obtained by
# reading the CSV back in.
mcc_2024_perc_spend_series = mcc_2024_perc_spend.rename('percentage').reset_index()
top_6_mccs = mcc_2024_perc_spend_series.sort_values(by='percentage', ascending=False).head(6)
top_6_mccs

In [None]:
# Combined share (%) of the six largest MCCs. `numeric_only=True` keeps the
# result to the `percentage` column; a plain `.sum()` would also concatenate
# the `mcc` name strings into one meaningless value.
top_6_sum = top_6_mccs.sum(numeric_only=True)
top_6_sum

In [None]:
# Quarterly online spend per MCC for UK cardholders, international only
# (destination_country != 'UNITED KINGDOM'). Used to find the 6 highest-spend
# MCCs abroad. Excludes 2025Q1, the 'All' aggregate rows for mcg and mcc, and
# the 'BUSINESS TO BUSINESS' mcg.
# The SQL text lives under its own name so that `mcc_quarter_year_spend_int`
# only ever refers to the resulting DataFrame.
mcc_quarter_year_spend_int_query = '''SELECT time_period_value, mcc, spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'  
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS'
and mcc != 'All'
and destination_country != 'UNITED KINGDOM'
GROUP BY mcc, time_period_value, spend
ORDER BY time_period_value, spend DESC'''

mcc_quarter_year_spend_int = bq.read_bq_table_sql(client, mcc_quarter_year_spend_int_query)

# `year` is the first four characters of `time_period_value` (a string, e.g. '2024').
mcc_quarter_year_spend_int['year'] = mcc_quarter_year_spend_int['time_period_value'].str[:4]

mcc_quarter_year_spend_int.head()

In [None]:
# Persist the raw query result (same file and arguments as before).
mcc_quarter_year_spend_int.to_csv('mcc_quarter_year_spend_int.csv')

# Cast `year` to int explicitly (it is compared with the integer 2024 below)
# rather than depending on a CSV write-then-read to change its dtype; `spend`
# goes through pd.to_numeric so the sums below run on a numeric column.
mcc_quarter_year_spend_series_int = mcc_quarter_year_spend_int.assign(
    year=lambda frame: frame['year'].astype(int),
    spend=lambda frame: pd.to_numeric(frame['spend']),
)

# 2024 rows only, reduced to the two columns needed for the ranking.
mcc_quarter_year_spend_series_2024_int = mcc_quarter_year_spend_series_int.loc[
    mcc_quarter_year_spend_series_int['year'] == 2024, ['mcc', 'spend']
]

# Each MCC's share (%) of total 2024 online spend abroad; a Series indexed by mcc.
mcc_2024_spend_int = mcc_quarter_year_spend_series_2024_int.groupby('mcc')['spend'].sum()
mcc_2024_perc_spend_int = (mcc_2024_spend_int / mcc_2024_spend_int.sum()) * 100
mcc_2024_perc_spend_int.to_csv('mcc_2024_perc_spend_int.csv')

# Build the two-column table directly. The previous `Series.columns = [...]`
# assignment had no effect on a Series, and the table was only obtained by
# reading the CSV back in.
mcc_2024_perc_spend_int_series = mcc_2024_perc_spend_int.rename('percentage').reset_index()
top_6_mccs_int = mcc_2024_perc_spend_int_series.sort_values(by='percentage', ascending=False).head(6)
top_6_mccs_int

In [None]:
# Combined share (%) of the six largest MCCs abroad. `numeric_only=True` keeps
# the result to the `percentage` column; a plain `.sum()` would also
# concatenate the `mcc` name strings into one meaningless value.
top_6_sum_int = top_6_mccs_int.sum(numeric_only=True)
top_6_sum_int

In [None]:
# The following code creates the segmented (stacked) bar chart by continent

In [None]:
# 2024 quarterly online spend abroad by UK cardholders, per destination country,
# restricted to six named MCCs.
# NOTE(review): the MCC list is hard-coded — presumably the top 6 found in
# `top_6_mccs_int`; confirm it still matches whenever the data is refreshed.
# The SQL text lives under its own name so that
# `MCC_top6_quarter_2024_country_spend` only ever refers to the DataFrame.
MCC_top6_quarter_2024_country_spend_query = '''SELECT time_period_value, mcc, spend, destination_country
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value IN ("2024Q1","2024Q2","2024Q3","2024Q4")
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcc IN ("LARGE DIGITAL GOODS MERCHANT","AIRLINES","LODGING","COMPUTER SOFTWARE STORES","TRAVEL AGENCIES","DIGITAL GOODS GAMES")
GROUP BY mcc, time_period_value, spend, destination_country
ORDER BY time_period_value, spend DESC'''

MCC_top6_quarter_2024_country_spend = bq.read_bq_table_sql(client, MCC_top6_quarter_2024_country_spend_query)

# `year` is the first four characters of `time_period_value` (a string, e.g. '2024').
MCC_top6_quarter_2024_country_spend['year'] = MCC_top6_quarter_2024_country_spend['time_period_value'].str[:4]

MCC_top6_quarter_2024_country_spend.to_csv('MCC_top6_quarter_year_country_spend.csv')

# Preview only, consistent with the other query cells.
MCC_top6_quarter_2024_country_spend.head()

In [None]:
# Presence matrix for the top-6 MCC data: one row per MCC, one column per 2024
# quarter; True where at least one row exists for that (MCC, quarter) pair.
completeness_matrix_int_mcc = (
    MCC_top6_quarter_2024_country_spend
    .pivot_table(index='mcc', columns='time_period_value', values='spend', aggfunc='size')
    .fillna(False)
    .astype(bool)
)

# MCCs missing at least one quarter. The mask must come from this same matrix;
# it was previously built from `completeness_matrix_mcc`, a different table
# whose rows do not line up with this one.
incomplete_rows_mcc_int = completeness_matrix_int_mcc[~completeness_matrix_int_mcc.all(axis=1)]
incomplete_rows_mcc_int

In [None]:
# Destination countries grouped under the continent/region label used in the chart.
# Keys of the derived `continent_map` must match `destination_country` values exactly
# (note the double space in the "REST OF  ..." names).
countries_by_continent = {
    "North America": ["UNITED STATES OF AMERICA", "REST OF  CANADA"],
    "Europe": [
        "GERMANY", "REPUBLIC OF IRELAND", "REST OF  EUROPE", "SPAIN", "FRANCE",
        "NETHERLANDS", "ITALY", "SWITZERLAND", "POLAND", "AUSTRIA", "PORTUGAL",
    ],
    "C.E.M.E.A": ["REST OF  C.E.M.E.A."],
    "Asia-Pacific": ["UNITED ARAB EMIRATES", "INDIA", "AUSTRALIA", "REST OF  ASIAPAC"],
    "South America": ["REST OF  LAT.AM."],
    "Africa": ["SOUTH AFRICA"],
}
continent_map = {
    country: continent
    for continent, countries in countries_by_continent.items()
    for country in countries
}

# Tag every row with its continent; unmapped countries fall into 'Other'.
# This adds a column to the shared frame in place and rewrites the CSV with it.
MCC_top6_quarter_2024_country_spend['continent'] = (
    MCC_top6_quarter_2024_country_spend['destination_country'].map(continent_map).fillna('Other')
)
MCC_top6_quarter_2024_country_spend.to_csv('MCC_top6_quarter_year_country_spend.csv')

# Total spend per (MCC, continent)
MCC_top6_2024_continent_spend = MCC_top6_quarter_2024_country_spend.groupby(
    ['mcc', 'continent'], as_index=False
)['spend'].sum()

# Each continent's percentage share of its MCC's total spend
MCC_top6_2024_continent_spend['total_spend_per_mcc'] = (
    MCC_top6_2024_continent_spend.groupby('mcc')['spend'].transform('sum')
)
MCC_top6_2024_continent_spend['spend_share'] = (
    MCC_top6_2024_continent_spend['spend']
    .div(MCC_top6_2024_continent_spend['total_spend_per_mcc'])
    .mul(100)
)

# Wide layout for plotting: one row per MCC, one column per continent
chart_data = MCC_top6_2024_continent_spend.pivot(
    index='mcc', columns='continent', values='spend_share'
).fillna(0)

# Fixed colour per continent; the key order also fixes the stacking order
continent_colors = {
    "North America": "#1f77b4",
    "Europe": "#ff7f0e",
    "Asia-Pacific": "#2ca02c",
    "C.E.M.E.A": "#d62728",
    "South America": "#9467bd",
    "Africa": "#8c564b",
    "Other": "#cccccc"
}

# Columns in palette order first, then any column without a palette entry
# (kept in its existing order)
palette_ordered = [name for name in continent_colors if name in chart_data.columns]
unlisted = [name for name in chart_data.columns if name not in continent_colors]
chart_data = chart_data[palette_ordered + unlisted]

# Stacked bars: one bar per MCC, segments coloured by continent
fig, ax = plt.subplots(figsize=(14, 12))
bar_colors = [continent_colors.get(col, '#cccccc') for col in chart_data.columns]
chart_data.plot.bar(stacked=True, ax=ax, width=0.8, color=bar_colors)

# Labels and title
ax.set_ylabel('Share of Spend (%)')
ax.set_xlabel('Sector (MCC)')
ax.set_title('Share of Spend by Continent within Each MCC Sector (2024)')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Legend below the axes, laid out in up to four columns
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.25),
    ncol=4,
    title='Continent',
    frameon=False
)

fig.tight_layout()
fig.savefig("stacked_bar_chart_by_continent.png")

In [None]:
# Total spend per continent across the six MCCs. `total_spend_per_mcc` is
# carried along so the table keeps its columns, but it is not used for the share.
continent_share = (
    MCC_top6_2024_continent_spend
    .groupby("continent")
    .agg({"spend": "sum", "total_spend_per_mcc": "sum"})
    .reset_index()
)

# Share (%) of the grand total. The summed `total_spend_per_mcc` only equals the
# grand total for a continent that has a row under every MCC; dividing by it
# overstates the share of any continent that is absent from some MCC, so the
# shares would not add up to 100.
continent_share["share"] = continent_share["spend"] / continent_share["spend"].sum() * 100
continent_share

In [None]:
# Data for the MCC by continent chart
#chart_data.to_csv("Figure 7: Share of online spend abroad by continent in top 6 MCCs.csv")

In [None]:
# Each destination country's share (%) of the spend in
# `MCC_top6_quarter_2024_country_spend` (the six selected MCCs, 2024 quarters).
country_spend_2024 = MCC_top6_quarter_2024_country_spend.groupby(['destination_country'])['spend'].sum()
country_share_2024 = (country_spend_2024 / country_spend_2024.sum()) * 100

# One-column DataFrame (column 'spend', indexed by destination_country) for display
# and for the label-based lookups that follow. The earlier bare `reset_index()`
# call was dropped: its result was discarded, so it had no effect.
country_share_2024_vis = country_share_2024.to_frame()
country_share_2024_vis

In [None]:
# USA's part of the combined USA + Canada share of online spend.
# Note: the result is a fraction between 0 and 1, not multiplied by 100.
USA_contribution = country_share_2024_vis.at["UNITED STATES OF AMERICA", "spend"]
CA_contribution = country_share_2024_vis.at["REST OF  CANADA", "spend"]

north_america_total = CA_contribution + USA_contribution
usa_percent = USA_contribution / north_america_total
usa_percent

In [None]:
# Total spend per (MCC, destination country)
MCC_top6_2024_country_spend = (
    MCC_top6_quarter_2024_country_spend
    .groupby(['mcc', 'destination_country'])['spend']
    .sum()
    .reset_index()
)

# Each country's percentage share of its MCC's total spend
MCC_top6_2024_country_spend['total_spend_per_mcc'] = (
    MCC_top6_2024_country_spend.groupby('mcc')['spend'].transform('sum')
)
MCC_top6_2024_country_spend['spend_share'] = (
    MCC_top6_2024_country_spend['spend'] / MCC_top6_2024_country_spend['total_spend_per_mcc'] * 100
)

# No countries are merged or replaced between the two tables, so grouping the
# already-grouped frame again and recomputing the shares gave the same rows and
# values. An independent copy keeps the name available without that repeat work.
MCC_top6_2024_country_share = MCC_top6_2024_country_spend.copy()

# Wide table: one row per MCC, one column per destination country, values are
# spend shares (%); combinations with no spend are shown as 0.
MCC_top6_2024_country_share_table = MCC_top6_2024_country_share.pivot(
    index='mcc', columns='destination_country', values='spend_share'
).fillna(0)

MCC_top6_2024_country_share_table