In [None]:
# Directory added to the module search path below; presumably where the
# `fintrans_toolbox` package lives on this machine — TODO confirm.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports further down so they can be resolved.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import re
from google.cloud import bigquery
import plotly.graph_objects as go


# Project-local helpers: `bq` is used below to run SQL against BigQuery;
# `t` is not used in the cells visible here.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# BigQuery client created with no explicit project or credentials arguments.
client = bigquery.Client()

In [None]:
# Fetch quarterly 2024 online spend for UK-origin cardholders with a non-UK
# destination country, restricted to a fixed list of MCCs.
# NOTE(review): the GROUP BY lists every selected column and nothing is
# aggregated, so it only removes duplicate rows — confirm that is the intent.
UK_mcc_country = '''SELECT time_period_value, mcc, spend, destination_country
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value IN ("2024Q1","2024Q2","2024Q3","2024Q4")
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcc IN ("LARGE DIGITAL GOODS MERCHANT","AIRLINES","LODGING","COMPUTER SOFTWARE STORES","TRAVEL AGENCIES","DIGITAL GOODS GAMES")
GROUP BY mcc, time_period_value, spend, destination_country
ORDER BY time_period_value, spend DESC'''

mcc_country_df = bq.read_bq_table_sql(client, UK_mcc_country)

# The first four characters of values such as "2024Q1" give the year.
mcc_country_df = mcc_country_df.assign(
    year=mcc_country_df['time_period_value'].str[:4]
)

mcc_country_df.head()

In [None]:
# Save the queried rows (including the DataFrame index) to a CSV file in the
# current working directory.
mcc_country_df.to_csv(path_or_buf='mcc_country_df.csv')

In [None]:
# Build a 100%-stacked bar chart: for each MCC, the percentage of total spend
# attributed to each destination country, with small contributors merged into
# a single bucket, and save it as a PNG.

# Destination countries whose share of an MCC's spend is below this percentage
# are merged into REST_OF_WORLD_LABEL.
# NOTE(review): the previous comment said 3% while the code used 20; the value
# the code actually ran with is kept here — confirm which one is intended.
MIN_SHARE_PCT = 20
REST_OF_WORLD_LABEL = 'Rest of the World'

# Group by MCC and destination country, summing the spend
grouped = mcc_country_df.groupby(['mcc', 'destination_country'])['spend'].sum().reset_index()

# Normalize spend to get percentage share within each MCC
grouped['total_spend_per_mcc'] = grouped.groupby('mcc')['spend'].transform('sum')
grouped['spend_share'] = grouped['spend'] / grouped['total_spend_per_mcc'] * 100

# Relabel countries contributing less than MIN_SHARE_PCT of an MCC's spend.
# A boolean mask replaces the row-wise apply: same result, no Python-level loop.
# Rows with a NaN share (zero MCC total) compare False and keep their label.
below_threshold = grouped['spend_share'] < MIN_SHARE_PCT
grouped.loc[below_threshold, 'destination_country'] = REST_OF_WORLD_LABEL

# Re-aggregate after replacing low-share countries
aggregated = grouped.groupby(['mcc', 'destination_country'])['spend'].sum().reset_index()

# Recalculate spend share after aggregation
aggregated['total_spend_per_mcc'] = aggregated.groupby('mcc')['spend'].transform('sum')
aggregated['spend_share'] = aggregated['spend'] / aggregated['total_spend_per_mcc'] * 100

# Pivot the data for plotting: one row per MCC, one column per destination
# country; combinations that do not occur are filled with 0.
pivot_df = aggregated.pivot(index='mcc', columns='destination_country', values='spend_share').fillna(0)


# Fixed colour per destination label so a country keeps its colour if the
# chart is regenerated. Keys are matched against the data verbatim, including
# the double spaces in some of the 'REST OF' labels.
custom_colors = {
    'UNITED STATES OF AMERICA': '#1f77b4',
    'GERMANY': '#ff7f0e',
    'FRANCE': '#2ca02c',
    'SPAIN': '#d62728',
    'ITALY': '#9467bd',
    'NETHERLANDS': '#8c564b',
    'SWITZERLAND': '#e377c2',
    'AUSTRALIA': '#7f7f7f',
    'INDIA': '#bcbd22',
    'Rest of the World': '#17becf',
    'REPUBLIC OF IRELAND': '#aec7e8',
    'REST OF  EUROPE': '#ffbb78',
    'REST OF  ASIAPAC': '#98df8a',
    'REST OF  C.E.M.E.A.': '#ff9896',
    'REST OF LAT.AM.': '#c5b0d5'
}


# Reorder columns to follow the key order of custom_colors; columns without a
# colour entry sort last (rank 999) and keep their relative order. The rank
# lookup is built once instead of rebuilding the key list for every column.
color_rank = {country: position for position, country in enumerate(custom_colors)}
ordered_columns = sorted(pivot_df.columns, key=lambda country: color_rank.get(country, 999))
pivot_df = pivot_df[ordered_columns]

# Plot with custom colors; countries without an entry fall back to light grey.
fig, ax = plt.subplots(figsize=(14, 12))
bar_colors = [custom_colors.get(country, '#cccccc') for country in pivot_df.columns]
pivot_df.plot(kind='bar', stacked=True, ax=ax, width=0.8, color=bar_colors)

# Customize the plot
ax.set_ylabel('Share of Spend (%)')
ax.set_xlabel('Sector (MCC)')
ax.set_title('Share of Spend by Destination Country within Each MCC Sector (2024)')
plt.xticks(rotation=45, ha='right')

# Move the legend to the bottom and make it flatter and wider
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.25),
    ncol=6,
    title='Destination Country',
    frameon=False
)

plt.tight_layout()
plt.savefig("stacked_bar_chart_with_flat_legend.png")

In [None]:
# Print the plain-text form of the pivoted share table
# (rows: MCC, columns: destination country, values: percentage share of spend).
print(pivot_df)