In [None]:
# Directory appended to sys.path below so the project-local imports resolve.
# NOTE(review): presumably this is where the `fintrans_toolbox` package
# lives — confirm; the absolute path ties the notebook to one environment.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports further down.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

# Project helpers: `bq` is used to run SQL against BigQuery; `t` is not
# referenced in the cells visible here.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client shared by every query in the notebook. No project or
# credentials are passed explicitly, so it relies on the environment defaults.
client = bigquery.Client()

In [None]:
# Quarterly spend by UK cardholders in every destination country other than
# the UK, for each merchant channel, with all merchant categories combined
# (mcg = 'All'). The GROUP BY lists every selected column and there is no
# aggregate, so it only de-duplicates rows.
UK_spending_by_country = '''SELECT time_period_value, destination_country, spend, merchant_channel, cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcg = 'All'
GROUP BY destination_country, time_period_value, spend, merchant_channel, cardholders
ORDER BY time_period_value, spend DESC'''

# Run the query and tag each row with the first four characters of its
# period label (presumably the calendar year — verify the label format).
# The values are still strings at this point.
yearly_country_df = bq.read_bq_table_sql(client, UK_spending_by_country).assign(
    year=lambda quarters: quarters['time_period_value'].str.slice(0, 4)
)

yearly_country_df.head()

In [None]:
# Persist the query result, then reload it from disk. The reload is load-
# bearing: read_csv re-infers dtypes from text, which is what turns the
# 4-character `year` strings into the integer keys (2019, 2024) used below.
# index_col=0 restores the saved index instead of leaving it behind as a
# stray "Unnamed: 0" column.
yearly_country_df.to_csv('yearly_country_df.csv')
df = pd.read_csv('yearly_country_df.csv', index_col=0)

# Split by merchant channel. "All" is used as the denominator of the share,
# i.e. it is assumed to be the all-channel total that already includes
# "Online" — TODO confirm against the table definition.
online_df = df[df["merchant_channel"] == "Online"]
all_df = df[df["merchant_channel"] == "All"]

# Sum the quarterly rows into one value per destination country (rows) and
# year (columns).
online_agg = online_df.groupby(["destination_country", "year"])["spend"].sum().unstack()
all_agg = all_df.groupby(["destination_country", "year"])["spend"].sum().unstack()

# Online spend as a percentage of all-channel spend. A zero all-channel total
# would produce +/-inf; treat that as "no share available" (NaN) so it cannot
# leak into downstream min/max calculations or plot limits.
online_share = (online_agg / all_agg * 100).replace([np.inf, -np.inf], np.nan)

# Percentage-point change in online share between 2019 and 2024.
online_share["diff"] = online_share[2024] - online_share[2019]

# Each country's slice of all 2024 online spend abroad, in percent.
total_online_2024 = online_agg[2024].sum()
online_share["% of total online spend abroad (2024)"] = (online_agg[2024] / total_online_2024) * 100

# Flatten to one row per country and give the two comparison years
# descriptive column names; other year columns keep their integer labels.
final_df = online_share.reset_index().rename(columns={
    2019: "2019_online_share",
    2024: "2024_online_share"
})


In [None]:
# Bubble chart: 2019 vs 2024 online spend share per destination country,
# with bubble area proportional to the country's slice of 2024 online spend.

# Marker area (points^2) per percentage point of total 2024 online spend.
BUBBLE_SCALE = 100

plot_cols = [
    "2019_online_share",
    "2024_online_share",
    "% of total online spend abroad (2024)",
]

# A country missing either year (NaN) or with a non-finite share cannot be
# positioned or sized: matplotlib cannot place a label at a non-finite
# coordinate. Keep only fully plottable rows.
plot_df = final_df[np.isfinite(final_df[plot_cols]).all(axis=1)]

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(
    plot_df["2019_online_share"],
    plot_df["2024_online_share"],
    s=plot_df["% of total online spend abroad (2024)"] * BUBBLE_SCALE,
    alpha=0.6,
    edgecolors='w',
)

# Label every bubble with its country name, centred on the point.
for share_2019, share_2024, country in zip(
    plot_df["2019_online_share"],
    plot_df["2024_online_share"],
    plot_df["destination_country"],
):
    ax.text(share_2019, share_2024, country, fontsize=8, ha='center', va='center')

# 45-degree reference line: points above it gained online share since 2019.
lims = [
    min(plot_df["2019_online_share"].min(), plot_df["2024_online_share"].min()),
    max(plot_df["2019_online_share"].max(), plot_df["2024_online_share"].max()),
]
ax.plot(lims, lims, 'k--', alpha=0.75)

ax.set_xlabel("2019 Online Spend Share (%)")
ax.set_ylabel("2024 Online Spend Share (%)")
ax.set_title("Online Spend Share by Destination Country\n(2019 vs 2024, Bubble Size = % of Total Online Spend Abroad in 2024)")

# Guide lines at 50% on both axes only; the regular grid stays off.
ax.axhline(50, color='gray', linestyle=':', linewidth=1)
ax.axvline(50, color='gray', linestyle=':', linewidth=1)
ax.grid(False)

fig.tight_layout()
plt.show()


In [None]:
# Bare last expression: the notebook renders the table with pandas' rich
# display instead of the plain-text dump that print() produces.
final_df

In [None]:
# Online-channel spend summed per destination country (rows) and year (columns).
online_agg
