In [None]:
import os
import re
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.cloud import bigquery
from matplotlib.ticker import PercentFormatter

# The directory is appended to sys.path before the fintrans_toolbox imports below,
# presumably because the toolbox lives there rather than being installed as a package.
project_path = "/home/jupyter"
sys.path.append(project_path)

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client created with default arguments; it is handed to the toolbox
# query helper (bq.read_bq_table_sql) when the spend data is loaded.
client = bigquery.Client()

In [None]:
# Quarterly spend by UK cardholders outside the UK, per destination country and
# merchant channel, with all merchant category groups combined (mcg = 'All').
# - 2025Q1 is filtered out (presumably because that year is incomplete - TODO confirm).
# - The GROUP BY lists every selected column and no aggregate is used, so it only
#   removes duplicate rows; the yearly aggregation is done later in pandas.
UK_spending_by_country = '''SELECT time_period_value, destination_country, spend, merchant_channel
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcg = 'All'
GROUP BY destination_country, time_period_value, spend, merchant_channel
ORDER BY time_period_value, spend DESC'''

# Run the query through the toolbox helper; the result is used as a DataFrame below.
country_online_ratio = bq.read_bq_table_sql(client, UK_spending_by_country)

# Year label = first four characters of the quarter label (e.g. '2024Q3' -> '2024').
# It is still a string at this point.
country_online_ratio['year'] = country_online_ratio['time_period_value'].str[:4]

# Preview the first rows.
country_online_ratio.head()

In [None]:
# Save the quarterly extract as CSV in the current working directory.
country_online_ratio.to_csv(path_or_buf='online_ratio.csv')

# Collapse the quarters into yearly spend totals for each destination country
# and merchant channel.
yearly_keys = ['year', 'destination_country', 'merchant_channel']
spend_by_key = country_online_ratio.groupby(by=yearly_keys)['spend'].sum()
online_ratio_yearly = spend_by_key.reset_index()

In [None]:
# Yearly 'All'-channel spend keyed by (year, destination_country). This is the
# denominator for every channel's share within the same year and country.
all_spend = (
    online_ratio_yearly.loc[online_ratio_yearly["merchant_channel"] == "All"]
    .set_index(["year", "destination_country"])["spend"]
)

# Look up each row's denominator in one vectorised step instead of a row-wise apply.
# Rows whose (year, destination_country) has no 'All' entry get NaN, so their share
# is NaN as well.
row_keys = pd.MultiIndex.from_frame(online_ratio_yearly[["year", "destination_country"]])
all_spend_per_row = all_spend.reindex(row_keys).to_numpy()

# Channel spend as a percentage (0-100) of the 'All' spend for the same year and country.
online_ratio_yearly["spend_vs_all_%"] = (
    online_ratio_yearly["spend"].to_numpy() / all_spend_per_row * 100
)

# Preview the result instead of printing the whole frame.
online_ratio_yearly.head()

In [None]:
# Convert the year labels (strings such as '2024') to numbers so they can be compared
# with the integer years used for filtering and plotting; unparsable labels become NaN.
online_ratio_yearly["year"] = pd.to_numeric(online_ratio_yearly["year"], errors="coerce")

# 'spend_vs_all_%' is computed by the preceding cell and does not depend on the dtype
# of 'year', so it is reused here rather than recomputed with a second copy of the code.
assert "spend_vs_all_%" in online_ratio_yearly.columns, (
    "run the cell that adds 'spend_vs_all_%' before this one"
)

# Keep only the Online channel for the two comparison years.
is_online = online_ratio_yearly["merchant_channel"] == "Online"
is_comparison_year = online_ratio_yearly["year"].isin([2019, 2024])
online_ratio_2019_2024 = online_ratio_yearly[is_online & is_comparison_year]
online_ratio_2019_2024

In [None]:
# Online share of spend (0-100 %) per year and destination country.
online_ratio_2019_2024 = (
    online_ratio_2019_2024
    .groupby(['year', 'destination_country'])['spend_vs_all_%']
    .sum()
    .reset_index()
)

# One row per destination country and one column per year. A country with no value
# in one of the years is shown as 0. Rows are ordered by the 2024 share, largest first.
pivot_online_ratio_2019_2024 = (
    online_ratio_2019_2024
    .pivot(index='destination_country', columns='year', values='spend_vs_all_%')
    .fillna(0)
    .sort_values(by=2024, ascending=False)
)

# Series to plot: bars for 2024, diamond markers for 2019.
countries = pivot_online_ratio_2019_2024.index.tolist()
share_2024 = pivot_online_ratio_2019_2024[2024].values
share_2019 = pivot_online_ratio_2019_2024[2019].values

positions = np.arange(len(countries))
bar_width = 0.6

fig, ax = plt.subplots(figsize=(14, 8))

# Bars for 2024
bars = ax.bar(positions, share_2024, bar_width, label='2024', color='orange')

# Diamond markers for 2019, drawn over the bars
ax.scatter(positions, share_2019, color='blue', marker='D', label='2019')

# Labels and formatting
ax.set_xlabel('Destination country')
ax.set_ylabel('Share of Spend (%)')
ax.set_title('Share of Online Spend by Country, 2019 vs 2024')
ax.set_xticks(positions)
ax.set_xticklabels(countries, rotation=90)
# The plotted values are already on a 0-100 scale, so 100 (not 1.0) corresponds to 100%.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=100, decimals=0))
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Show the pivoted Online shares (rows: destination country, columns: year).
pivot_online_ratio_2019_2024