In [None]:
# Directory appended to sys.path below so that the project-local
# `fintrans_toolbox` imports further down can be resolved.
# NOTE(review): this is an absolute, environment-specific path -- confirm it
# before running the notebook on a different machine.
project_path = "/home/jupyter"
import os
import sys

sys.path.append(project_path)
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery
from matplotlib.ticker import PercentFormatter


# Project-local helpers; `bq.read_bq_table_sql` is what the query cells call.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client passed to every `bq.read_bq_table_sql` call in this notebook.
client = bigquery.Client()

In [None]:
# Quarterly online spend for UK cardholders, returned as
# (time_period_value, mcg, spend) rows with the 'All' aggregates, the
# business-to-business group and online marketplaces filtered out.
# NOTE(review): `spend` is part of the GROUP BY key, so the clause behaves like
# DISTINCT -- rows sharing the same mcg, quarter and spend value collapse into
# one. Confirm this is intended rather than SUM(spend) grouped by mcg/quarter.
UK_spending_by_mcg = '''SELECT time_period_value, mcg, spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'  
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS'
and mcg != 'ONLINE MARKETPLACES'
and mcc != 'All'
GROUP BY mcg, time_period_value, spend
ORDER BY time_period_value, spend DESC'''


# Run the query and add a numeric `year` column built from the first four
# characters of `time_period_value`; anything that fails to parse becomes NaN.
mcg_quarter_year_spend = bq.read_bq_table_sql(client, UK_spending_by_mcg).assign(
    year=lambda frame: pd.to_numeric(frame['time_period_value'].str[:4], errors='coerce')
)

In [None]:
# Number of rows for every (mcg, quarter) combination.
mcg_quarter_counts = mcg_quarter_year_spend.pivot_table(
    index='mcg',
    columns='time_period_value',
    values='spend',
    aggfunc='size',
)

# Boolean presence matrix: missing combinations are filled with False, and the
# remaining counts are cast to bool.
completeness_matrix_mcg = mcg_quarter_counts.fillna(False).astype(bool)

# Categories that lack data in at least one quarter.
has_missing_quarter = ~completeness_matrix_mcg.all(axis=1)
incomplete_rows_mcg = completeness_matrix_mcg.loc[has_missing_quarter]
incomplete_rows_mcg

In [None]:
# Restrict the quarterly rows to the two comparison years.
in_comparison_years = mcg_quarter_year_spend['year'].isin([2019, 2024])
mcg_quarter_year_spend_2019_2024 = mcg_quarter_year_spend.loc[in_comparison_years]

# Annual spend per (year, mcg).
mcg_year_spend_2019_2024 = (
    mcg_quarter_year_spend_2019_2024
    .groupby(['year', 'mcg'])['spend']
    .sum()
    .reset_index()
)

# One row per mcg, one column per year; a category absent in a year gets 0.
pivot_mcg_year_spend_2019_2024 = (
    mcg_year_spend_2019_2024
    .pivot(index='mcg', columns='year', values='spend')
    .fillna(0)
)

# Turn each year's column into a share of that year's total (fractions of 1).
pivot_mcg_year_spend_2019_2024[2019] = pivot_mcg_year_spend_2019_2024[2019].div(
    pivot_mcg_year_spend_2019_2024[2019].sum()
)
pivot_mcg_year_spend_2019_2024[2024] = pivot_mcg_year_spend_2019_2024[2024].div(
    pivot_mcg_year_spend_2019_2024[2024].sum()
)

# Largest 2024 share first.
pivot_mcg_year_spend_2019_2024 = pivot_mcg_year_spend_2019_2024.sort_values(
    by=2024, ascending=False
)

# Series to plot, in the sorted category order.
categories = list(pivot_mcg_year_spend_2019_2024.index)
spend_2024 = pivot_mcg_year_spend_2019_2024[2024].to_numpy()
spend_2019 = pivot_mcg_year_spend_2019_2024[2019].to_numpy()

x = np.arange(len(categories))
width = 0.6

fig, ax = plt.subplots(figsize=(14, 8))

# 2024 shares as bars, 2019 shares as diamond markers at the same x positions.
bars = ax.bar(x, height=spend_2024, width=width, color='orange', label='2024')
ax.scatter(x, spend_2019, marker='D', color='blue', label='2019')

# Axis text, tick labels and legend.
ax.set_xlabel('Categories')
ax.set_ylabel('Share of Spend (%)')
ax.set_title('Share of Online Spend by Merchant Category Group, 2019 vs 2024')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=90)
# Values are fractions of 1, so PercentFormatter(1.0) renders 0.25 as 25%.
ax.yaxis.set_major_formatter(PercentFormatter(1.0))
ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# Values used in section 5 are listed in the table below: the per-category
# share of online spend for 2019 and 2024, sorted by the 2024 share.
pivot_mcg_year_spend_2019_2024

In [None]:
# Figure 3 to CSV 
#pivot_mcg_year_spend_2019_2024.to_csv("Figure 3: Share of Online Spend by MCG, 2019 vs 2024.csv")

In [None]:
# Calculating Visa market-share drop-off using the change in cardholders over time.
# Looks at how the total number of UK cardholders in the dataset changes over
# time, to produce a factor that scales each category of spend.

# destination_country = 'UNITED KINGDOM' is included so the count only captures
# UK cardholders that actually used their card in the UK, and does not double
# count cardholders that also spent money abroad.
cardholders = '''SELECT time_period_value, sum(cardholders) as total_cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country = 'UNITED KINGDOM'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
cardholders_total = bq.read_bq_table_sql(client, cardholders)

# Numeric year taken from the first four characters of time_period_value;
# values that do not parse become NaN and are dropped by the isin() filter.
cardholders_total['year'] = pd.to_numeric(
    cardholders_total['time_period_value'].str[:4], errors='coerce'
)

# Sum the monthly cardholder counts within each comparison year.
# NOTE(review): because this adds monthly counts, the 2019/2024 ratio below is
# only like-for-like if both years contribute the same number of months -- confirm.
yearly_cardholders = cardholders_total[cardholders_total['year'].isin([2019, 2024])]
yearly_cardholders = yearly_cardholders.groupby(['year'])['total_cardholders'].sum().reset_index()

# Pick the base by year value instead of by row position: a positional
# `.iloc[0]` would silently use 2024 as the base if the query returned no
# 2019 rows.
base_year_rows = yearly_cardholders.loc[yearly_cardholders['year'] == 2019, 'total_cardholders']
if base_year_rows.empty:
    raise ValueError("No 2019 cardholder rows returned; cannot compute 'Change from Base'")
base_cardholders = base_year_rows.iloc[0]

# Base-year cardholders divided by each year's cardholders (1.0 for 2019).
# Multiplying a year's spend by this factor adjusts it for the change in
# Visa's cardholder base.
yearly_cardholders['Change from Base'] = (base_cardholders / yearly_cardholders['total_cardholders'])
yearly_cardholders

In [None]:
# Total (all-channel) spend in each MCG, used as the denominator when
# calculating the online ratios.
# NOTE(review): the query selects time_period = 'Month', while the
# `!= '2025Q1'` exclusion and the `total_mcg_by_quarter` name both refer to
# quarters -- confirm which granularity is intended.
# NOTE(review): `spend` is part of the GROUP BY key, so rows sharing the same
# mcg, period and spend value collapse into one -- confirm this is intended.
total_spending_by_mcg = '''SELECT time_period_value, mcg, spend AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month'
and time_period_value != '2025Q1'  
and merchant_channel = 'All'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS'
and mcc != 'All'
GROUP BY mcg, time_period_value, spend
ORDER BY time_period_value, spend DESC'''

# Run the query and add a numeric `year` column built from the first four
# characters of `time_period_value`; anything that fails to parse becomes NaN.
total_mcg_by_quarter = bq.read_bq_table_sql(client, total_spending_by_mcg).assign(
    year=lambda frame: pd.to_numeric(frame['time_period_value'].str[:4], errors='coerce')
)

total_mcg_by_quarter.head()

In [None]:
# Annual all-channel spend per (year, mcg) for the two comparison years.
in_comparison_years = total_mcg_by_quarter['year'].isin([2019, 2024])
total_df_filtered = total_mcg_by_quarter.loc[in_comparison_years]
total_grouped = (
    total_df_filtered
    .groupby(['year', 'mcg'])['total_spend']
    .sum()
    .reset_index()
)

merged_df = (
    total_grouped
    # Attach the annual online spend (`spend`) for the same mcg and year.
    .merge(mcg_year_spend_2019_2024, on=["mcg", "year"])
    # Attach the per-year cardholder scaling factor.
    .merge(yearly_cardholders, on="year")
    # Adjusted values for use later; both are scaled by the same factor, so
    # the online percentage is unaffected by the adjustment.
    .assign(
        adjusted_total_spend=lambda d: d["total_spend"] * d["Change from Base"],
        adjusted_online_spend=lambda d: d["spend"] * d["Change from Base"],
    )
    # Online share of spend per mcg and year, on a 0-100 scale.
    .assign(**{"Online %": lambda d: d["adjusted_online_spend"] / d["adjusted_total_spend"] * 100})
    # Drop the inputs that are no longer needed.
    .drop(columns=["total_spend", "spend", "total_cardholders", "Change from Base"])
)
merged_df

In [None]:
# Change in online ratio for each mcg from 2019 to 2024.
# One row per mcg with the 2019 and 2024 "Online %" values side by side.
# NOTE(review): fillna(0) turns a category with no row for one of the years
# into a 0% online share -- confirm that missing should be read as zero.
pivot_df = merged_df.pivot(index='mcg', columns='year', values='Online %').fillna(0)

# Absolute change is the difference between the two "Online %" values
# (percentage points, since "Online %" is on a 0-100 scale).
pivot_df['absolute_change'] = pivot_df[2024] - pivot_df[2019]

# Percent change is relative to 2019. A 2019 value of 0 would make the ratio
# +/-inf, so a zero base is masked to NaN and the result is reported as
# undefined instead.
base_2019 = pivot_df[2019].where(pivot_df[2019] != 0)
pivot_df['percent_change'] = (pivot_df['absolute_change'] / base_2019) * 100

pivot_df

In [None]:
# Change in absolute (cardholder-adjusted) online spend for each mcg from 2019 to 2024.
# Note: this reuses the name `pivot_df`, replacing the "Online %" table that
# the previous cell stored under the same name.
# NOTE(review): fillna(0) treats a category with no row for one of the years
# as zero spend -- confirm that missing should be read as zero.
pivot_df = merged_df.pivot(index='mcg', columns='year', values='adjusted_online_spend').fillna(0)

# Absolute change in adjusted online spend between the two years.
pivot_df['absolute_change'] = pivot_df[2024] - pivot_df[2019]

# Percent change is relative to 2019. A 2019 value of 0 would make the ratio
# +/-inf, so a zero base is masked to NaN and the result is reported as
# undefined instead.
base_2019 = pivot_df[2019].where(pivot_df[2019] != 0)
pivot_df['percent_change'] = (pivot_df['absolute_change'] / base_2019) * 100

pivot_df