In [None]:
# Directory appended to sys.path below.
# NOTE(review): hard-coded absolute path — presumably the home directory of the hosted
# notebook environment that holds the `fintrans_toolbox` package; confirm before running elsewhere.
project_path = "/home/jupyter"
import os
import sys

# Appended before the `fintrans_toolbox` imports further down, which presumably rely on it.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

# Project helpers; `bq.read_bq_table_sql` is used below to run the SQL query.
# NOTE(review): `os`, `re` and `t` are not referenced in the cells visible in this notebook.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client passed to `bq.read_bq_table_sql` below.
client = bigquery.Client()

In [None]:
# Quarterly spend rows by merchant category group (mcg) and merchant channel.
# Filters visible in the query: quarterly periods only, excluding 2025Q1; cardholder
# origin 'UNITED KINGDOM' with cardholder_origin_country 'All'; destinations other than
# 'UNITED KINGDOM' and 'REST OF EUROPE'; a fixed list of mcg values (including 'All').
# NOTE(review): the GROUP BY lists every selected column and there is no aggregate, so
# it only removes duplicate rows. destination_country is filtered on but not selected,
# so rows for different destinations that share the same spend value would collapse
# into one — confirm this is intended before the values are summed in later cells.
UK_spending_by_country = '''SELECT time_period_value, mcg, spend, merchant_channel
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'  
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country NOT IN("UNITED KINGDOM","REST OF EUROPE")
and mcg IN ("All","RETAIL GOODS","ELECTRONICS","ENTERTAINMENT","APPAREL & ACCESSORIES","AIRLINES","PROFESSIONAL SERVICES","LODGING","TRANSPORTATION","EDUCATION & GOVERNMENT","TRAVEL SERVICES")
GROUP BY mcg, time_period_value, spend, merchant_channel
ORDER BY time_period_value, spend DESC'''

# Run the query through the project helper (presumably returns a pandas DataFrame —
# the code below treats the result as one).
online_ratio = bq.read_bq_table_sql(client, UK_spending_by_country)

# Year = first four characters of the period label (labels look like '2025Q1');
# the value is still a string at this point.
online_ratio['year'] = online_ratio['time_period_value'].str[:4]

online_ratio.head()


In [None]:
# Write a CSV snapshot of the quarterly query result to the working directory.
online_ratio.to_csv('online_ratio.csv')

# Roll the quarterly rows up to one spend total per year, MCG and merchant channel.
online_ratio_yearly = (
    online_ratio
    .groupby(by=['year', 'mcg', 'merchant_channel'])['spend']
    .sum()
    .reset_index()
)

online_ratio_yearly.head()

In [None]:
# Yearly spend totals per (year, mcg, merchant_channel), rebuilt from the quarterly frame.
online_ratio_yearly = (
    online_ratio
    .groupby(by=['year', 'mcg', 'merchant_channel'])['spend']
    .sum()
    .reset_index()
)

# Keep only the "All" channel rows and expose their spend as a Series keyed by (year, mcg).
online_ratio_yearly_all = (
    online_ratio_yearly
    .loc[lambda frame: frame["merchant_channel"] == "All"]
    .set_index(["year", "mcg"])
    .loc[:, "spend"]
)

online_ratio_yearly_all.head(50)


In [None]:
# Denominator lookup: the "All"-channel spend for every (year, mcg) pair, as a Series
# indexed by (year, mcg). No dtype coercion is applied to 'spend' here.
all_spend = (
    online_ratio_yearly
    .loc[lambda frame: frame["merchant_channel"] == "All"]
    .set_index(["year", "mcg"])
    .loc[:, "spend"]
)

# Share of the matching 'All'-channel total that a single row represents
def compute_percentage(row, totals=None):
    """Return ``row["spend"]`` as a percentage of the total for its (year, mcg).

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(..., axis=1)``)
        Must provide the keys "year", "mcg" and "spend".
    totals : pandas.Series, optional
        Totals indexed by (year, mcg). When omitted, the notebook-level
        ``all_spend`` Series is looked up at call time, so existing
        ``DataFrame.apply(compute_percentage, axis=1)`` calls keep working.

    Returns
    -------
    float or None
        ``spend / total * 100``; ``None`` when the (year, mcg) pair has no
        entry in ``totals`` or when that total is missing or zero (the share
        is undefined in both cases).
    """
    if totals is None:
        # Fall back to the global lookup built earlier in the notebook.
        totals = all_spend

    key = (row["year"], row["mcg"])
    if key not in totals:
        return None

    total = totals[key]
    # A zero total would otherwise produce inf/NaN (or raise for plain Python numbers).
    if pd.isnull(total) or total == 0:
        return None
    return (row["spend"] / total) * 100

# Add each row's share of its (year, mcg) 'All'-channel total, as a percentage.
online_ratio_yearly["spend_vs_all_%"] = online_ratio_yearly.apply(compute_percentage, axis=1)

# Preview with the notebook's rich table display instead of printing the whole frame as text.
online_ratio_yearly.head()


In [None]:
# Convert the 'year' labels to numbers; values that cannot be parsed become NaN.
online_ratio_yearly["year"] = pd.to_numeric(online_ratio_yearly["year"], errors="coerce")

# Rebuild the (year, mcg) -> "All"-channel spend lookup so its keys use the numeric years.
all_spend = (
    online_ratio_yearly
    .loc[lambda frame: frame["merchant_channel"] == "All"]
    .set_index(["year", "mcg"])
    .loc[:, "spend"]
)

# Compute spend_vs_all_% for a single row
def compute_percentage(row, totals=None):
    """Return ``row["spend"]`` as a percentage of the total for its (year, mcg).

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(..., axis=1)``)
        Must provide the keys "year", "mcg" and "spend".
    totals : pandas.Series, optional
        Totals indexed by (year, mcg). When omitted, the notebook-level
        ``all_spend`` Series is looked up at call time, so existing
        ``DataFrame.apply(compute_percentage, axis=1)`` calls keep working.

    Returns
    -------
    float or None
        ``spend / total * 100``; ``None`` when the (year, mcg) pair has no
        entry in ``totals`` or when that total is missing or zero (the share
        is undefined in both cases).
    """
    if totals is None:
        # Fall back to the global lookup built earlier in the notebook.
        totals = all_spend

    key = (row["year"], row["mcg"])
    if key not in totals:
        return None

    total = totals[key]
    # A zero total would otherwise produce inf/NaN (or raise for plain Python numbers).
    if pd.isnull(total) or total == 0:
        return None
    return (row["spend"] / total) * 100

# Percentage of the (year, mcg) 'All'-channel total represented by each row.
online_ratio_yearly["spend_vs_all_%"] = online_ratio_yearly.apply(compute_percentage, axis="columns")

# Keep the two individual channels only ...
df_filtered = online_ratio_yearly.loc[
    lambda frame: frame["merchant_channel"].isin(["Face to Face", "Online"])
]
# ... and just the two comparison years.
df_filtered_2 = df_filtered.loc[lambda frame: frame["year"].isin([2019, 2024])]

# One row per (mcg, merchant_channel), one column per year.
mcg_online_ratio = pd.pivot_table(
    df_filtered_2,
    index=["mcg", "merchant_channel"],
    columns="year",
    values="spend_vs_all_%",
)


print(mcg_online_ratio)

In [None]:
# Change in each channel's share of 'All' spend between 2019 and 2024, per MCG,
# with MCGs ordered by the change in their Online share.
#
# `mcg_online_ratio` is a pivot indexed by (mcg, merchant_channel), so those two
# fields are index levels rather than columns. Flatten it into a NEW frame so both
# become ordinary columns and the pivot itself is left untouched (the cell can then
# be re-run safely).
mcg_channel_change = mcg_online_ratio.reset_index()

# Difference between the 2024 and 2019 shares, in percentage points.
mcg_channel_change["diff"] = mcg_channel_change[2024] - mcg_channel_change[2019]

# Order MCGs by the change in their Online share, largest increase first.
# MCGs without an Online row do not appear in this ordering and are not plotted.
online_subset = mcg_channel_change[mcg_channel_change["merchant_channel"] == "Online"]
total_change = online_subset.groupby("mcg")["diff"].sum().sort_values(ascending=False)
ordered_mcgs = list(total_change.index)

channels = ["Face to Face", "Online"]
x = np.arange(len(ordered_mcgs))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
for i, channel in enumerate(channels):
    # Select the rows of the channel being drawn, then align them on the common MCG
    # order; reindex leaves NaN (an empty bar) where a channel has no row for an MCG
    # instead of raising.
    channel_diff = (
        mcg_channel_change.loc[mcg_channel_change["merchant_channel"] == channel]
        .set_index("mcg")["diff"]
        .reindex(ordered_mcgs)
    )
    ax.bar(x + i * width, channel_diff.to_numpy(dtype=float), width, label=channel)

ax.set_xlabel("Merchant Category Group")
ax.set_ylabel("Change in Spend vs All % (2024 - 2019)")
ax.set_title("Change in Spend vs All % from 2019 to 2024 by MCG and Channel")
# Centre each tick between the two bars of its group.
ax.set_xticks(x + width / 2)
ax.set_xticklabels(ordered_mcgs, rotation=90)
ax.legend()
ax.grid(axis='y')
plt.tight_layout()
plt.show()

In [None]:
# Change in each channel's share of 'All' spend between 2019 and 2024, per MCG,
# with MCGs sorted by name.
#
# `pivot_df` is not defined by any earlier cell, so derive it here from the
# (mcg, merchant_channel) x year pivot built above; reset_index() turns both index
# levels into ordinary columns and returns a new frame, leaving `mcg_online_ratio`
# unchanged.
pivot_df = mcg_online_ratio.reset_index()

# Compute the difference between 2024 and 2019 (percentage points)
pivot_df["diff"] = pivot_df[2024] - pivot_df[2019]

# Sort the DataFrame
pivot_df = pivot_df.sort_values(["mcg", "merchant_channel"])

mcgs = pivot_df["mcg"].unique()
channels = ["Face to Face", "Online"]
x = np.arange(len(mcgs))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
for i, channel in enumerate(channels):
    subset = pivot_df[pivot_df["merchant_channel"] == channel]
    # Align on the full MCG list so every bar sits over its own tick even when a
    # channel has no row for some MCG (that bar is then left empty).
    channel_diff = subset.set_index("mcg")["diff"].reindex(mcgs)
    ax.bar(x + i * width, channel_diff.to_numpy(dtype=float), width, label=channel)

ax.set_xlabel("Merchant Category Group")
ax.set_ylabel("Change in Spend vs All % (2024 - 2019)")
ax.set_title("Change in Spend vs All % from 2019 to 2024 by MCG and Channel")
# Centre each tick between the two bars of its group.
ax.set_xticks(x + width / 2)
ax.set_xticklabels(mcgs, rotation=90)
ax.legend()
ax.grid(axis='y')
plt.tight_layout()
plt.show()