In [None]:
# Directory appended to the module search path below
# (presumably the checkout location of fintrans_toolbox — confirm).
project_path = "/home/jupyter"
import os
import sys

# Must run before the fintrans_toolbox imports further down, which rely on
# this path entry being present when the package is not otherwise installed.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client built with default arguments; it is passed to the
# bq_utils helper that runs the SQL query in the next cell.
client = bigquery.Client()

In [None]:
# Summarise online spend by UK cardholders outside the UK, per MCC and quarter.
#
# The spend is aggregated with SUM() and grouped by quarter and MCC only.
# Grouping by the measure column itself (GROUP BY ..., spend) with no
# aggregate acts like SELECT DISTINCT and silently drops rows that happen to
# share the same spend value for an MCC/quarter, undercounting totals.
#
# NOTE(review): '2025Q1' is excluded — presumably an incomplete quarter; confirm.
# NOTE(review): destination_country != 'UNITED KINGDOM' keeps every other
# value, including any 'All' roll-up row if the table has one — confirm.
UK_spending_by_mcc = '''SELECT time_period_value, mcc, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
  AND time_period_value != '2025Q1'
  AND merchant_channel = 'Online'
  AND cardholder_origin_country = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND destination_country != 'UNITED KINGDOM'
  AND mcg != 'All'
  AND mcc != 'All'
GROUP BY time_period_value, mcc
ORDER BY time_period_value, spend DESC'''

# Run the query through the project helper; the result is expected to be a
# DataFrame with columns time_period_value, mcc and spend (used as such below).
mcc_by_quarter = bq.read_bq_table_sql(client, UK_spending_by_mcc)

mcc_by_quarter.head()

In [None]:
# Save the query result to the working directory (default options, so the
# DataFrame index is written as the first column).
mcc_by_quarter.to_csv(path_or_buf='mcc_by_quarter.csv')

In [None]:
# Rows for the two comparison years (quarter labels are matched on their year prefix)
quarter_labels = mcc_by_quarter['time_period_value']
df_2019 = mcc_by_quarter.loc[quarter_labels.str.startswith('2019')]
df_2024 = mcc_by_quarter.loc[quarter_labels.str.startswith('2024')]

# Five MCCs with the largest summed spend in each of those years
spend_by_mcc_2019 = df_2019.groupby('mcc')['spend'].sum()
spend_by_mcc_2024 = df_2024.groupby('mcc')['spend'].sum()
top5_2019 = spend_by_mcc_2019.nlargest(5).index
top5_2024 = spend_by_mcc_2024.nlargest(5).index

# MCCs that make the top five in either year
top_mccs = set(top5_2019) | set(top5_2024)

# Keep every quarter's rows for those MCCs only
df_filtered = mcc_by_quarter.loc[mcc_by_quarter['mcc'].isin(top_mccs)]

# Denominator: spend across ALL MCCs in each quarter (not just the top ones)
total_spend_per_quarter = (
    mcc_by_quarter
    .groupby('time_period_value')['spend']
    .sum()
    .reset_index(name='total_spend')
)

# Attach the quarterly total to each row and express spend as a percentage of it
df_merged = (
    df_filtered
    .merge(total_spend_per_quarter, on='time_period_value')
    .assign(spend_pct=lambda rows: (rows['spend'] / rows['total_spend']) * 100)
)

# One percentage per (quarter, MCC) pair
df_grouped = df_merged.groupby(['time_period_value', 'mcc'], as_index=False)['spend_pct'].sum()

# Wide layout for plotting: quarters as rows, MCCs as columns, gaps filled
# with 0, rows sorted by quarter label
df_pivot = (
    df_grouped
    .pivot(index='time_period_value', columns='mcc', values='spend_pct')
    .fillna(0)
    .sort_index()
)

# Line chart: one series per MCC showing its share of total quarterly spend
fig, ax = plt.subplots(figsize=(14, 8))
for mcc_code, pct_series in df_pivot.items():
    ax.plot(df_pivot.index, pct_series, marker='o', label=mcc_code)

ax.set_title('Quarterly % of Total Spend for Top 5 MCCs in 2019 and 2024 (2019 - 2024)')
ax.set_xlabel('Quarter')
ax.set_ylabel('Percentage of Total Spend')
# Tilt the quarter labels so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
# Legend sits outside the axes, anchored to the right of the plot area
ax.legend(title='MCC', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Show the five MCCs with the highest summed spend over the 2019 quarters
print(top5_2019)


In [None]:
# Show the five MCCs with the highest summed spend over the 2024 quarters
print(top5_2024)