In [None]:
# Root directory that is added to the import search path below.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports further down: appending the
# project root to sys.path is presumably what makes that package importable
# (TODO confirm it is not installed in the environment by other means).
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client built with no explicit arguments; it is passed to the
# `bq` helper when the query is run.
client = bigquery.Client()

In [None]:
# Quarterly spend per merchant category group (mcg).
# Filters applied in the query: quarterly rows only, 2025Q1 excluded,
# cardholder_origin = 'UNITED KINGDOM', cardholder_origin_country = 'All',
# merchant_channel = 'Online', and the 'All' / 'BUSINESS TO BUSINESS' mcg rows
# dropped.
# NOTE(review): the GROUP BY lists every selected column and there is no
# aggregate, so it only de-duplicates identical rows -- confirm that is the
# intent rather than SUM(spend).
UK_spending_by_country = '''SELECT time_period_value, mcg, spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1'  
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and merchant_channel = 'Online'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS'
GROUP BY mcg, time_period_value, spend
ORDER BY time_period_value, spend DESC'''

# Run the query through the project helper.
df_by_mcg = bq.read_bq_table_sql(client, UK_spending_by_country)

# The first four characters of the period label (e.g. '2025Q1') are the year;
# the result stays a string.
df_by_mcg['year'] = df_by_mcg['time_period_value'].str.slice(stop=4)

df_by_mcg.head()


In [None]:
# Save the quarterly extract; the DataFrame index is written as the first
# column (pandas' default, spelled out here).
df_by_mcg.to_csv('mcg_online_abroad.csv', index=True)

In [None]:
# Collapse the quarterly rows into one spend total per (year, mcg) pair and
# return the grouping keys as ordinary columns.
yearly_mcg_df = (
    df_by_mcg
    .groupby(['year', 'mcg'])['spend']
    .sum()
    .reset_index()
)

In [None]:
# Persist the yearly totals, then load them back from disk.
yearly_mcg_df.to_csv('yearly_mcg_df.csv')
df = pd.read_csv('yearly_mcg_df.csv')

# Keep only the 2024 rows and the two columns needed downstream.
# NOTE(review): 'year' is built as a string in `yearly_mcg_df`; the comparison
# with the integer 2024 presumably relies on read_csv re-inferring the column
# as numeric -- confirm before removing the CSV round trip.
filtered_df = df.loc[df['year'].eq(2024), ['mcg', 'spend']]
filtered_df.to_csv('filtered_yearly_mcg_2024.csv', index=False)


In [None]:
# Reshape to one row per year and one column per mcg, so every mcg becomes its
# own line on the chart.
pivot_df = yearly_mcg_df.pivot(index='year', columns='mcg', values='spend')

# Calculate index (2019 = 100): each mcg's yearly spend divided by that same
# mcg's spend in the base year. `.loc[base_year]` raises KeyError if the base
# year is missing from the data.
base_year = '2019'
indexed_df = pivot_df.div(pivot_df.loc[base_year], axis='columns') * 100

# Plot the indexed spend values. The previous version computed `indexed_df`
# but drew the raw `pivot_df`, so the index was never shown.
fig, ax = plt.subplots()
indexed_df.plot(kind='line', marker='o', ax=ax)
# NOTE(review): the title says 'Rest of Europe', but the query feeding this
# data filters on cardholder_origin = 'UNITED KINGDOM' -- confirm the wording.
ax.set_title('Rest of Europe Yearly Spend')
ax.set_xlabel('Year')
ax.set_ylabel('Spend index (2019 = 100)')
ax.legend(title='mcg')
ax.grid(True)
plt.show()


In [None]:
# Group by 'mcg' and sum the 'spend'
total_spend_per_mcg = filtered_df.groupby('mcg')['spend'].sum()

# Get the top 10 'mcg' categories by spend
top_10_mcg = total_spend_per_mcg.nlargest(10)

# Check if the result is not empty
if not top_10_mcg.empty:
    # Express each top-10 category as a share of spend across ALL categories in
    # `filtered_df`. The previous version divided by the top-10 subtotal, which
    # forced the bars to sum to 100% and overstated every share whenever more
    # than ten categories exist -- contradicting the axis label and title.
    top_10_mcg_percentage = (top_10_mcg / total_spend_per_mcg.sum()) * 100

    # Plot the bar chart on an explicit Axes
    fig, ax = plt.subplots()
    top_10_mcg_percentage.plot(kind='bar', color='skyblue', ax=ax)

    # Customize the plot
    ax.set_ylabel('Percentage of Total Spend')
    ax.set_title('Top 10 MCGs by percentage of total spend, 2024')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
else:
    print("No data available to plot.")
