In [None]:
# Directory added to the import path below.
# NOTE(review): presumably this is where the fintrans_toolbox package lives — confirm.
project_path = "/home/jupyter"
import os
import sys

# Extend the import path before the fintrans_toolbox imports further down run.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client handed to bq.read_bq_table_sql when the query is executed.
client = bigquery.Client()

In [None]:
# Quarterly online spend by UK cardholders at non-UK destinations.
# The filters drop the 2025Q1 quarter and the 'BUSINESS TO BUSINESS' mcg.
# NOTE(review): `spend` appears in GROUP BY and no aggregate is selected, so the
# statement behaves like SELECT DISTINCT: rows that share quarter, destination and
# spend collapse into one even if they differ in an unselected column (e.g. mcg).
# Confirm that this de-duplication is intended.
UK_spending_by_country = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value != '2025Q1' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and mcg != 'BUSINESS TO BUSINESS'
and destination_country != 'UNITED KINGDOM'
GROUP BY destination_country, time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''

# Run the query through the toolbox helper and keep the result as a DataFrame.
df_by_country = bq.read_bq_table_sql(client, UK_spending_by_country)

# The first four characters of the period label (e.g. '2024Q4') are the year;
# the new column therefore holds strings, not integers.
df_by_country['year'] = df_by_country['time_period_value'].str.slice(stop=4)

df_by_country.head()


In [None]:
# Export the quarterly rows; index=True (the pandas default) keeps the row index
# as the first, unnamed CSV column.
df_by_country.to_csv(path_or_buf='UK_country_intl.csv', index=True)

In [None]:
# Collapse the quarterly rows into one spend total per (year, destination country).
# as_index=False keeps the grouping keys as ordinary columns.
grouped_df = df_by_country.groupby(
    ['year', 'destination_country'], as_index=False
)['spend'].sum()

In [None]:
# Write the yearly totals to disk (row index included) and load them straight back.
grouped_df.to_csv('UK_country_yearly.csv')
df = pd.read_csv('UK_country_yearly.csv')

# Keep only the 2024 rows and the two columns needed downstream.
# NOTE(review): the comparison against the integer 2024 relies on read_csv
# inferring a numeric dtype for `year`; filtering grouped_df directly would need
# whatever dtype `year` has there — confirm before bypassing the CSV round trip.
latest_year = df.loc[df['year'].eq(2024), ['destination_country', 'spend']]
latest_year.to_csv('2024_countries.csv', index=False)


In [None]:
# Reshape to a year-by-country table: one row per year, one column per destination.
pivot_df = grouped_df.pivot(index='year', columns='destination_country', values='spend')

# Rebase every country series so that its 2019 value equals 100.
# The base-year row is aligned to the columns by country name.
base_year = '2019'
indexed_df = pivot_df.div(pivot_df.loc[base_year], axis='columns').mul(100)

# Line chart of the rebased series, one line per destination country.
ax = indexed_df.plot.line(marker='o')
ax.set_title('Indexed Yearly Spend by Destination Country (2019 = 100)')
ax.set_xlabel('Year')
ax.set_ylabel('Indexed Spend')
ax.legend(title='Destination Country')
ax.grid(True)
plt.show()


In [None]:
# Destinations that this analysis places in the 'EU' group.
# NOTE(review): the list includes SWITZERLAND and a 'REST OF  EUROPE' bucket, so the
# 'EU' label is broader than EU membership. The double space in 'REST OF  EUROPE'
# only matches if the data stores the value exactly that way — confirm both points.
EU_definition = [
    'FRANCE',
    'SPAIN',
    'GERMANY',
    'AUSTRIA',
    'ITALY',
    'NETHERLANDS',
    'POLAND',
    'PORTUGAL',
    'REPUBLIC OF IRELAND',
    'REST OF  EUROPE',
    'SWITZERLAND',
]

# Tag each row 'EU' when its destination is in the list above, otherwise 'Not EU'.
# This adds the column to grouped_df in place.
grouped_df['EU'] = (
    grouped_df['destination_country']
    .isin(EU_definition)
    .map({True: 'EU', False: 'Not EU'})
)

# Yearly spend totals for each of the two groups.
EU_NONEU = grouped_df.groupby(['year', 'EU'], as_index=False)['spend'].sum()

# Export both the labelled country rows and the two-group summary.
grouped_df.to_csv('EU_grouped.csv', index=False)
EU_NONEU.to_csv('EU_NONEU.csv', index=False)


In [None]:
# Reshape to a year-by-group table: one row per year, columns 'EU' and 'Not EU'.
pivot_df = EU_NONEU.pivot(index='year', columns='EU', values='spend')

# Calculate index (2019 = 100): each column is divided by its own base-year value.
# A missing base year raises KeyError here rather than producing a silent NaN chart.
base_year = '2019'
indexed_df = pivot_df.div(pivot_df.loc[base_year], axis='columns') * 100

# Plot the indexed spend values, one line per group.
ax = indexed_df.plot(kind='line', marker='o')
ax.set_title('Indexed Yearly Online Spend by Destination (2019 = 100)')
ax.set_xlabel('Year')
ax.set_ylabel('Indexed Spend')
# The legend title was previously the truncated string 'Destination Coun'; the lines
# are the EU / Not EU groups, so it now matches the wording of the chart title.
ax.legend(title='Destination')
ax.grid(True)
plt.show()

In [None]:
# Sum the 2024 spend for each destination country.
total_spend_per_country = latest_year.groupby('destination_country')['spend'].sum()

# Keep the ten destinations with the largest spend.
top_10_country = total_spend_per_country.nlargest(10)

# Only plot when there is something to show.
if not top_10_country.empty:
    # Express each top-10 destination as a share of spend across ALL destinations,
    # which is what the axis label and title state. Dividing by the top-10 sum
    # (as before) forced the bars to add up to 100% and overstated every share.
    top_10_country_percentage = (top_10_country / total_spend_per_country.sum()) * 100

    # Plot the bar chart
    ax = top_10_country_percentage.plot(kind='bar', color='skyblue')

    # Customize the plot
    ax.set_ylabel('Percentage of Total Spend')
    ax.set_title('Top 10 Countries by percentage of total spend, 2024')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
else:
    print("No data available to plot.")