In [None]:
# Directory appended to sys.path below so that the project-local import
# further down can be resolved (presumably where fintrans_toolbox is checked
# out - TODO confirm).
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# BigQuery client shared by the query cells; created with no explicit
# project or credentials arguments.
client = bigquery.Client()

In [None]:
# Summarise the UK Cardholders Spending data by country Abroad Online Totals
#
# The query keeps quarterly rows for the 'Online' merchant channel where the
# cardholder origin is the UK and the destination country is not the UK.
#
# NOTE(review): the GROUP BY lists every selected column (including `spend`)
# and no aggregate function is used, so the query de-duplicates rows rather
# than summing them. If the table can hold more than one row per
# quarter/country under these filters, the per-country pct_change/diff
# computed later would compare rows within the same quarter - confirm.

UK_spending_by_country = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame by the cells below (presumably what the helper returns - verify).
df_by_country = bq.read_bq_table_sql(client, UK_spending_by_country)
# Preview the first rows as the cell output.
df_by_country.head()

In [None]:
# Export the query result to a CSV file; the relative path resolves against
# the kernel's current working directory. to_csv is called with its defaults,
# so the DataFrame index is written as the first column.
df_by_country.to_csv('UK_Spending_By_Country.csv')

In [None]:
# Add period-on-period change columns (relative and absolute) per destination country.
# Rows are ordered by country and then period first, so each row is compared
# with the row that precedes it within the same country.
df_by_country = df_by_country.sort_values(by=['destination_country', 'time_period_value'])
spend_per_country = df_by_country.groupby('destination_country')['spend']
df_by_country['pct_change'] = spend_per_country.pct_change()
df_by_country['value_change'] = spend_per_country.diff()


In [None]:
# Plot stacked bar chart with all values - Bar Chart UK Abroad Online Spend by Countries Total 2019 - 2025

# Aggregate the `spend` column only, so that unstack() yields plain country
# names as columns (and legend labels) instead of ('spend', <country>) tuples.
spend_by_quarter_all = (
    df_by_country
    .groupby(['time_period_value', 'destination_country'])['spend']
    .sum()
    .unstack()
)

# `stacked` is a boolean flag; the string 'True' used previously was only
# accepted because a non-empty string is truthy ('False' would stack too).
ax = spend_by_quarter_all.plot(kind='bar', stacked=True)

# Label the chart so it can be read on its own.
ax.set_title('UK Spending by Country (Online - All Cards)')
ax.set_xlabel('Time Period')
ax.set_ylabel('Spending')
plt.show()

In [None]:
# Interactive Plotly Express line chart for all countries:
# spend over time with one coloured trace per destination country.
pfig2 = px.line(data_frame=df_by_country, x="time_period_value", y="spend", color="destination_country")
pfig2

In [None]:
# Top 5 Countries Line Chart

import pandas as pd
import plotly.express as px

# Total spend per destination country, summed over every period in the data
spend_totals = df_by_country.groupby('destination_country')['spend'].sum()
total_spend_by_country = spend_totals.reset_index()

# Rank countries from highest to lowest total spend and keep the first five
ranked_by_spend = total_spend_by_country.sort_values(by='spend', ascending=False)
top_5_countries = ranked_by_spend.head(5)

# Report the five countries and their totals
print("Top 5 Countries with the Highest Total Spend (2019-2025):")
print(top_5_countries[['destination_country', 'spend']])

# Keep only the rows of the full DataFrame that belong to those five countries
is_top_5 = df_by_country['destination_country'].isin(top_5_countries['destination_country'])
df_top_5_countries = df_by_country[is_top_5]

# Line chart of spend over time, one trace per top-5 country
pfig2 = px.line(
    data_frame=df_top_5_countries,
    x="time_period_value",
    y="spend",
    color="destination_country",
    title="UK Online Spending by Country (Top 5 Countries) from 2019-2025",
)

# Render the figure
pfig2.show()

In [None]:
pip install kaleido

In [None]:
# Point pfig2 back at the all-countries line chart (it was last assigned the
# top-5 chart); the figure is only assigned here, not displayed.
pfig2 = px.line(data_frame=df_by_country, x="time_period_value", y="spend", color="destination_country")

In [None]:
# Save the figure as a PNG image
# Writes whichever figure pfig2 currently refers to; the relative path
# resolves against the kernel's current working directory. Static export
# relies on the kaleido package installed in an earlier cell.
pfig2.write_image("line_chartoutsideuk.png")


In [None]:
# Plotly Express line chart for all countries (spend over time, one trace per
# destination country), displayed as the cell output.

pfig2 = px.line(data_frame=df_by_country, x="time_period_value", y="spend", color="destination_country")
pfig2

In [None]:
# Select some countries to plot and reduce the dataframe - Examples
countries = ['AUSTRALIA', 'SPAIN', 'FRANCE']
# Row mask for the chosen countries, applied together with the column subset
in_selection = df_by_country['destination_country'].isin(countries)
plot_df = df_by_country.loc[in_selection, ['time_period_value', 'destination_country', 'spend']]

In [None]:
# Stacked bar chart using pandas plot: one bar per period, one segment per
# selected country.

# Aggregate the `spend` column only, so that unstack() yields plain country
# names as columns (and legend labels) instead of ('spend', <country>) tuples.
subset_spend_by_quarter = (
    plot_df
    .groupby(['time_period_value', 'destination_country'])['spend']
    .sum()
    .unstack()
)

# `stacked` is a boolean flag; the string 'True' used previously was only
# accepted because a non-empty string is truthy ('False' would stack too).
ax = subset_spend_by_quarter.plot(kind='bar', stacked=True)

# Label the chart so it can be read on its own.
ax.set_title('UK Spending by Selected Countries (Online - All Cards)')
ax.set_xlabel('Time Period')
ax.set_ylabel('Spending')
plt.show()

In [None]:
# Select some countries to plot and reduce the dataframe - Examples
# (stacked bar chart of spend per period for the selected countries)

# List of selected countries to plot
countries = ['AUSTRIA','SPAIN','FRANCE', 'GERMANY', 'ITALY','NETHERLANDS','POLAND','PORTUGAL','REPUBLIC OF IRELAND']

# Filter the DataFrame to only include selected countries
plot_df = df_by_country[['time_period_value', 'destination_country', 'spend']].loc[df_by_country['destination_country'].isin(countries)]

# Group by time period and destination country, then aggregate the spend.
# The result has two-level columns: ('spend', <country>).
plot_data = plot_df.groupby(['time_period_value', 'destination_country']).sum().unstack()

# Create the figure and axes once and draw onto them. Calling plt.figure()
# and then DataFrame.plot() without `ax` made pandas open a second figure,
# leaving the first one empty.
fig, ax = plt.subplots(figsize=(14, 8))

# Selecting the 'spend' level gives plain country names as legend labels
# instead of ('spend', <country>) tuples; plot_data itself is left unchanged.
plot_data['spend'].plot(kind='bar', stacked=True, ax=ax)

# Adding labels and title
ax.set_title('UK Spending by Selected Countries (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
# Legend is anchored just outside the right edge of the axes
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Display the plot
fig.tight_layout()  # Ensures that everything fits well within the figure
plt.show()

In [None]:
# Plotly Express line chart of the country subset currently held in plot_df:
# spend over time, one coloured trace per destination country.

pfig = px.line(data_frame=plot_df, x="time_period_value", y="spend", color="destination_country")
pfig

In [None]:
# Summarise the data by country Non-UK Cardholders Abroad Spending Online Totals by Country
#
# Same query as the UK-cardholder one except that the cardholder origin
# filter is inverted (cardholder_origin != 'UNITED KINGDOM').
#
# NOTE(review): the GROUP BY lists every selected column (including `spend`)
# and no aggregate function is used, so rows are de-duplicated rather than
# summed. Because the origin filter is an inequality, several origins may
# match for the same quarter/country; if so, the result holds multiple rows
# per quarter/country and the per-country pct_change/diff computed later
# would compare rows within the same quarter - confirm against the table.

UK_spending_by_country1 = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin != 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame by the cells below (presumably what the helper returns - verify).
df_by_country1 = bq.read_bq_table_sql(client, UK_spending_by_country1)
# Preview the first rows as the cell output.
df_by_country1.head()

In [None]:
# Export the non-UK cardholder query result to a CSV file; the relative path
# resolves against the kernel's current working directory. to_csv is called
# with its defaults, so the DataFrame index is written as the first column.
df_by_country1.to_csv('UK_Spending_By_Country1.csv')

In [None]:
# Add period-on-period change columns (relative and absolute) per destination country.
# Rows are ordered by country and then period first, so each row is compared
# with the row that precedes it within the same country.
df_by_country1 = df_by_country1.sort_values(by=['destination_country', 'time_period_value'])
spend_per_country1 = df_by_country1.groupby('destination_country')['spend']
df_by_country1['pct_change'] = spend_per_country1.pct_change()
df_by_country1['value_change'] = spend_per_country1.diff()


In [None]:
# Plot stacked bar chart with all values - Non-UK Cardholder Abroad Online Spending Totals by Country

# Aggregate the `spend` column only, so that unstack() yields plain country
# names as columns (and legend labels) instead of ('spend', <country>) tuples.
spend_by_quarter_non_uk = (
    df_by_country1
    .groupby(['time_period_value', 'destination_country'])['spend']
    .sum()
    .unstack()
)

# `stacked` is a boolean flag; the string 'True' used previously was only
# accepted because a non-empty string is truthy ('False' would stack too).
ax = spend_by_quarter_non_uk.plot(kind='bar', stacked=True)

# Label the chart so it can be read on its own.
ax.set_title('Non-UK Cardholder Abroad Online Spending Totals by Country')
ax.set_xlabel('Time Period')
ax.set_ylabel('Spending')
plt.show()

In [None]:
# Bar chart, side labels
# Stacked bar chart of spend per period and destination country built from
# df_by_country1, exported as both CSV (the aggregated table) and PNG (the chart).

# Grouping the data for the plot; the result has two-level columns
# ('spend', <country>) with one row per time period.
grouped_data = df_by_country1[['time_period_value', 'destination_country', 'spend']].groupby(['time_period_value', 'destination_country']).sum().unstack()

# Save the aggregated data to a CSV file
# NOTE(review): the following cell writes to the same CSV and PNG paths, so
# the files produced here are overwritten when it runs.
csv_filename = '/home/jupyter/uk_spending_by_country.csv'
grouped_data.to_csv(csv_filename)

# Create the figure and axes once and draw onto them. Calling plt.figure()
# and then DataFrame.plot() without `ax` made pandas open a second figure,
# leaving the first one empty.
fig, ax = plt.subplots(figsize=(12, 8))

# Selecting the 'spend' level gives plain country names as legend labels
# instead of ('spend', <country>) tuples; grouped_data itself is unchanged.
grouped_data['spend'].plot(kind='bar', stacked=True, ax=ax)

# Adding labels and title
# NOTE(review): the title says "UK Spending" but the data comes from
# df_by_country1, whose query filters cardholder_origin != 'UNITED KINGDOM'
# - confirm which of the two is intended.
ax.set_title('UK Spending by Country (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
# Legend is anchored just outside the right edge of the axes
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Saving the plot as a PNG image
png_filename = '/home/jupyter/uk_spending_by_country.png'
fig.tight_layout()  # Ensures that everything fits well
fig.savefig(png_filename)

# Show the plot (optional)
plt.show()

# Return file paths for reference
csv_filename, png_filename

In [None]:
# Bar Chart Quarterly the UK Cardholders Spending data by country Abroad Online Totals
# Stacked bar chart of spend per period and destination country with the
# per-period total printed above each bar; also exports the aggregated table
# (CSV) and the chart (PNG).
# NOTE(review): the heading and title refer to UK cardholders, but the data
# comes from df_by_country1, whose query filters
# cardholder_origin != 'UNITED KINGDOM' - confirm which is intended.

# Grouping the data for the plot; the result has two-level columns
# ('spend', <country>) with one row per time period.
grouped_data = df_by_country1[['time_period_value', 'destination_country', 'spend']].groupby(['time_period_value', 'destination_country']).sum().unstack()

# Save the aggregated data to a CSV file
# NOTE(review): same CSV and PNG paths as the previous cell, so this run
# overwrites the files that cell produced.
csv_filename = '/home/jupyter/uk_spending_by_country.csv'
grouped_data.to_csv(csv_filename)

# Calculate the total spend for each quarter (sum across all countries)
total_spend_per_quarter = grouped_data.sum(axis=1)

# Print out the total spend for each quarter
print("Total Spend per Quarter:")
print(total_spend_per_quarter)

# Create the figure and axes once and draw onto them. Calling plt.figure()
# and then DataFrame.plot() without `ax` made pandas open a second figure,
# leaving the first one empty.
fig, ax = plt.subplots(figsize=(12, 8))

# Selecting the 'spend' level gives plain country names as legend labels
# instead of ('spend', <country>) tuples; grouped_data itself is unchanged.
grouped_data['spend'].plot(kind='bar', stacked=True, ax=ax)

# Adding labels and title
ax.set_title('UK Spending by Country (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
# Legend is anchored just outside the right edge of the axes
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Show the total spend on top of each bar. Bars are drawn at x = 0, 1, 2, ...
# in row order, so the enumerate position is the bar's x coordinate.
for bar_position, quarter_total in enumerate(total_spend_per_quarter):
    ax.text(bar_position, quarter_total + 0.05, f'{quarter_total:,.0f}', ha='center', va='bottom', fontsize=10, color='black')

# Saving the plot as a PNG image
png_filename = '/home/jupyter/uk_spending_by_country.png'
fig.tight_layout()  # Ensures that everything fits well
fig.savefig(png_filename)

# Show the plot (optional)
plt.show()

# Return file paths for reference
csv_filename, png_filename