In [None]:
# Directory appended to sys.path below so the `fintrans_toolbox` imports resolve.
project_path = "/home/jupyter"
import os
import sys

# Must run before the `fintrans_toolbox` imports further down.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client passed to every bq.read_bq_table_sql call in this notebook.
client = bigquery.Client()

In [None]:
# UK cardholders' ONLINE spending abroad: quarterly rows per destination country,
# rolled up to one total per calendar year and saved to CSV.

# mcc = 'All' is pinned to match the two sibling all-channel queries in this notebook;
# without it any per-MCC rows would be summed on top of the 'All' roll-up.
# NOTE(review): confirm against the table schema that Online rows carry mcc = 'All'.
# NOTE(review): `spend` is selected un-aggregated and listed in GROUP BY, so the GROUP BY
# only de-duplicates identical rows - confirm that is intended rather than SUM(spend).
UK_spending_by_online_Intl_All = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_online_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_online_Intl_All)
df_by_online_Intl_All = df_by_online_Intl_All.rename(columns={'spend': 'Intl_online_all'})

# The year is taken from the first four characters of time_period_value
# (assumes labels such as '2019Q1' that start with the year).
df_by_online_Intl_All['year'] = df_by_online_Intl_All['time_period_value'].str[:4].astype(int)

# One row per year; groupby sorts its keys, so the result is already in year order.
df_yearly_online_Intl_All = (
    df_by_online_Intl_All.groupby('year', as_index=False)['Intl_online_all'].sum()
)

# index=False keeps the meaningless row index out of the file, matching the other
# exports in this notebook (combined_data.csv, country_spend_percentage_vs_abroad.csv).
df_yearly_online_Intl_All.to_csv('yearly_online_Intl_All.csv', index=False)

# Last expression of the cell, so the notebook renders the yearly totals.
df_yearly_online_Intl_All

In [None]:
# UK cardholders' spending abroad, totalled per quarter and destination country.
# mcc, mcg, merchant_channel and cardholder_origin_country are all pinned to 'All',
# and rows whose destination is the United Kingdom are excluded.

UK_spending_by_country = '''SELECT time_period_value, destination_country, SUM(spend) AS total 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg ='All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value 
ORDER BY time_period_value, total DESC'''
# Run the query through the project's bq_utils helper; the result is used as a pandas
# DataFrame by later cells (to_csv, groupby).
df_by_country = bq.read_bq_table_sql(client, UK_spending_by_country)
# Last expression of the cell: displays the query result.
df_by_country

In [None]:
# Persist the quarterly by-country totals. With the default to_csv arguments the
# DataFrame index is written as an extra, unnamed first column.
df_by_country.to_csv('UK_Spending_By_Country.csv')

In [None]:
# UK cardholders' spending abroad across ALL merchant channels: quarterly rows per
# destination country, rolled up to one total per calendar year and saved to CSV.

# NOTE(review): `spend` is selected un-aggregated and listed in GROUP BY, so the GROUP BY
# only de-duplicates identical rows - confirm that is intended rather than SUM(spend).
UK_spending_by_Intl_All = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_Intl_All)
df_by_Intl_All = df_by_Intl_All.rename(columns={'spend': 'abroad_spend_all'})

# The year is taken from the first four characters of time_period_value
# (assumes labels such as '2019Q1' that start with the year).
df_by_Intl_All['year'] = df_by_Intl_All['time_period_value'].str[:4].astype(int)

# Sum the all-channel abroad spend per year; groupby sorts its keys, so the result is
# already in year order.
df_yearly_Intl_All = df_by_Intl_All.groupby('year', as_index=False)['abroad_spend_all'].sum()

# This file is read back later in the notebook and merged on 'year'. index=False stops
# the row index from coming back as a stray 'Unnamed: 0' column.
df_yearly_Intl_All.to_csv('yearly_Intl_All.csv', index=False)

# Last expression of the cell, so the notebook renders the yearly totals.
df_yearly_Intl_All

In [None]:
# UK abroad spending: yearly total per destination country

import pandas as pd

# df_by_country holds the quarterly totals from the by-country query.
# The leading four characters of time_period_value are taken as the year
# (assumes labels such as '2023-Q1' or '2023Q1').
df_by_country['year'] = df_by_country['time_period_value'].str.slice(0, 4).astype(int)

# Collapse the quarters into one row per (year, country), then list each year's
# countries from the biggest spender down.
df_yearly_spend = (
    df_by_country
    .groupby(['year', 'destination_country'], as_index=False)['total']
    .sum()
    .sort_values(by=['year', 'total'], ascending=[True, False])
)

# Show the yearly totals by country
print(df_yearly_spend)



In [None]:
# UK abroad spending by country per year: grouped bar chart and line chart

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Expects df_yearly_spend with columns 'year', 'destination_country' and 'total'.

# Plot from a derived frame whose 'year' is a string, which forces a categorical x-axis.
# df_yearly_spend itself is left untouched so later cells that reuse it do not silently
# inherit a changed dtype.
yearly_spend_plot = df_yearly_spend.assign(year=df_yearly_spend['year'].astype(str))

# Legend settings shared by both charts: a vertical legend just right of the plot area
# (x=1.05), anchored at the top, with a small font so many countries fit.
legend_layout = dict(
    title='Destination Country',
    x=1.05,
    y=1,
    traceorder='normal',
    orientation='v',
    font=dict(size=10),
)

# Bar chart: one group of bars per year, one bar per destination country.
fig_bar = px.bar(yearly_spend_plot,
                 x='year',
                 y='total',
                 color='destination_country',
                 title='Total Spend by Country per Year',
                 labels={'total': 'Total Spend', 'year': 'Year'},
                 barmode='group')

fig_bar.update_layout(
    title='Total Spend by Country per Year',
    xaxis_title='Year',
    yaxis_title='Total Spend',
    legend_title='Destination Country',
    legend=legend_layout,
    margin=dict(r=200)  # extra right margin leaves room for the legend
)

# Line chart: one trace per country, in order of first appearance in the data.
fig_line = go.Figure()
for country, country_data in yearly_spend_plot.groupby('destination_country', sort=False):
    fig_line.add_trace(go.Scatter(x=country_data['year'],
                                  y=country_data['total'],
                                  mode='lines+markers',
                                  name=country))

fig_line.update_layout(
    title='Spend Trend by Country Over Time',
    xaxis_title='Year',
    yaxis_title='Total Spend',
    legend_title='Destination Country',
    legend=legend_layout,
    margin=dict(r=200)  # extra right margin leaves room for the legend
)

# Show the plots
fig_bar.show()
fig_line.show()

# Save the figures as static images
fig_bar.write_image("bar_chart_with_all_countries.png")
fig_line.write_image("line_chart_with_all_countries.png")

In [None]:
# Additional exports of the two figures built in the previous cell; the output format
# follows the file extension.
fig_bar.write_image("bar_chart.svg")  # Save as SVG
fig_line.write_image("line_chart.jpg")  # Save as JPEG


In [None]:
# Companion table for the bar and line charts: the yearly by-country totals.
# Maps each displayed header to the df_yearly_spend column that fills it.
table_columns = {
    "Year": 'year',
    "Destination Country": 'destination_country',
    "Total Spend": 'total',
}

table = go.Figure(
    data=[
        go.Table(
            header=dict(values=list(table_columns)),
            cells=dict(values=[df_yearly_spend[source] for source in table_columns.values()]),
        )
    ]
)
table.show()

In [None]:
# Rest of Europe and Republic of Ireland: yearly spending table, 2019 - 2025

import pandas as pd
import plotly.graph_objects as go

# Load the CSV file
file_path = 'UK_Spending_Online_By_Country.csv'
df = pd.read_csv(file_path)

# Collapse runs of whitespace in the country names before matching. This notebook spells
# the same country both 'REST OF  EUROPE' (two spaces) and 'REST OF EUROPE', so an exact
# match on either spelling can silently drop it.
wanted_countries = ['REST OF EUROPE', 'REPUBLIC OF IRELAND']
filtered_df = df.assign(
    destination_country=df['destination_country'].str.split().str.join(' ')
)
filtered_df = filtered_df[filtered_df['destination_country'].isin(wanted_countries)].copy()

# First run of four digits in time_period_value is taken as the year (e.g. '2019Q1').
# expand=False returns a Series, so this is a plain column assignment.
filtered_df['year'] = (
    filtered_df['time_period_value'].str.extract(r'(\d{4})', expand=False).astype(int)
)

# Keep 2019 to 2025 inclusive
filtered_df = filtered_df[filtered_df['year'].between(2019, 2025)]

# Total spend per (year, country)
yearly_spend = filtered_df.groupby(['year', 'destination_country'])['total'].sum().reset_index()

# Wide layout: one row per year, one column per country
df_combined = yearly_spend.pivot(index='year', columns='destination_country', values='total').reset_index()

# Render as a Plotly table; the first column of df_combined is 'year'
table = go.Figure(data=[go.Table(
    header=dict(values=["Year"] + list(df_combined.columns[1:])),
    cells=dict(values=[df_combined[col] for col in df_combined.columns])
)])

# Show the table
table.show()



In [None]:
import pandas as pd

# Reload the quarterly by-country file
file_path = 'UK_Spending_Online_By_Country.csv'
df = pd.read_csv(file_path)

# List every distinct destination_country spelling present in the file, to see why an
# exact match on 'REST OF EUROPE' might find nothing.
unique_countries = pd.unique(df['destination_country'])

# Last expression of the cell: displays the array of names
unique_countries



In [None]:
# Wide table of each country's yearly total spending: one row per year, one column per country
# NOTE(review): df_yearly_spend is derived from the by-country query, which filters on
# merchant_channel = 'All', so these are all-channel totals - confirm before calling
# them "Online".

import pandas as pd

# Countries in order of first appearance in df_yearly_spend; used as the column order.
countries = df_yearly_spend['destination_country'].unique()

# A single pivot replaces one outer merge per country. Years a country has no data for
# are left as NaN, and rows come out sorted by year.
combined_data = (
    df_yearly_spend
    .pivot(index='year', columns='destination_country', values='total')
    .reindex(columns=countries)
    .rename_axis(columns=None)  # drop the 'destination_country' label on the column axis
    .reset_index()
)

# Save the combined data to CSV
combined_data.to_csv('combined_data.csv', index=False)

# Last expression of the cell, so the notebook renders the combined table.
combined_data


In [None]:
# Scatter plot of the quarterly figures in UK_Spending_Online_By_Country.csv (actual values)

import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
file_path = 'UK_Spending_Online_By_Country.csv'
df = pd.read_csv(file_path)

# Give every period label an explicit x position in sorted order. Matplotlib places
# string categories in order of first appearance, so an ordered pandas Categorical alone
# does not guarantee a chronological axis.
# Assumes the labels sort chronologically as text (e.g. '2019Q1' < '2019Q2' < '2020Q1').
periods = sorted(df['time_period_value'].unique())
period_position = {period: position for position, period in enumerate(periods)}
df['period_position'] = df['time_period_value'].map(period_position)

# One scatter series per country, in order of first appearance in the file
plt.figure(figsize=(14, 8))
for country, country_data in df.groupby('destination_country', sort=False):
    plt.scatter(country_data['period_position'], country_data['total'], label=country, alpha=0.6)

plt.xlabel('Time Period')
plt.ylabel('Total Spending (GBP)')
plt.title('UK Online Spending by Country Over Time')
# Label the integer positions with the period names they stand for
plt.xticks(range(len(periods)), periods, rotation=45)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.tight_layout()
plt.grid(True)
plt.show()


In [None]:
# Scatter plot of the quarterly figures in UK_Spending_Online_By_Country.csv (actual values),
# with every point labelled by its country

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = "UK_Spending_Online_By_Country.csv"
df = pd.read_csv(file_path)

# Give every period label an explicit x position in sorted order, so the axis does not
# depend on the row order of the CSV (matplotlib places string categories in order of
# first appearance).
# Assumes the labels sort chronologically as text (e.g. '2019Q1' < '2019Q2' < '2020Q1').
periods = sorted(df['time_period_value'].unique())
period_position = {period: position for position, period in enumerate(periods)}
df['period_position'] = df['time_period_value'].map(period_position)

# One distinct colour per country
unique_countries = df['destination_country'].unique()
palette = sns.color_palette("hsv", len(unique_countries))
color_map = dict(zip(unique_countries, palette))

# Create the scatter plot
plt.figure(figsize=(14, 8))
for country in unique_countries:
    country_data = df[df['destination_country'] == country]
    plt.scatter(country_data['period_position'], country_data['total'],
                label=country, color=color_map[country])
    # Write the country name next to each of its points
    for x_pos, total in zip(country_data['period_position'], country_data['total']):
        plt.text(x_pos, total, country, fontsize=8, alpha=0.7)

# Customize the plot
plt.xlabel("Time Period")
plt.ylabel("Total Spending")
plt.title("UK Online Spending by Country Over Time")
# Label the integer positions with the period names they stand for
plt.xticks(range(len(periods)), periods, rotation=45)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.tight_layout()
plt.grid(True)

# Save the plot
plt.savefig("uk_spending_scatter_plot.png")
plt.show()



In [None]:
# Indexed scatter plot: each country's spending relative to its own 2019Q1 value (= 100)

import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# Wide layout: one row per time period, one column per country
pivot_df = df.pivot(index="time_period_value", columns="destination_country", values="total")

# Divide every row by the 2019Q1 row (aligned on country) and scale so the base is 100
base_quarter = pivot_df.loc["2019Q1"]
indexed_df = (pivot_df / base_quarter) * 100

# One scatter series per country column
plt.figure(figsize=(14, 8))
for country, indexed_values in indexed_df.items():
    plt.scatter(indexed_values.index, indexed_values, label=country)

plt.xlabel("Time Period")
plt.ylabel("Indexed Spending (Base = 100 at 2019Q1)")
plt.title("UK Online Spending by Country (Indexed to 2019Q1)")
plt.xticks(rotation=45)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.grid(True)
plt.show()




In [None]:
# Indexed scatter plot with labels:
# - each country in a different colour
# - each point labelled with its country for traceability
# - values indexed to 2019Q1 (base = 100)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# Wide layout (periods as rows, countries as columns), with the periods sorted
pivot_df = (
    df.pivot(index='time_period_value', columns='destination_country', values='total')
    .sort_index()
)

# Express every value relative to the same country's 2019Q1 value (= 100)
base = pivot_df.loc['2019Q1']
indexed_df = pivot_df.divide(base) * 100

# Back to long form for seaborn: one row per (period, country)
indexed_df = indexed_df.reset_index().melt(id_vars='time_period_value', var_name='Country', value_name='Index')

# Plotting; the legend is switched off because every point carries its own label
plt.figure(figsize=(14, 8))
sns.scatterplot(data=indexed_df, x='time_period_value', y='Index', hue='Country', palette='tab20', legend=False)

# Write the country name next to each point
for period, index_value, country_name in zip(
    indexed_df['time_period_value'], indexed_df['Index'], indexed_df['Country']
):
    plt.text(x=period, y=index_value, s=country_name, fontsize=6, alpha=0.7)

plt.xticks(rotation=45)
plt.title('Indexed UK Online Spending by Country (Base: 2019Q1 = 100)')
plt.xlabel('Time Period')
plt.ylabel('Index (2019Q1 = 100)')
plt.tight_layout()
plt.grid(True)
plt.show()



In [None]:
# Top 10 countries by total online spending
# - values indexed to 2019Q1 = 100
# - every point labelled with its country

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = "UK_Spending_Online_By_Country.csv"
df = pd.read_csv(file_path)

# Strip stray whitespace from the column names
df.columns = df.columns.str.strip()

# The ten countries with the largest spending summed over the whole file
top_countries = df.groupby('destination_country')['total'].sum().nlargest(10).index

# Keep only those countries
df_top = df[df['destination_country'].isin(top_countries)]

# Wide layout: periods as rows, countries as columns
pivot_df = df_top.pivot(index='time_period_value', columns='destination_country', values='total')

# Express every value relative to the same country's 2019Q1 value (= 100)
indexed_df = pivot_df.divide(pivot_df.loc['2019Q1']).multiply(100)

# Back to long form for seaborn: one row per (period, country)
indexed_df = indexed_df.reset_index().melt(id_vars='time_period_value', var_name='Country', value_name='Indexed Value')

# Plotting
plt.figure(figsize=(14, 8))
sns.scatterplot(data=indexed_df, x='time_period_value', y='Indexed Value', hue='Country', style='Country', palette='tab10')

# Write the country name next to each point
for period, indexed_value, country_name in zip(
    indexed_df['time_period_value'], indexed_df['Indexed Value'], indexed_df['Country']
):
    plt.text(x=period, y=indexed_value, s=country_name, fontsize=8, alpha=0.7)

plt.title('Indexed UK Online Spending by Country (Base: 2019Q1 = 100)')
plt.xticks(rotation=45)
# Place the legend BEFORE tight_layout so the layout pass can make room for it, as the
# other plotting cells in this notebook do.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.grid(True)
# bbox_inches='tight' expands the saved area to include the legend outside the axes.
plt.savefig("indexed_scatter_plot.png", bbox_inches='tight')
plt.show()



In [None]:
# Top 10 countries by total online spending
# - values indexed to 2019Q1 = 100
# - no per-point labels

import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# Normalise the column names: trimmed and lower-case
df.columns = df.columns.str.strip().str.lower()

# Wide layout: periods as rows, countries as columns
pivot_df = df.pivot(index='time_period_value', columns='destination_country', values='total')

# Rank the countries by their spending summed over every period and keep the first ten
country_totals = pivot_df.sum().sort_values(ascending=False)
top_countries = country_totals.head(10).index

# Restrict the wide table to those ten columns
top_pivot_df = pivot_df[top_countries]

# Express every value relative to the same country's 2019Q1 value (= 100)
base_quarter = top_pivot_df.loc['2019Q1']
indexed_df = top_pivot_df.divide(base_quarter).multiply(100)

# One scatter series per country column
plt.figure(figsize=(12, 6))
for country, indexed_values in indexed_df.items():
    plt.scatter(indexed_values.index, indexed_values, label=country)

plt.title("Indexed UK Online Spending by Country (2019Q1 = 100)")
plt.xlabel("Time Period")
plt.ylabel("Indexed Spending")
plt.xticks(rotation=45)
plt.legend(title="Country", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.grid(True)
plt.show()



In [None]:
# Scatter plot of each country's yearly total: 2019 on the x-axis against 2024 on the
# y-axis (actual values)
#
# X-axis: spending in 2019
# Y-axis: spending in 2024
# Each dot: a country
# Colour: the country's continent (grey = country not in the continent map)
# Dashed line: equal spending in both years (y = x)
# Label boxes: added for countries with smaller values to improve visibility
#
# NOTE(review): combined_data.csv is written earlier in this notebook from
# df_yearly_spend - confirm it holds online-only figures, as the plot title states.

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Load the wide yearly table (one row per year, one column per country)
df = pd.read_csv("combined_data.csv")

# Keep the two years being compared
df_filtered = df[df['year'].isin([2019, 2024])]

# Transpose so countries become the index and the two years become the columns
df_pivot = df_filtered.set_index('year').T
df_pivot.columns = df_pivot.columns.astype(int)

# Extract 2019 and 2024 values
x = df_pivot[2019]
y = df_pivot[2024]

# Continent used to colour each country
continent_map = {
    'REST OF EUROPE': 'Europe',
    'REPUBLIC OF IRELAND': 'Europe',
    'UNITED STATES OF AMERICA': 'North America',
    'SPAIN': 'Europe',
    'NETHERLANDS': 'Europe',
    'FRANCE': 'Europe',
    'REST OF ASIAPAC': 'Asia-Pacific',
    'GERMANY': 'Europe',
    'ITALY': 'Europe',
    'REST OF C.E.M.E.A.': 'Africa',
    'SWITZERLAND': 'Europe',
    'AUSTRALIA': 'Asia-Pacific',
    'REST OF LAT.AM.': 'South America',
    'UNITED ARAB EMIRATES': 'Asia-Pacific',
    'PORTUGAL': 'Europe',
    'POLAND': 'Europe',
    'REST OF CANADA': 'North America',
    'SOUTH AFRICA': 'Africa',
    'AUSTRIA': 'Europe',
    'INDIA': 'Asia-Pacific'
}

# Assign colors to continents
continent_colors = {
    'Europe': 'blue',
    'North America': 'green',
    'Asia-Pacific': 'orange',
    'Africa': 'red',
    'South America': 'purple'
}

# Map countries to continents and colours. Runs of whitespace in the country name are
# collapsed first: this notebook also spells a country 'REST OF  EUROPE' (two spaces),
# which would otherwise miss the 'REST OF EUROPE' key and fall back to 'Other'.
countries = x.index
continents = [continent_map.get(' '.join(str(country).split()), 'Other') for country in countries]
colors = [continent_colors.get(continent, 'gray') for continent in continents]

# Create scatter plot
plt.figure(figsize=(10, 8))
scatter = plt.scatter(x, y, c=colors, s=100, edgecolors='k')

# Add y=x reference line across the full range of both years
lims = [min(x.min(), y.min()), max(x.max(), y.max())]
plt.plot(lims, lims, 'k--', alpha=0.75, zorder=0)

# Label only the countries below 1.2 billion in BOTH years
for country, spend_2019, spend_2024 in zip(countries, x, y):
    if spend_2019 < 1.2e9 and spend_2024 < 1.2e9:
        plt.text(spend_2019, spend_2024, country, fontsize=8,
                 bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.2'))

# Add legend; include the grey 'Other' entry only when an unmapped country is plotted
legend_colors = dict(continent_colors)
if 'Other' in continents:
    legend_colors['Other'] = 'gray'
handles = [mpatches.Patch(color=color, label=continent) for continent, color in legend_colors.items()]
plt.legend(handles=handles, title="Continent")

# Set labels and title
plt.xlabel("2019 Spending (GBP)")
plt.ylabel("2024 Spending (GBP)")
plt.title("UK Online Spending by Country: 2019 vs 2024")

plt.grid(True)
plt.tight_layout()
plt.show()



In [None]:
# Scatter plot of each country's yearly online total: 2019 on the x-axis against 2024 on
# the y-axis (actual values)
#
# X-axis: spending in 2019
# Y-axis: spending in 2024
# Each dot: a country
# Colour: the country's continent (grey = country not in the continent map)
# Dashed line: equal spending in both years (y = x)
# Label boxes: added for countries with smaller values to improve visibility
#
# Each label is offset slightly in a rotating direction and boxed, so labels do not
# stack directly on top of each other.

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# First run of four digits in time_period_value is taken as the year
df['year'] = df['time_period_value'].str.extract(r'(\d{4})').astype(int)

# Keep the two years being compared
df_filtered = df[df['year'].isin([2019, 2024])]

# Countries as rows, years as columns, quarters summed into a yearly total
df_pivot = df_filtered.pivot_table(index='destination_country', columns='year', values='total', aggfunc='sum')

# Drop countries missing a value for either year
df_pivot = df_pivot.dropna(subset=[2019, 2024])

# Extract 2019 and 2024 values
x = df_pivot[2019]
y = df_pivot[2024]

# Continent used to colour each country
continent_map = {
    'REST OF EUROPE': 'Europe',
    'REPUBLIC OF IRELAND': 'Europe',
    'UNITED STATES OF AMERICA': 'North America',
    'SPAIN': 'Europe',
    'NETHERLANDS': 'Europe',
    'FRANCE': 'Europe',
    'REST OF ASIAPAC': 'Asia-Pacific',
    'GERMANY': 'Europe',
    'ITALY': 'Europe',
    'REST OF C.E.M.E.A.': 'Africa',
    'SWITZERLAND': 'Europe',
    'AUSTRALIA': 'Asia-Pacific',
    'REST OF LAT.AM.': 'South America',
    'UNITED ARAB EMIRATES': 'Asia-Pacific',
    'PORTUGAL': 'Europe',
    'POLAND': 'Europe',
    'REST OF CANADA': 'North America',
    'SOUTH AFRICA': 'Africa',
    'AUSTRIA': 'Europe',
    'INDIA': 'Asia-Pacific'
}

# Assign colors to continents
continent_colors = {
    'Europe': 'blue',
    'North America': 'green',
    'Asia-Pacific': 'orange',
    'Africa': 'red',
    'South America': 'purple'
}

# Map countries to continents and colours. Runs of whitespace in the country name are
# collapsed first: this notebook also spells a country 'REST OF  EUROPE' (two spaces),
# which would otherwise miss the 'REST OF EUROPE' key and fall back to 'Other'.
countries = x.index
continents = [continent_map.get(' '.join(str(country).split()), 'Other') for country in countries]
colors = [continent_colors.get(continent, 'gray') for continent in continents]

# Create scatter plot
plt.figure(figsize=(12, 10))
scatter = plt.scatter(x, y, c=colors, s=100, edgecolors='k')

# Add y=x reference line across the full range of both years
lims = [min(x.min(), y.min()), max(x.max(), y.max())]
plt.plot(lims, lims, 'k--', alpha=0.75, zorder=0)

# Label only the countries below 1.2 billion in BOTH years. Each label is pushed 1e7
# (in data units) away from its point, and the direction advances by one angle step per
# labelled country so neighbouring labels fan out.
angle_step = 360 / len(countries)
angle = 0
for country, spend_2019, spend_2024 in zip(countries, x, y):
    if spend_2019 < 1.2e9 and spend_2024 < 1.2e9:
        dx = 1e7 * np.cos(np.radians(angle))
        dy = 1e7 * np.sin(np.radians(angle))
        plt.text(spend_2019 + dx, spend_2024 + dy, country, fontsize=8,
                 bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.2'))
        angle += angle_step

# Add legend; include the grey 'Other' entry only when an unmapped country is plotted
legend_colors = dict(continent_colors)
if 'Other' in continents:
    legend_colors['Other'] = 'gray'
handles = [mpatches.Patch(color=color, label=continent) for continent, color in legend_colors.items()]
plt.legend(handles=handles, title="Continent")

# Set labels and title
plt.xlabel("2019 Spending (GBP)")
plt.ylabel("2024 Spending (GBP)")
plt.title("UK Online Spending by Country: 2019 vs 2024")

plt.grid(True)
plt.tight_layout()
plt.show()



In [None]:
# Interactive Plotly version of the 2019 vs 2024 scatter plot:
# - hovering over a dot shows the country name
# - colours represent continents
# - a dashed line marks equal spending in 2019 and 2024 (y = x)

import pandas as pd
import plotly.express as px

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# First run of four digits in time_period_value is taken as the year
df['year'] = df['time_period_value'].str.extract(r'(\d{4})').astype(int)

# Keep the two years being compared
df_filtered = df[df['year'].isin([2019, 2024])]

# Countries as rows, years as columns, quarters summed into a yearly total
df_pivot = df_filtered.pivot_table(index='destination_country', columns='year', values='total', aggfunc='sum')

# Drop countries missing a value for either year
df_pivot = df_pivot.dropna(subset=[2019, 2024])

# Range covered by BOTH years, taken before any non-numeric column is added. The
# reference line uses it so that it spans every plotted point, not just the 2019 range.
axis_min = df_pivot[[2019, 2024]].min().min()
axis_max = df_pivot[[2019, 2024]].max().max()

# Continent used to colour each country
continent_map = {
    'REST OF EUROPE': 'Europe',
    'REPUBLIC OF IRELAND': 'Europe',
    'UNITED STATES OF AMERICA': 'North America',
    'SPAIN': 'Europe',
    'NETHERLANDS': 'Europe',
    'FRANCE': 'Europe',
    'REST OF ASIAPAC': 'Asia-Pacific',
    'GERMANY': 'Europe',
    'ITALY': 'Europe',
    'REST OF C.E.M.E.A.': 'Africa',
    'SWITZERLAND': 'Europe',
    'AUSTRALIA': 'Asia-Pacific',
    'REST OF LAT.AM.': 'South America',
    'UNITED ARAB EMIRATES': 'Asia-Pacific',
    'PORTUGAL': 'Europe',
    'POLAND': 'Europe',
    'REST OF CANADA': 'North America',
    'SOUTH AFRICA': 'Africa',
    'AUSTRIA': 'Europe',
    'INDIA': 'Asia-Pacific'
}

# Map countries to continents. Names are upper-cased and runs of whitespace collapsed
# first: this notebook also spells a country 'REST OF  EUROPE' (two spaces), which would
# otherwise miss the 'REST OF EUROPE' key and fall back to 'Other'.
df_pivot['continent'] = df_pivot.index.map(
    lambda name: continent_map.get(' '.join(name.upper().split()), 'Other')
)

# Create interactive scatter plot
fig = px.scatter(
    df_pivot,
    x=2019,
    y=2024,
    color='continent',
    hover_name=df_pivot.index,
    labels={2019: "2019 Spending (GBP)", 2024: "2024 Spending (GBP)"},
    title="UK Online Spending by Country: 2019 vs 2024"
)

# Add y=x reference line
fig.add_shape(
    type='line',
    x0=axis_min, y0=axis_min,
    x1=axis_max, y1=axis_max,
    line=dict(dash='dash', color='gray')
)

fig.update_layout(legend_title_text='Continent')
fig.show()



In [None]:
# Indexed version of the 2019 vs 2024 plot:
# - 2019 spending is set to an index of 100 for each country
# - 2024 spending is shown as an index relative to 2019
# The plot keeps continent-based colouring, hoverable country names and a dashed
# y = x reference line.

import pandas as pd
import plotly.express as px

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# First run of four digits in time_period_value is taken as the year
df['year'] = df['time_period_value'].str.extract(r'(\d{4})').astype(int)

# Keep the two years being compared
df_filtered = df[df['year'].isin([2019, 2024])]

# Countries as rows, years as columns, quarters summed into a yearly total
df_pivot = df_filtered.pivot_table(index='destination_country', columns='year', values='total', aggfunc='sum')

# Drop countries missing a value for either year
df_pivot = df_pivot.dropna(subset=[2019, 2024])

# Index values with 2019 as the base year: every country is 100 in 2019 by construction
df_pivot['index_2019'] = 100
df_pivot['index_2024'] = (df_pivot[2024] / df_pivot[2019]) * 100

# Continent used to colour each country
continent_map = {
    'REST OF EUROPE': 'Europe',
    'REPUBLIC OF IRELAND': 'Europe',
    'UNITED STATES OF AMERICA': 'North America',
    'SPAIN': 'Europe',
    'NETHERLANDS': 'Europe',
    'FRANCE': 'Europe',
    'REST OF ASIAPAC': 'Asia-Pacific',
    'GERMANY': 'Europe',
    'ITALY': 'Europe',
    'REST OF C.E.M.E.A.': 'Africa',
    'SWITZERLAND': 'Europe',
    'AUSTRALIA': 'Asia-Pacific',
    'REST OF LAT.AM.': 'South America',
    'UNITED ARAB EMIRATES': 'Asia-Pacific',
    'PORTUGAL': 'Europe',
    'POLAND': 'Europe',
    'REST OF CANADA': 'North America',
    'SOUTH AFRICA': 'Africa',
    'AUSTRIA': 'Europe',
    'INDIA': 'Asia-Pacific'
}

# Map countries to continents. Names are upper-cased and runs of whitespace collapsed
# first: this notebook also spells a country 'REST OF  EUROPE' (two spaces), which would
# otherwise miss the 'REST OF EUROPE' key and fall back to 'Other'.
df_pivot['continent'] = df_pivot.index.map(
    lambda name: continent_map.get(' '.join(name.upper().split()), 'Other')
)

# Create interactive scatter plot using index values
fig = px.scatter(
    df_pivot,
    x='index_2019',
    y='index_2024',
    color='continent',
    hover_name=df_pivot.index,
    labels={'index_2019': "2019 Spending Index", 'index_2024': "2024 Spending Index"},
    title="UK Online Spending by Country: Indexed (2019 = 100)"
)

# Add y=x reference line, from the base value (100) to the largest 2024 index
fig.add_shape(
    type='line',
    x0=100, y0=100,
    x1=df_pivot['index_2024'].max(), y1=df_pivot['index_2024'].max(),
    line=dict(dash='dash', color='gray')
)

fig.update_layout(legend_title_text='Continent')
fig.show()



In [None]:
# Square indexed version of the plot (equal width and height, 1:1 axis ratio)

import pandas as pd
import plotly.express as px

# Load the dataset
df = pd.read_csv("UK_Spending_Online_By_Country.csv")

# First run of four digits in time_period_value is taken as the year
df['year'] = df['time_period_value'].str.extract(r'(\d{4})').astype(int)

# Keep the two years being compared
df_filtered = df[df['year'].isin([2019, 2024])]

# Countries as rows, years as columns, quarters summed into a yearly total
df_pivot = df_filtered.pivot_table(index='destination_country', columns='year', values='total', aggfunc='sum')

# Drop countries missing a value for either year
df_pivot = df_pivot.dropna(subset=[2019, 2024])

# Index values with 2019 as the base year: every country is 100 in 2019 by construction
df_pivot['index_2019'] = 100
df_pivot['index_2024'] = (df_pivot[2024] / df_pivot[2019]) * 100

# Continent used to colour each country
continent_map = {
    'REST OF EUROPE': 'Europe',
    'REPUBLIC OF IRELAND': 'Europe',
    'UNITED STATES OF AMERICA': 'North America',
    'SPAIN': 'Europe',
    'NETHERLANDS': 'Europe',
    'FRANCE': 'Europe',
    'REST OF ASIAPAC': 'Asia-Pacific',
    'GERMANY': 'Europe',
    'ITALY': 'Europe',
    'REST OF C.E.M.E.A.': 'Africa',
    'SWITZERLAND': 'Europe',
    'AUSTRALIA': 'Asia-Pacific',
    'REST OF LAT.AM.': 'South America',
    'UNITED ARAB EMIRATES': 'Asia-Pacific',
    'PORTUGAL': 'Europe',
    'POLAND': 'Europe',
    'REST OF CANADA': 'North America',
    'SOUTH AFRICA': 'Africa',
    'AUSTRIA': 'Europe',
    'INDIA': 'Asia-Pacific'
}

# Map countries to continents. Names are upper-cased and runs of whitespace collapsed
# first: this notebook also spells a country 'REST OF  EUROPE' (two spaces), which would
# otherwise miss the 'REST OF EUROPE' key and fall back to 'Other'.
df_pivot['continent'] = df_pivot.index.map(
    lambda name: continent_map.get(' '.join(name.upper().split()), 'Other')
)

# Create interactive scatter plot with index values
fig = px.scatter(
    df_pivot,
    x='index_2019',
    y='index_2024',
    color='continent',
    hover_name=df_pivot.index,
    labels={'index_2019': "2019 Spending Index", 'index_2024': "2024 Spending Index"},
    title="UK Online Spending by Country (Indexed: 2019 = 100)"
)

# Add y=x reference line. It always covers at least 80-160 and is widened when a
# country's 2024 index falls outside that window, so the line reaches every point.
line_start = min(80, df_pivot['index_2024'].min())
line_end = max(160, df_pivot['index_2024'].max())
fig.add_shape(
    type='line',
    x0=line_start, y0=line_start,
    x1=line_end, y1=line_end,
    line=dict(dash='dash', color='gray')
)

# Square figure (700x700 pixels). The x-axis is anchored to the y-axis at a 1:1 scale,
# so the y = x reference line is drawn at 45 degrees.
fig.update_layout(
    legend_title_text='Continent',
    width=700,
    height=700,
    xaxis=dict(scaleanchor="y", scaleratio=1)
)

fig.show()



In [None]:
# Each country's % Online in Total Abroad Spending
# Every non-'year' column of the per-country table is divided by that year's
# 'abroad_spend_all' value and expressed as a percentage.

import pandas as pd

# Yearly totals for all abroad spending (must provide 'year' and 'abroad_spend_all')
abroad_total = pd.read_csv('yearly_Intl_All.csv')

# Wide per-country table: one 'year' column plus one column per country
df_combined_data = pd.read_csv('combined_data.csv')

# Cast the join key to int on both sides so the merge matches on equal dtypes
df_combined_data['year'] = df_combined_data['year'].astype(int)
abroad_total['year'] = abroad_total['year'].astype(int)

# Left join: keep every row of the country table and attach the matching yearly total
df_merged = df_combined_data.merge(abroad_total, on='year', how='left')

# Everything in the country table other than 'year' is treated as a country column
country_columns = [col for col in df_combined_data.columns if col != 'year']

# Row-wise division of all country columns by the yearly total, scaled to percent
country_shares = (
    df_merged[country_columns]
    .div(df_merged['abroad_spend_all'], axis=0)
    .mul(100)
    .add_suffix('_percent')
)
percentage_df = pd.concat([df_merged[['year']], country_shares], axis=1)

# Persist the result without the positional index
percentage_df.to_csv('country_spend_percentage_vs_abroad.csv', index=False)

# Preview
percentage_df.head()




In [None]:
# Build a wide CSV table of each country's total yearly spending:
# one row per year, one column per destination country.

import pandas as pd

# `df_yearly_spend` must already exist with columns 'year', 'destination_country', 'total'.
# Column order follows each country's first appearance in df_yearly_spend.
countries = df_yearly_spend['destination_country'].unique()

# A single pivot replaces the previous loop of repeated outer merges. Years are the
# sorted row key and year/country combinations with no data become NaN.
# pivot() raises ValueError if a (year, country) pair occurs more than once, which
# surfaces duplicated input instead of silently multiplying rows as a merge would.
combined_data = (
    df_yearly_spend
    .pivot(index='year', columns='destination_country', values='total')
    .reindex(columns=countries)
    .rename_axis(columns=None)  # drop the 'destination_country' label on the column axis
    .reset_index()              # turn 'year' back into an ordinary first column
)

# Save the resulting table as a CSV file (no positional index column)
combined_data.to_csv('yearly_spend_by_country_combined.csv', index=False)



In [None]:
# Save Table Image in a specific directory
import plotly.graph_objects as go

# This cell runs before any cell that defines `table`, so on a fresh kernel
# (Restart & Run All) it used to fail with NameError. Build the figure here from
# `df_yearly_spend` so the cell is self-contained.
table = go.Figure(data=[go.Table(
    header=dict(values=["Year", "Destination Country", "Total Spend"]),
    cells=dict(values=[df_yearly_spend['year'], df_yearly_spend['destination_country'], df_yearly_spend['total']])
)])

# Grow the figure with the number of rows so every row is visible in the image
table.update_layout(
    height=400 + len(df_yearly_spend) * 20,
    title="Spending Data by Country",
    font=dict(size=12),
)

output_dir = "images"  # Directory to save the image

# Create the directory if needed; exist_ok avoids the check-then-create race
os.makedirs(output_dir, exist_ok=True)

# Save the table image as a PNG file in the specified directory
table_image_path = os.path.join(output_dir, "spend_table.png")
table.write_image(table_image_path)

# Report where the file was written
print(f"Table image saved to: {table_image_path}")

In [None]:
# List of all the yearly countries' spending, rendered as one table figure,
# saved to images/spend_table.png and displayed inline.

# Imported here because this cell uses `go` and the only other visible import of it
# sits in a later cell.
import plotly.graph_objects as go

# Create Table for the Data (one column each for year, destination country, total)
table = go.Figure(data=[go.Table(
    header=dict(values=["Year", "Destination Country", "Total Spend"]),
    cells=dict(values=[df_yearly_spend['year'], df_yearly_spend['destination_country'], df_yearly_spend['total']])
)])

# Ensure all rows are shown by adjusting the layout (height, font size, etc.)
table.update_layout(
    height=400 + len(df_yearly_spend) * 20,  # Adjust height based on the number of rows
    title="Spending Data by Country",
    font=dict(size=12),  # Adjust font size for readability
)

# Save Table Image in a specific directory
output_dir = "images"  # Directory to save the image

# Create the directory if needed; exist_ok avoids the check-then-create race
os.makedirs(output_dir, exist_ok=True)

# Save the table image as a PNG file in the specified directory
table_image_path = os.path.join(output_dir, "spend_table.png")
table.write_image(table_image_path)

# Report where the file was written
print(f"Table image saved to: {table_image_path}")

# Show the table
table.show()

In [None]:
# One framed table per destination country: each is written to
# images/<country>_spend_table.png and also displayed inline.

import os
import plotly.graph_objects as go

# `df_yearly_spend` is expected to provide 'year', 'destination_country' and 'total'

# Distinct destination countries, in order of first appearance
countries = df_yearly_spend['destination_country'].unique()

# Target folder for the per-country images; created on first use
output_dir = "images"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for country in countries:
    # Rows belonging to this country only
    country_data = df_yearly_spend[df_yearly_spend['destination_country'] == country]

    # Assemble the table trace first, then wrap it in a figure
    table_trace = go.Table(
        header=dict(values=["Year", "Destination Country", "Total Spend"]),
        cells=dict(values=[
            country_data['year'],
            country_data['destination_country'],
            country_data['total'],
        ]),
    )
    country_table = go.Figure(data=[table_trace])

    # Height scales with the row count so the whole table fits in the image
    country_table.update_layout(
        height=400 + len(country_data) * 20,
        title=f"Spending Data for {country}",
        font=dict(size=12),
    )

    # Write the PNG, named after the country
    country_table_image_path = os.path.join(output_dir, f"{country}_spend_table.png")
    country_table.write_image(country_table_image_path)
    print(f"Table image for {country} saved to: {country_table_image_path}")

    # Render the figure in the notebook as well
    country_table.show()


In [None]:
# Plain-text listing of each country's yearly total spending

# Distinct destination countries, in order of first appearance
countries = df_yearly_spend['destination_country'].unique()

for country in countries:
    # Keep only this country's rows and the two columns to print, renumbered from 0
    country_data = (
        df_yearly_spend
        .loc[df_yearly_spend['destination_country'] == country, ['year', 'total']]
        .reset_index(drop=True)
    )

    # Heading followed by the year/total rows
    print(f"\nYearly Spend Totals for {country}:")
    print(country_data)


In [None]:
#UK Abroad Online Spending Total Full Table
# Pulls every column for quarterly, online-channel rows where cardholder_origin is
# 'UNITED KINGDOM', cardholder_origin_country / mcc / mcg are the 'All' rows and the
# destination is any country other than the UK, ordered by period ascending.

UK_spending_by_country1 = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country != 'UNITED KINGDOM' and mcc = 'All' and mcg = 'All'

ORDER BY time_period_value ASC'''
# Run the query through the project helper; the result is shown as the cell output.
df_by_country1 = bq.read_bq_table_sql(client, UK_spending_by_country1)
df_by_country1

In [None]:
# Write the query result to the working directory; index=False is not passed, so the
# DataFrame index is written as an extra first column.
df_by_country1.to_csv('UK_Spending_Online_By_Country1.csv')

In [None]:
#UK Abroad Spending Total Full Table
# Same filters as the online abroad pull, but with merchant_channel = 'All' (every
# channel combined); non-UK destinations only, ordered by period descending.

UK_spending_by_method = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and merchant_channel = 'All' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country != 'UNITED KINGDOM' and mcc = 'All' and mcg = 'All'
ORDER BY time_period_value DESC'''
# Run the query through the project helper; the result is shown as the cell output.
df_by_method = bq.read_bq_table_sql(client, UK_spending_by_method)
df_by_method

In [None]:
# Write the result to the working directory (index included as the first column).
# NOTE(review): the file name says "Online", but df_by_method is queried with
# merchant_channel = 'All' — confirm the name is intended.
df_by_method.to_csv('UK_Spending_Online_By_Method.csv')

In [None]:
#UK Abroad Online Spending Total Australia Full Table
# Quarterly, online-channel rows for UK cardholders (cardholder_origin_country, mcc and
# mcg at their 'All' rows) where the destination country is 'AUSTRALIA', newest first.

UK_spending_by_dcaustralia = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`    
where time_period = 'Quarter' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'AUSTRALIA' and mcc = 'All' and mcg = 'All'   
ORDER BY time_period_value DESC'''   
# Run the query through the project helper; the result is shown as the cell output.
df_by_dcaustralia = bq.read_bq_table_sql(client, UK_spending_by_dcaustralia)   
df_by_dcaustralia  

In [None]:
# Write the Australia result to the working directory (index included as the first column).
df_by_dcaustralia.to_csv('UK_Spending_Online_By_DestinationCountryAustralia.csv') 

In [None]:
#UK Domestic Online Spending Total Full Table
# Quarterly, online-channel rows where both cardholder_origin and destination_country
# are 'UNITED KINGDOM' (cardholder_origin_country, mcc and mcg at 'All'), newest first.

UK_spending_by_dcuk = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`    
where time_period = 'Quarter' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'UNITED KINGDOM' and mcc = 'All' and mcg = 'All'   
ORDER BY time_period_value DESC'''   
# Run the query through the project helper; the result is shown as the cell output.
df_by_dcuk = bq.read_bq_table_sql(client, UK_spending_by_dcuk)   
df_by_dcuk  

In [None]:
# Write the domestic result to the working directory (index included as the first column).
df_by_dcuk.to_csv('UK_Spending_Online_By_DestinationCountryUK.csv') 

In [None]:
#UK Domestic Online Spending Total Full Table - cardholder_origin_country NOT IN ('POSTAL_AREA', 'POSTAL_DISTRICT', 'POSTAL_SECTOR') 
# Same domestic online pull as the 'All' version, except cardholder_origin_country is
# only required to differ from the three POSTAL_* values instead of being equal to 'All'.

UK_spending_by_oc = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`     
where time_period = 'Quarter' and merchant_channel = 'Online' and cardholder_origin_country NOT IN ('POSTAL_AREA', 'POSTAL_DISTRICT', 'POSTAL_SECTOR') and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'UNITED KINGDOM' and mcc = 'All' and mcg = 'All'    
ORDER BY time_period_value DESC'''    
# Run the query through the project helper; the result is shown as the cell output.
df_by_oc = bq.read_bq_table_sql(client, UK_spending_by_oc)    
df_by_oc  

In [None]:
# Write the result to the working directory (index included as the first column).
df_by_oc.to_csv('UK_Spending_Online_By_oc.csv') 

In [None]:
#Sample mcc with B2B %
# For a sample of B2B-type merchant category codes, total quarterly online spend per
# MCC and each MCC's share of the sample's combined spend.
#
# The share's denominator is SUM(SUM(spend)) OVER (), i.e. the grand total of the very
# rows being grouped. The previous scalar subquery repeated the MCC list but omitted the
# time_period / merchant_channel filters, so the shares were computed against a
# different population than the numerator.
#
# NOTE(review): no cardholder_origin / destination_country / mcg filters are applied
# here, unlike the other queries in this notebook — confirm that summing across all of
# those rows does not double count aggregate ('All') rows.

UK_spending_by_mcc = '''SELECT
    mcc,
    SUM(spend) AS total_spend,
    SUM(spend) * 100.0 / SUM(SUM(spend)) OVER () AS b2b_percentage
FROM
    `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE
    mcc IN (
        'ACCOUNTANTS/AUDITORS/BOOKPR',
        'ADVERTISING SERVICES',
        'ARCHITECTURAL/ENG/SURVEY',
        'BUSINESS SERVICES - DEFAULT',
        'CONTRACTORS - CONCRETE',
        'EMPLOYMENT/TEMP HELP AGEN',
        'MGMT/CONSULT/PUBLIC REL SER',
        'PROFESSIONAL SERVICES - DEF',
        'WHOLESALE CLUBS',
        'TRANSPORTATION SVCS - DEFAULT')
    AND time_period = 'Quarter'
    AND merchant_channel = 'Online'
GROUP BY
    mcc'''
# Run the query through the project helper; the result is shown as the cell output.
df_by_mcc = bq.read_bq_table_sql(client, UK_spending_by_mcc)
df_by_mcc

In [None]:
# Write the per-MCC result to the working directory (index included as the first column).
df_by_mcc.to_csv('UK_Spending_Online_By_mccb2b.csv') 

In [None]:
#mcc Goods initial filter with B2B %
# For an initial list of goods-type merchant category codes, total quarterly online
# spend per MCC and each MCC's share of the list's combined spend.
#
# Fixes relative to the earlier version of this query:
#   * 'CATERERS' and 'COSMETIC STORES' were fused into one literal by typographic
#     (curly) quotes, so neither category could ever match; they are now two entries.
#   * The share's denominator is SUM(SUM(spend)) OVER (), the grand total of the rows
#     being grouped, instead of a subquery that left out the time_period /
#     merchant_channel filters applied to the numerator.
#
# The output column keeps the name b2b_percentage so downstream code is unaffected.
# NOTE(review): no cardholder_origin / destination_country / mcg filters are applied
# here — confirm that summing across those rows does not double count 'All' rows.

UK_spending_by_mccgoods = '''SELECT
    mcc,
    SUM(spend) AS total_spend,
    SUM(spend) * 100.0 / SUM(SUM(spend)) OVER () AS b2b_percentage
FROM
    `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE
    mcc IN (
        'ANTIQUE SHOPS',
        'AUTOMOTIVE PARTS STORES',
        'AUTOMOTIVE TIRE STORES',
        'BAKERIES',
        'BOOK STORES',
        'CANDY/NUT/CONFECTION STORES',
        'CAR & TRUCK DEALERS/NEW/USED',
        'CAR & TRUCK DEALERS/USED ONLY',
        'CAMERA & PHOTO SUPPLY STORES',
        'CATERERS',
        'COSMETIC STORES',
        'DEPARTMENT STORES',
        'ELECTRONICS STORES',
        'FABRIC STORES',
        'FLOOR COVERING STORES',
        'FLORISTS',
        'GLASS/PAINT/WALLPAPER STORES',
        'GIFT, CARD, NOVELTY STORES',
        'HOUSEHOLD APPLIANCE STORES',
        'JEWELRY STORES',
        'LUMBER/BUILD. SUPPLY STORES',
        'MISC GENERAL MERCHANDISE',
        'MISC HOME FURNISHING SPECIALTY',
        'MISC SPECIALTY RETAIL',
        'MOBILE HOME DEALERS',
        'MOTOR VEHICLE SUPPLY/NEW PARTS',
        'MOTORCYCLE DEALERS',
        'MUSIC STORES/PIANOS',
        'PET STORES/FOOD & SUPPLY',
        'PHOTO STUDIOS',
        'PLUMBING/HEATING EQUIPMENT')
    AND time_period = 'Quarter'
    AND merchant_channel = 'Online'
GROUP BY
    mcc'''
# Run the query through the project helper; the result is shown as the cell output.
df_by_mccgoods = bq.read_bq_table_sql(client, UK_spending_by_mccgoods)
df_by_mccgoods

In [None]:
#mcg ='B2B'  UK Domestic Online Spending All full table
# Domestic (UK cardholder, UK destination) quarterly online rows restricted to the
# merchant category group 'BUSINESS TO BUSINESS', with mcc at its 'All' row, newest first.

UK_spending_by_dcukmcgb2b = '''SELECT * FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`    
where time_period = 'Quarter' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'UNITED KINGDOM' and mcc = 'All' and mcg = 'BUSINESS TO BUSINESS'   
ORDER BY time_period_value DESC'''   
# Run the query through the project helper; the result is shown as the cell output.
df_by_dcukmcgb2b = bq.read_bq_table_sql(client, UK_spending_by_dcukmcgb2b)   
df_by_dcukmcgb2b  

In [None]:
# Write the B2B domestic result to the working directory (index included as the first column).
df_by_dcukmcgb2b.to_csv('UK_Spending_Online_By_dcukmcgB2B.csv') 