In [None]:
# Notebook setup: make the project root importable, load libraries, open a BigQuery client.
project_path = "/home/jupyter"
import os
import sys
# Must run before the `fintrans_toolbox` import below, which is resolved via this path entry.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# Shared BigQuery client passed to every bq.read_bq_table_sql(...) call in the cells below.
client = bigquery.Client()

In [None]:
# Quarterly total of UK cardholder spending ('All' merchant category, group and channel).
# The query has no destination_country filter, so every destination row of a quarter is
# summed into that quarter's total_spend.

UK_spending_All_Quarter = '''SELECT time_period_value, SUM(spend) AS total_spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'  
GROUP BY
time_period_value 
ORDER BY time_period_value'''

# Run the query, print the complete quarterly table, then persist it next to the notebook.
df_by_All = bq.read_bq_table_sql(client, UK_spending_All_Quarter)
print(df_by_All)
df_by_All.to_csv(path_or_buf='UK_spending_All_Quarter.csv')

In [None]:
# UK cardholder spending ('All' merchant category, group and channel) per quarter and
# destination country, rolled up to calendar-year totals and saved for the ratio cells.

# The measure is aggregated with SUM instead of being listed in GROUP BY: grouping by
# `spend` behaves like SELECT DISTINCT and would silently drop rows that share a value.
UK_spending_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_All = bq.read_bq_table_sql(client, UK_spending_All)

# Calculate UK total spending per year

import pandas as pd

# Assumes time_period_value is a string that starts with a four-digit year (e.g. '2023...').
df_by_All['year'] = df_by_All['time_period_value'].str[:4].astype(int)

# Sum every quarter and destination of a year; a year with missing quarters yields a partial total.
df_yearly_All = (
    df_by_All.groupby('year')['spend']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the yearly totals
print(df_yearly_All)

# index=False: the positional index carries no information and would come back as an
# 'Unnamed: 0' column when this file is re-read and merged further down.
df_yearly_All.to_csv('UK_spending_by_yearly_All.csv', index=False)

In [None]:
# UK cardholder domestic spending (destination = UNITED KINGDOM), 'All' merchant
# category, group and channel, one row per quarter.

# SUM(spend) replaces `GROUP BY ..., spend`, which acted as a DISTINCT on the measure
# and could drop rows that happen to share the same spend value.
UK_spending_by_Dom_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country = 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_All = bq.read_bq_table_sql(client, UK_spending_by_Dom_All)
df_by_Dom_All.head()

In [None]:
# Calculate UK domestic total spending per calendar year

import pandas as pd

# The first four characters of time_period_value are taken as the year
# (assumes values start with a four-digit year, e.g. '2023...').
df_by_Dom_All['year'] = df_by_Dom_All['time_period_value'].str.slice(stop=4).astype(int)

# Yearly totals: sum the quarterly spend within each year, ordered by year.
df_yearly_Dom_All = (
    df_by_Dom_All.groupby('year')['spend']
    .sum()
    .reset_index()
)
df_yearly_Dom_All = df_yearly_Dom_All.sort_values(by='year')

# Display the yearly totals
print(df_yearly_Dom_All)

In [None]:
# Persist the yearly domestic totals. index=False keeps the positional index out of the
# file so that re-reading it does not produce a spurious 'Unnamed: 0' column.
df_yearly_Dom_All.to_csv('UK_spending_by_yearly_Dom_All.csv', index=False)

In [None]:
# UK cardholder domestic household spending: every merchant category group except the
# 'All' total and 'BUSINESS TO BUSINESS', summed per quarter and then per calendar year.

# Several merchant category groups contribute to each quarter here, so the measure must
# be aggregated with SUM. The previous `GROUP BY ..., spend` worked like DISTINCT and
# merged groups that reported an identical spend value, under-counting the total.
UK_spending_by_HH_Dom_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg NOT IN ('All', 'BUSINESS TO BUSINESS')
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country = 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_HH_Dom_All = bq.read_bq_table_sql(client, UK_spending_by_HH_Dom_All)
df_by_HH_Dom_All = df_by_HH_Dom_All.rename(columns={'spend': 'domestic_spend_HH'})

# Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_HH_Dom_All['year'] = df_by_HH_Dom_All['time_period_value'].str[:4].astype(int)

# Sum the quarterly household spend within each year, ordered by year.
df_yearly_HH_Dom_All = (
    df_by_HH_Dom_All.groupby('year')['domestic_spend_HH']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_HH_Dom_All)

In [None]:
# UK cardholder domestic online spending ('All' merchant category and group,
# merchant_channel = 'Online'), one row per quarter.

# Two corrections compared with the earlier query:
#  * `mcc = 'All'` is pinned like in the sibling queries, so only the total row is read
#    (NOTE(review): confirm no per-MCC rows exist with mcg = 'All'; if none do, this
#    filter changes nothing).
#  * spend is aggregated with SUM instead of appearing in GROUP BY, which acted as DISTINCT.
UK_spending_by_Dom_Online_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'Online'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country = 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_Online_All = bq.read_bq_table_sql(client, UK_spending_by_Dom_Online_All)
# Rename the 'spend' column to 'dom_online_spend'
df_by_Dom_Online_All = df_by_Dom_Online_All.rename(columns={'spend': 'dom_online_spend'})
df_by_Dom_Online_All.head()


In [None]:
# Yearly UK cardholder domestic online spending

import pandas as pd

# Derive the calendar year from the leading four characters of time_period_value
# (assumes values start with a four-digit year, e.g. '2023...').
df_by_Dom_Online_All['year'] = (
    df_by_Dom_Online_All['time_period_value'].str.slice(stop=4).astype(int)
)

# Add up the quarterly online spend within each year, ordered by year.
df_yearly_Dom_Online_All = (
    df_by_Dom_Online_All.groupby('year')['dom_online_spend']
    .sum()
    .reset_index()
)
df_yearly_Dom_Online_All = df_yearly_Dom_Online_All.sort_values(by='year')

# Display the yearly totals
print(df_yearly_Dom_Online_All)

In [None]:
# Persist the yearly domestic online totals. index=False keeps the positional index out
# of the file so that re-reading it does not produce a spurious 'Unnamed: 0' column.
df_yearly_Dom_Online_All.to_csv('UK_yearly_Dom_Online_All.csv', index=False)

In [None]:
# Reload the saved yearly domestic totals and yearly domestic online totals.
# TODO: open question from the author - why does the data only run to 2023 and not 2025?

import pandas as pd

# Yearly domestic total spend
df_total_spend = pd.read_csv('UK_spending_by_yearly_Dom_All.csv')

# Yearly domestic online spend
df_online_spend = pd.read_csv('UK_yearly_Dom_Online_All.csv')

# Show the first rows of both frames to check their structure
print(df_total_spend.head(), df_online_spend.head(), sep='\n')

In [None]:
# Join online and total domestic spend per year; only years present in both are kept.
merged_spend = df_online_spend[['year', 'dom_online_spend']].merge(
    df_total_spend[['year', 'spend']],
    on='year',
    how='inner',
)

# Display the merged DataFrame to verify
print(merged_spend.head())


In [None]:
# Online share of domestic spend, expressed as a percentage of the yearly total.
merged_spend['dom_online_spend_ratio'] = (
    merged_spend['dom_online_spend'].div(merged_spend['spend']).mul(100)
)

# Show the ratio per year
print(merged_spend.loc[:, ['year', 'dom_online_spend_ratio']])


In [None]:
# Write the UK domestic online spending ratio table (without the index) to CSV.
merged_spend.to_csv(path_or_buf='UK_yearly_dom_online_spend_ratio.csv', index=False)

# Confirm the export
print("The online spend ratio has been saved to 'UK_yearly_dom_online_spend_ratio.csv'.")


In [None]:
# Second setup cell: loads the ft_digital_trade helpers and builds one frame per data
# source (Visa, global cards/spend, UK Finance, Bank of England) for the comparison below.
# NOTE(review): repeats the path/import/client setup of the first cell.
project_path = "/home/jupyter"
import os
import sys
# Must precede the ft_digital_trade imports, which are resolved via this path entry.
sys.path.append(project_path)

from google.cloud import bigquery
import importlib
import plotly.express as px

import numpy as np
import pandas as pd
from datetime import datetime

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

client = bigquery.Client()

# Visa data restricted to "uk" for all three read_visa arguments.
visa_data = read_utils.read_visa(cardholder_origin = "uk", cardholders_location = "uk", spend_location = "uk")

# rename_columns presumably appends the suffix to the non-key columns: later cells read
# 'cardholders_spoc' and 'spend_spoc' and merge on an unsuffixed 'year' - TODO confirm.
visa = calc_utils.calculate_visa(visa_data)
visa = clean_utils.rename_columns(df = visa, suffix = '_spoc')

# Global card counts: read -> clean -> calculate ('card' variant).
global_cards = read_utils.read_global_cards()
global_cards = clean_utils.clean_global(global_cards)
global_cards = calc_utils.calculate_global(global_cards, 'card')

# Global spend: same pipeline with the 'spend' variant.
global_spend = read_utils.read_global_spend()
global_spend = clean_utils.clean_global(global_spend)
global_spend = calc_utils.calculate_global(global_spend, 'spend')

# Combine cards and spend per year; columns present in both get '_cards' / '_spend' appended,
# then the '_global' source suffix is applied.
global_df = global_cards.merge(global_spend, how = 'inner', on = 'year', suffixes = ('_cards', '_spend'))
global_df = clean_utils.rename_columns(df = global_df, suffix = '_global')

# UK Finance: keep only the year key plus the three measures used downstream.
uk_finance = read_utils.read_uk_finance()
uk_finance = clean_utils.clean_uk_finance(uk_finance)
uk_finance = calc_utils.calculate_uk_finance(uk_finance)
uk_finance = uk_finance[['year', 'cardholders','total value of purchases',"total volume of purchases"]]
uk_finance = clean_utils.rename_columns(df = uk_finance , suffix = '_uk_finance')

# Bank of England series, suffixed '_boe'.
boe = read_utils.read_boe()
boe = clean_utils.clean_boe(boe)
boe = calc_utils.calculate_boe(boe)
boe = clean_utils.rename_columns(df = boe , suffix = '_boe')


In [None]:
# Merge all sources per year and derive Visa market-share measures for cardholders and spend.

# NOTE(review): `link` is not used anywhere in the visible part of the notebook.
link = read_utils.read_link()

# Outer joins keep every year that appears in any source; missing values become NaN.
merged = visa.merge(uk_finance, how = 'outer', on = 'year')
merged = merged.merge(boe, how = 'outer', on = 'year')
merged = merged.merge(global_df, how = 'outer', on = 'year')

# --- cardholders ---
cardholders = merged[['year','cardholders_spoc','cardholders_uk_finance','visa_total_cards_global','total_cards_global', 'visa_marketshare_cards_global']]
# Explicit copy so the column assignments below do not write into a slice of `merged`.
cardholders = cardholders.copy()
# Visa cardholders as a percentage of the UK Finance and global card totals.
cardholders['uk_finance_marketshare'] = cardholders['cardholders_spoc'] / cardholders['cardholders_uk_finance'] *100
cardholders['global_marketshare'] = cardholders['cardholders_spoc'] / cardholders['total_cards_global'] *100
#melt df for charts
cardholders = pd.melt(cardholders, id_vars='year',var_name='Data source', value_name='value')
cardholders = calc_utils.calculate_index(df = cardholders)

# --- spend ---
spend = merged[['year','spend_spoc', 
        'total value of purchases_uk_finance',
       'Mastercard values_boe', 'Visa Europe values_boe',
       'Mastercard and Visa values_boe', 'Visa proportion_boe',
       'debit_spend_global', 'credit_spend_global', 'visa_total_spend_global',
       'total_spend_global', 'visa_marketshare_spend_global']]
spend = spend.copy()
# Blank out the 2025 values of the Visa and UK Finance spend series
# (the next cell drops 2025 as well, citing incomplete data).
spend['spend_spoc'] = np.where(spend['year']==2025, np.nan, spend['spend_spoc'])
spend['total value of purchases_uk_finance'] = np.where(spend['year']==2025, np.nan, spend['total value of purchases_uk_finance'])
#calculate marketshare
spend['uk_finance_marketshare'] = spend['spend_spoc'] / spend['total value of purchases_uk_finance'] *100
spend['global_marketshare'] = spend['spend_spoc'] / spend['total_spend_global'] *100
spend['boe_marketshare'] = spend['spend_spoc'] / spend['Mastercard and Visa values_boe'] *100
#copy used for getting 2019 marketshare
# Must be taken before the melt below, while the frame is still one row per year.
spend_copy = spend.copy()
#melt df for charts
spend = pd.melt(spend, id_vars='year',var_name='Data source', value_name='value')
spend = calc_utils.calculate_index(df = spend)

plot_utils.plot_total_cardholders(df = cardholders)


In [None]:
# Rescale Visa spend to a constant base-year cardholder population, gross it up to an
# estimated whole-market total with the base-year market share, and reshape for indexing.

# Year that the market share and the cardholder index are anchored to. It is looked up
# explicitly instead of taking row 0: `merged` is built from outer joins, so the first
# row is only the base year if no source contributes an earlier year.
BASE_YEAR = 2019

# Options for the market-share threshold (alternatives kept for reference):
#   'visa_marketshare_spend_global', 'global_marketshare', 'uk_finance_marketshare'
base_share = spend_copy.loc[spend_copy['year'] == BASE_YEAR, 'uk_finance_marketshare']
if base_share.empty:
    raise ValueError(f"No {BASE_YEAR} row in spend_copy; cannot derive the base-year market share.")
marketshare_2019 = base_share.iloc[0]

df = merged.copy()
#remove 2025 due to incomplete data
df = df[df['year'] != 2025]

base_rows = df.loc[df['year'] == BASE_YEAR]
if base_rows.empty:
    raise ValueError(f"No {BASE_YEAR} row in merged; cannot index the Visa series.")
base_row = base_rows.iloc[0]

#index spoc data (base year = 100)
df['idx_cardholders_spoc'] = df['cardholders_spoc'] / base_row['cardholders_spoc'] * 100
df['idx_spend_spoc'] = df['spend_spoc'] / base_row['spend_spoc'] * 100
# adjust visa spend to base-year cardholders (assume same number of cardholders each year)
df['visa_adj_spend_spoc'] = (df['spend_spoc']/df['idx_cardholders_spoc'])*100
# gross up: adjusted Visa spend divided by the base-year market share (a percentage)
df['total_spoc'] = df['visa_adj_spend_spoc'] / marketshare_2019 *100
#rename columns
df = df.rename(columns={
    'total value of purchases_uk_finance': 'total_uk_finance',
    'visa_total_spend_global': 'visa_total_global',
    'total_spend_global': 'total_global',
    'Visa Europe values_boe': 'visa_total_boe',
    'Mastercard and Visa values_boe': 'total_boe',
})
#filter columns
df = df[['year', 'visa_adj_spend_spoc', 'total_spoc', 'visa_total_global', 'total_global', 'total_uk_finance', 'visa_total_boe', 'total_boe' ]]
#melt df
df = pd.melt(df, id_vars='year',var_name='Data source', value_name='value')
df = calc_utils.calculate_index(df = df)

# Column legend (source column in brackets):
# total_uk_finance: UK Finance total value of purchases ('total value of purchases_uk_finance').
# visa_total_global: Visa total spend from the global dataset ('visa_total_spend_global').
# total_global: total spend from the global dataset ('total_spend_global').
# visa_total_boe: Bank of England 'Visa Europe values'.
# total_boe: Bank of England 'Mastercard and Visa values'.

In [None]:
import pandas as pd

# Series from the melted `df` that should appear as columns of the overview table
data_sources = ['total_uk_finance', 'visa_total_global', 'total_global', 'visa_total_boe', 'total_boe']

# Keep only the rows that belong to one of those series
df_filtered = df.loc[df['Data source'].isin(data_sources)]

# One row per year, one column per data source; 'first' takes the first value found
# for each (year, source) pair
df_pivot = pd.pivot_table(
    df_filtered,
    index='year',
    columns='Data source',
    values='value',
    aggfunc='first',
)

# Display the result
print(df_pivot)

In [None]:
# Save total_boe separately in a CSV file

import pandas as pd

# From the melted `df`, take the year/value pairs of the 'total_boe' series and
# name the value column after the series
df_total_boe = (
    df.loc[df['Data source'] == 'total_boe', ['year', 'value']]
    .rename(columns={'value': 'total_boe'})
)

# Save to CSV for later use
df_total_boe.to_csv(path_or_buf='total_boe_values.csv', index=False)

# Display the saved DataFrame
print(df_total_boe)


In [None]:
# Back-cast total_boe for 2020 and 2019 from the mean year-on-year growth of 2021-2024

import pandas as pd

# Load the saved series and order it by year
df_boe = pd.read_csv('total_boe_values.csv').sort_values('year').reset_index(drop=True)

# Observed (non-missing) values for 2021 to 2024, in year order
has_value = df_boe['total_boe'].notna()
in_window = df_boe['year'].between(2021, 2024)
known_values = df_boe[has_value & in_window].sort_values('year')

# Arithmetic mean of the year-on-year percentage changes
growth_rates = known_values['total_boe'].pct_change().dropna()
average_growth_rate = growth_rates.mean()

# Walk backwards from 2021: each earlier year is the later year divided by (1 + growth)
value_2021 = df_boe.loc[df_boe['year'] == 2021, 'total_boe'].iloc[0]
value_2020 = value_2021 / (1 + average_growth_rate)
value_2019 = value_2020 / (1 + average_growth_rate)

# Fill the estimates into the existing 2020 and 2019 rows (no row is added if one is absent)
df_boe.loc[df_boe['year'] == 2020, 'total_boe'] = value_2020
df_boe.loc[df_boe['year'] == 2019, 'total_boe'] = value_2019

# Save the updated series to a new CSV file
df_boe.to_csv(path_or_buf='updated_total_boe.csv', index=False)

# Display the updated DataFrame
df_boe



In [None]:
import pandas as pd

# Load the per-source CSV files.
# NOTE(review): only updated_total_boe.csv is written in the visible part of this
# notebook; the other four files must already exist next to it.
df_total_boe = pd.read_csv("updated_total_boe.csv")
df_total_global = pd.read_csv("total_global.csv")
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")
df_visa_total_boe = pd.read_csv("visa_total_boe.csv")
df_visa_total_global = pd.read_csv("visa_total_global.csv")

# Print each file's column names so the later renaming can be checked
for csv_name, loaded in (
    ("updated_total_boe.csv", df_total_boe),
    ("total_global.csv", df_total_global),
    ("total_uk_finance.csv", df_total_uk_finance),
    ("visa_total_boe.csv", df_visa_total_boe),
    ("visa_total_global.csv", df_visa_total_global),
):
    print(f"{csv_name} columns:", loaded.columns)



In [None]:
# Setup (repeats the first cell) followed by the yearly UK cardholder B2B online spend abroad.
project_path = "/home/jupyter"
import os
import sys
# Must precede the fintrans_toolbox import, which is resolved via this path entry.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

client = bigquery.Client()

# UK cardholder B2B online spend outside the UK, per quarter and destination country.
# SUM(spend) replaces `GROUP BY ..., spend`, which acted as a DISTINCT on the measure
# and could drop rows that happen to share the same spend value.
UK_spending_by_B2B_online_Intl = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'BUSINESS TO BUSINESS'
AND merchant_channel = 'Online'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_B2B_online_Intl = bq.read_bq_table_sql(client, UK_spending_by_B2B_online_Intl)
df_by_B2B_online_Intl = df_by_B2B_online_Intl.rename(columns={'spend': 'Intl_online_B2B'})

# Yearly total. Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_B2B_online_Intl['year'] = df_by_B2B_online_Intl['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_B2B_online_Intl = (
    df_by_B2B_online_Intl.groupby('year')['Intl_online_B2B']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_B2B_online_Intl)

In [None]:
# UK cardholder online spend outside the UK ('All' merchant category and group),
# per quarter and destination country, rolled up to calendar years and saved.

# Two corrections compared with the earlier query:
#  * `mcc = 'All'` is pinned like in the sibling queries, so only the total row is read
#    (NOTE(review): confirm no per-MCC rows exist with mcg = 'All'; if none do, this
#    filter changes nothing).
#  * spend is aggregated with SUM instead of appearing in GROUP BY, which acted as DISTINCT.
UK_spending_by_online_Intl_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'Online'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_online_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_online_Intl_All)
df_by_online_Intl_All = df_by_online_Intl_All.rename(columns={'spend': 'Intl_online_all'})

# Yearly total. Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_online_Intl_All['year'] = df_by_online_Intl_All['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_online_Intl_All = (
    df_by_online_Intl_All.groupby('year')['Intl_online_all']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_online_Intl_All)

# index=False: the positional index would otherwise come back as an 'Unnamed: 0'
# column when the next cell re-reads this file.
df_yearly_online_Intl_All.to_csv('yearly_online_Intl_All.csv', index=False)

In [None]:
# UK cardholder abroad online spending as a fraction of total UK cardholder spending

import pandas as pd

# Load the yearly tables saved by earlier cells
df_yearly_online_Intl_All = pd.read_csv("yearly_online_Intl_All.csv")
df_yearly_All = pd.read_csv("UK_spending_by_yearly_All.csv")

# Merge on 'year' using only the key and the measure from each side. Both files may
# carry a saved index column ('Unnamed: 0'); merging the full frames would turn those
# into 'Unnamed: 0_x' / 'Unnamed: 0_y' and write them to the output file.
df_merged = pd.merge(
    df_yearly_All[['year', 'spend']],
    df_yearly_online_Intl_All[['year', 'Intl_online_all']],
    on='year',
)

# Ratio (a fraction, not a percentage): abroad online spend / total spend
df_merged['UK_Abroad_online_Spending_Ratio'] = df_merged['Intl_online_all'] / df_merged['spend']

# Display the ratio per year
print(df_merged[['year', 'UK_Abroad_online_Spending_Ratio']])

# Save the result to a new CSV file
df_merged.to_csv("UK_Abroad_online_Spending_Ratio.csv", index=False)

# Display the full table
df_merged

In [None]:
# UK cardholder household online spend abroad per year = all online abroad minus B2B online abroad

# Join the two yearly tables on 'year' (years present in both), take the difference
# and keep only the year and the household figure.
df_yearly_online_HH_Intl = (
    df_yearly_online_Intl_All
    .merge(df_yearly_B2B_online_Intl, on='year', how='inner')
    .assign(Intl_online_household=lambda t: t['Intl_online_all'] - t['Intl_online_B2B'])
    .loc[:, ['year', 'Intl_online_household']]
)

# Display the resulting table
print(df_yearly_online_HH_Intl)


In [None]:
# UK cardholder spending abroad ('All' merchant category, group and channel), quarterly totals

# One row per quarter and destination country. SUM(spend) replaces `GROUP BY ..., spend`,
# which acted as a DISTINCT on the measure and could drop rows sharing the same value.
UK_spending_by_Intl_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_Intl_All)
df_by_Intl_All = df_by_Intl_All.rename(columns={'spend': 'abroad_spend_all'})

# Sum across destination countries within each quarter, ordered by quarter.
df_Quarterly_Intl_All = (
    df_by_Intl_All.groupby('time_period_value')['abroad_spend_all']
    .sum()
    .reset_index()
    .sort_values(by='time_period_value')
)

# Display the result
print(df_Quarterly_Intl_All)

# NOTE(review): written with the default index column; no visible cell reads this file,
# so the format is left unchanged.
df_Quarterly_Intl_All.to_csv('Quarterly_Intl_All.csv')

In [None]:
# UK cardholder B2B spending abroad (all channels), quarterly totals

# One row per quarter and destination country. SUM(spend) replaces `GROUP BY ..., spend`,
# which acted as a DISTINCT on the measure and could drop rows sharing the same value.
UK_spending_by_B2B_Intl_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'BUSINESS TO BUSINESS'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_B2B_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_B2B_Intl_All)
df_by_B2B_Intl_All = df_by_B2B_Intl_All.rename(columns={'spend': 'Intl_B2B'})

# Sum across destination countries within each quarter, ordered by quarter.
df_Quarterly_B2B_Intl_All = (
    df_by_B2B_Intl_All.groupby('time_period_value')['Intl_B2B']
    .sum()
    .reset_index()
    .sort_values(by='time_period_value')
)

# Display the result
print(df_Quarterly_B2B_Intl_All)

In [None]:
# Household spend abroad per quarter, method 2 (currently the method in use):
# all spend abroad minus B2B spend abroad. TODO: still marked as a test by the author.

# Merge the two quarterly tables on the quarter label (quarters present in both)
df_Quarterly_spend_abroad_household = pd.merge(df_Quarterly_Intl_All, df_Quarterly_B2B_Intl_All, on='time_period_value', how='inner')

# Subtract using the merged frame's own columns so both operands belong to the same
# quarter. Subtracting the two source frames instead aligns on row position, which
# pairs the wrong quarters as soon as one table lacks a quarter the other has.
df_Quarterly_spend_abroad_household['spend_abroad_household'] = (
    df_Quarterly_spend_abroad_household['abroad_spend_all']
    - df_Quarterly_spend_abroad_household['Intl_B2B']
)

# Keep only the quarter and the household figure
df_Quarterly_spend_abroad_household = df_Quarterly_spend_abroad_household[['time_period_value', 'spend_abroad_household']]

# Display the resulting table
print(df_Quarterly_spend_abroad_household)

In [None]:
# Household spend abroad per quarter, method 1: sum of every merchant category group
# except B2B. TODO: still marked as a test by the author.

# Corrections compared with the earlier query:
#  * The 'All' group is excluded as well. With only B2B excluded, the 'All' total row
#    was added on top of the individual groups, double counting the spend. The yearly
#    version of this query already excludes both.
#  * spend is aggregated with SUM; `GROUP BY ..., spend` acted as DISTINCT and merged
#    groups that reported an identical value for the same quarter and country.
UK_spending_by_HH_Intl = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg NOT IN ('BUSINESS TO BUSINESS', 'All')
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_HH_Intl = bq.read_bq_table_sql(client, UK_spending_by_HH_Intl)
df_by_HH_Intl = df_by_HH_Intl.rename(columns={'spend': 'HH_Intl'})

# Sum across destination countries within each quarter, ordered by quarter.
df_Quarterly_HH_Intl = (
    df_by_HH_Intl.groupby('time_period_value')['HH_Intl']
    .sum()
    .reset_index()
    .sort_values(by='time_period_value')
)

# Display the result
print(df_Quarterly_HH_Intl)

In [None]:
# Compare the two household-abroad methods per quarter: method 2 (all minus B2B)
# minus method 1 (sum of non-B2B groups). TODO: still marked as a test by the author.

# Merge the two quarterly tables on the quarter label (quarters present in both)
df_Compare_Quarterly_spend_abroad_household = pd.merge(df_Quarterly_spend_abroad_household, df_Quarterly_HH_Intl, on='time_period_value', how='inner')

# Difference computed from the merged frame's own columns so both operands belong to
# the same quarter; subtracting the source frames would align on row position instead.
# The result overwrites 'spend_abroad_household', as before, to keep the output column name.
df_Compare_Quarterly_spend_abroad_household['spend_abroad_household'] = (
    df_Compare_Quarterly_spend_abroad_household['spend_abroad_household']
    - df_Compare_Quarterly_spend_abroad_household['HH_Intl']
)

# Keep only the quarter and the difference
df_Compare_Quarterly_spend_abroad_household = df_Compare_Quarterly_spend_abroad_household[['time_period_value', 'spend_abroad_household']]

# Display the resulting table
print(df_Compare_Quarterly_spend_abroad_household)

In [None]:
# UK cardholder spending abroad ('All' merchant category, group and channel), yearly totals.
# NOTE(review): re-runs the same query as the quarterly cell and overwrites
# UK_spending_by_Intl_All / df_by_Intl_All.

# One row per quarter and destination country. SUM(spend) replaces `GROUP BY ..., spend`,
# which acted as a DISTINCT on the measure and could drop rows sharing the same value.
UK_spending_by_Intl_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'All'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_Intl_All)
df_by_Intl_All = df_by_Intl_All.rename(columns={'spend': 'abroad_spend_all'})

# Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_Intl_All['year'] = df_by_Intl_All['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_Intl_All = (
    df_by_Intl_All.groupby('year')['abroad_spend_all']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_Intl_All)

# index=False: the positional index would otherwise come back as an 'Unnamed: 0'
# column when the next cell re-reads this file.
df_yearly_Intl_All.to_csv('yearly_Intl_All.csv', index=False)

In [None]:
# UK cardholder abroad spending as a fraction of total UK cardholder spending

import pandas as pd

# Load the yearly tables saved by earlier cells
df_yearly_Intl_All = pd.read_csv("yearly_Intl_All.csv")
df_yearly_All = pd.read_csv("UK_spending_by_yearly_All.csv")

# Merge on 'year' using only the key and the measure from each side. Both files may
# carry a saved index column ('Unnamed: 0'); merging the full frames would turn those
# into 'Unnamed: 0_x' / 'Unnamed: 0_y' and write them to the output file.
df_merged = pd.merge(
    df_yearly_All[['year', 'spend']],
    df_yearly_Intl_All[['year', 'abroad_spend_all']],
    on='year',
)

# Ratio (a fraction, not a percentage): abroad spend / total spend
df_merged['UK_Abroad_Spending_Ratio'] = df_merged['abroad_spend_all'] / df_merged['spend']

# Display the ratio per year
print(df_merged[['year', 'UK_Abroad_Spending_Ratio']])

# Save the result to a new CSV file
df_merged.to_csv("UK_Abroad_Spending_Ratio.csv", index=False)

# Display the full table
df_merged


In [None]:
# UK cardholder B2B spending abroad (all channels), yearly totals.
# NOTE(review): re-runs the same query as the quarterly B2B cell and overwrites
# UK_spending_by_B2B_Intl_All / df_by_B2B_Intl_All.

# One row per quarter and destination country. SUM(spend) replaces `GROUP BY ..., spend`,
# which acted as a DISTINCT on the measure and could drop rows sharing the same value.
UK_spending_by_B2B_Intl_All = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg = 'BUSINESS TO BUSINESS'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_B2B_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_B2B_Intl_All)
df_by_B2B_Intl_All = df_by_B2B_Intl_All.rename(columns={'spend': 'Intl_B2B'})

# Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_B2B_Intl_All['year'] = df_by_B2B_Intl_All['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_B2B_Intl_All = (
    df_by_B2B_Intl_All.groupby('year')['Intl_B2B']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_B2B_Intl_All)

In [None]:
# Household spend abroad per year, method 1: sum of every merchant category group
# except the 'All' total and B2B. TODO: still flagged as unresolved by the author.

# Several merchant category groups contribute to each quarter and country, so the
# measure must be aggregated with SUM. The previous `GROUP BY ..., spend` worked like
# DISTINCT and merged groups that reported an identical spend value, under-counting.
UK_spending_by_HH_Intl = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg NOT IN ('BUSINESS TO BUSINESS', 'All')
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_HH_Intl = bq.read_bq_table_sql(client, UK_spending_by_HH_Intl)
df_by_HH_Intl = df_by_HH_Intl.rename(columns={'spend': 'HH_Intl'})

# Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_HH_Intl['year'] = df_by_HH_Intl['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_HH_Intl = (
    df_by_HH_Intl.groupby('year')['HH_Intl']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_HH_Intl)

In [None]:
# Household spend abroad per year, method 1: sum of every merchant category group
# except the 'All' total and B2B.
# NOTE(review): this cell expresses the same filter as the cell before it
# (`!=` twice instead of NOT IN) and overwrites its results; one of the two can go.

# Several merchant category groups contribute to each quarter and country, so the
# measure must be aggregated with SUM. The previous `GROUP BY ..., spend` worked like
# DISTINCT and merged groups that reported an identical spend value, under-counting.
UK_spending_by_HH_Intl = '''SELECT time_period_value, destination_country, SUM(spend) AS spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
AND mcc = 'All'
AND mcg != 'BUSINESS TO BUSINESS'
AND mcg != 'All'
AND merchant_channel = 'All'
AND cardholder_origin_country = 'All'
AND cardholder_origin = 'UNITED KINGDOM'
AND destination_country != 'UNITED KINGDOM'
GROUP BY time_period_value, destination_country
ORDER BY time_period_value, destination_country DESC'''
df_by_HH_Intl = bq.read_bq_table_sql(client, UK_spending_by_HH_Intl)
df_by_HH_Intl = df_by_HH_Intl.rename(columns={'spend': 'HH_Intl'})

# Assumes time_period_value starts with a four-digit year (e.g. '2023...').
df_by_HH_Intl['year'] = df_by_HH_Intl['time_period_value'].str[:4].astype(int)

# Sum all quarters and destination countries of a year, ordered by year.
df_yearly_HH_Intl = (
    df_by_HH_Intl.groupby('year')['HH_Intl']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Display the result
print(df_yearly_HH_Intl)

In [None]:
# Household abroad spend ratio 1: abroad share of household spend, in percent,
# using the yearly household-abroad table from method 1 (df_yearly_HH_Intl).

# Pair abroad and domestic household spend per year (years present in both tables)
df_combined_spend1 = df_yearly_HH_Intl.merge(
    df_yearly_HH_Dom_All,
    on='year',
    how='inner',
    suffixes=('_abroad', '_domestic'),
)

# ratio = abroad / (domestic + abroad) * 100
df_combined_spend1['Abroad_Household_Spend_Ratio 1'] = (
    df_combined_spend1['HH_Intl']
    / (df_combined_spend1['domestic_spend_HH'] + df_combined_spend1['HH_Intl'])
) * 100

# Keep the year and the ratio only
df_combined_spend1 = df_combined_spend1.loc[:, ['year', 'Abroad_Household_Spend_Ratio 1']]

# Display the resulting table
print(df_combined_spend1)


In [None]:
# Household Abroad Spend Ratio 2 --------------------- USED This one FOR NOW -----------------------
# Inputs: df_yearly_spend_abroad_household (expected to carry 'year' and
# 'spend_abroad_household') and df_yearly_HH_Dom_All (expected to carry 'year' and
# 'domestic_spend_HH'), both defined in other cells.

# Align the abroad and domestic yearly totals on 'year'
spend2_by_year = df_yearly_spend_abroad_household.merge(
    df_yearly_HH_Dom_All,
    on='year',
    how='inner',
    suffixes=('_abroad', '_domestic'),
)

# Ratio = abroad / (domestic + abroad), expressed as a percentage
abroad_spend2 = spend2_by_year['spend_abroad_household']
all_spend2 = spend2_by_year['domestic_spend_HH'] + abroad_spend2
spend2_by_year['Abroad_Household_Spend_Ratio 2'] = (abroad_spend2 / all_spend2) * 100

# Only the year and the ratio are kept in the final table
df_combined_spend2 = spend2_by_year[['year', 'Abroad_Household_Spend_Ratio 2']]

# Display the resulting table
print(df_combined_spend2)


In [None]:

# Write the "ratio 2" table to disk (without the index) so later cells can reload it
ratio2_csv_name = "Abroad_Household_Spend_Ratio 2.csv"
df_combined_spend2.to_csv(ratio2_csv_name, index=False)

# Confirmation message once to_csv has returned
print(f"Saved {ratio2_csv_name} successfully.")


In [None]:
import pandas as pd

# Reload the saved "ratio 2" CSV as a sanity check that the file round-trips
file_path = "Abroad_Household_Spend_Ratio 2.csv"

# Load the CSV file into a DataFrame
df_ratio = pd.read_csv(file_path)

# Display the full DataFrame to confirm all rows are loaded
print("Loaded DataFrame:")
print(df_ratio)

# Check the number of rows and if any data is missing
print("\nNumber of rows loaded:", len(df_ratio))
print("\nDataFrame info:")
# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df_ratio.info()



In [None]:
# Calculate UK Cardholder Household Abroad Online Spend Ratio
# Inputs: df_yearly_online_HH_Intl (expected to carry 'year' and
# 'Intl_online_household') and df_yearly_Intl_All (expected to carry 'year' and
# 'abroad_spend_all'), both defined in other cells.

# Align the household online-abroad totals with the all-abroad totals on 'year'
df_HH_Online_Ratio_Intl = pd.merge(df_yearly_online_HH_Intl, df_yearly_Intl_All, on='year', how='inner', suffixes=('_abroad', '_domestic'))

# Ratio (%) = household online abroad spend / all abroad spend.
# Both operands are read from the MERGED frame so that numerator, denominator and
# 'year' all come from the same row. Dividing the two source frames directly would
# align on their row index instead, which misassigns ratios whenever the inputs
# differ in year coverage or ordering.
df_HH_Online_Ratio_Intl['Abroad_Household_Online_Spend_Ratio'] = (
    df_HH_Online_Ratio_Intl['Intl_online_household'] /
    df_HH_Online_Ratio_Intl['abroad_spend_all']
) * 100

# Keep only the year and the ratio in the final table
df_HH_Online_Ratio_Intl = df_HH_Online_Ratio_Intl[['year', 'Abroad_Household_Online_Spend_Ratio']]

# Display the resulting table
print(df_HH_Online_Ratio_Intl)


In [None]:

# Write the online spend ratio table to disk (without the index) for later cells
online_ratio_csv_name = "Abroad_Household_Online_Spend_Ratio.csv"
df_HH_Online_Ratio_Intl.to_csv(online_ratio_csv_name, index=False)

# Confirmation message once to_csv has returned
print(f"Saved {online_ratio_csv_name} successfully.")



In [None]:
# Household Abroad Online Spending by Data Sources 2019 - 2024
#
# For every yearly "total" series this cell computes
#     online_ratio(%) * household_abroad_ratio(%) * total / 10000
# i.e. both percentage ratios applied to the total.

import pandas as pd

# Load each totals CSV and give its value column a distinct "*_totals" name
df_total_boe = pd.read_csv("updated_total_boe.csv").rename(
    columns={'total_boe': 'updated_total_boe_totals'})
df_total_global = pd.read_csv("total_global.csv").rename(
    columns={'total_global': 'total_global_totals'})
df_total_uk_finance = pd.read_csv("total_uk_finance.csv").rename(
    columns={'total_uk_finance': 'total_uk_finance_totals'})
df_visa_total_boe = pd.read_csv("visa_total_boe.csv").rename(
    columns={'visa_total_boe': 'visa_total_boe_totals'})
df_visa_total_global = pd.read_csv("visa_total_global.csv").rename(
    columns={'visa_total_global': 'visa_total_global_totals'})

# The two percentage ratios saved by earlier cells
df_ratio = pd.read_csv("Abroad_Household_Online_Spend_Ratio.csv")
df_spend_ratio = pd.read_csv("Abroad_Household_Spend_Ratio 2.csv")

# Inner-join all totals on 'year', in the same order as the files were loaded
df_totals = df_total_boe
for totals_frame in (df_total_global, df_total_uk_finance, df_visa_total_boe, df_visa_total_global):
    df_totals = df_totals.merge(totals_frame, on='year', how='inner')

# Join the two ratios, then attach the totals
df_ratio_combined = df_ratio.merge(df_spend_ratio, on='year', how='inner')
df_combined = df_ratio_combined.merge(df_totals, on='year', how='inner')

# Output column -> totals column it is derived from (insertion order = output order)
spending_from_totals = {
    'Household_Abroad_Online_Spending_visa_total_boe': 'visa_total_boe_totals',
    'Household_Abroad_Online_Spending_total_boe': 'updated_total_boe_totals',
    'Household_Abroad_Online_Spending_total_global': 'total_global_totals',
    'Household_Abroad_Online_Spending_visa_total_global': 'visa_total_global_totals',
    'Household_Abroad_Online_Spending_total_uk_finance': 'total_uk_finance_totals',
}

# Product of the two percentage ratios; dividing by 10000 (= 100 * 100) below
# converts the pair of percentages back into a fraction
ratio_product = (
    df_combined['Abroad_Household_Online_Spend_Ratio'] *
    df_combined['Abroad_Household_Spend_Ratio 2']
)
for spending_col, totals_col in spending_from_totals.items():
    df_combined[spending_col] = (ratio_product * df_combined[totals_col]) / 10000

# Final table: year plus one estimated spending column per data source
df_combined_result = df_combined[['year', *spending_from_totals]]

df_combined_result



In [None]:
import pandas as pd

# df_combined_result comes from the cell that builds the per-source estimates;
# it is written without the index so later cells can reload it
combined_csv_name = 'df_combined_result.csv'
df_combined_result.to_csv(combined_csv_name, index=False)

print(f"CSV file '{combined_csv_name}' has been created successfully.")


In [None]:
# Rounded to Billion
# Reloads the per-source estimates, rescales every value column to whole billions
# and stores the values as formatted text (thousands separators, no decimals).

import pandas as pd

# Load the CSV file
df = pd.read_csv("df_combined_result.csv")


def _format_billions(value):
    """Return value / 1e9 as text with thousands separators and no decimals ('' for missing)."""
    return f"{value / 1e9:,.0f}" if pd.notnull(value) else ""


# Every column after the first ('year') is treated as a value column
numeric_cols = df.columns[1:]

# Coerce to numeric first (unparseable entries become NaN), then format element-wise.
# Series.map is applied per column instead of DataFrame.applymap, which is
# deprecated in recent pandas releases and emits a FutureWarning.
df[numeric_cols] = (
    df[numeric_cols]
    .apply(pd.to_numeric, errors='coerce')
    .apply(lambda column: column.map(_format_billions))
)

# Display the updated DataFrame
print(df)


# Save the cleaned and formatted DataFrame to a new CSV file
df.to_csv("df_combined_result_billions.csv", index=False)

print("The cleaned and formatted data has been saved to 'df_combined_result_billions.csv'.")



In [None]:
# Bar & Line Charts for Data Sources (values shown in billions)

import pandas as pd
import plotly.graph_objects as go

# Load the yearly totals; each CSV's value column is used under its existing name
df_total_boe = pd.read_csv("updated_total_boe.csv")
df_total_global = pd.read_csv("total_global.csv")
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")
df_visa_total_boe = pd.read_csv("visa_total_boe.csv")
df_visa_total_global = pd.read_csv("visa_total_global.csv")

# Inner-join all sources on 'year'
df_totals = df_total_boe
for totals_frame in (df_total_global, df_total_uk_finance, df_visa_total_boe, df_visa_total_global):
    df_totals = df_totals.merge(totals_frame, on='year', how='inner')

# Series to plot, in legend order
columns = ['total_uk_finance', 'visa_total_global', 'total_global', 'visa_total_boe', 'total_boe']

# Layout settings shared by both figures (only the title differs)
billions_layout = dict(
    xaxis_title="Year",
    yaxis_title="Amount in Billions GBP",
    template="plotly_dark",
    showlegend=True,
    xaxis=dict(tickmode='array', tickvals=df_totals['year']),
    yaxis=dict(
        title="Amount in Billions GBP",
        showgrid=True,
        zeroline=False,
        tickformat='.2f'
    ),
)

# One bar trace per source; values are divided by 1e9 to express them in billions
bar_traces = [
    go.Bar(
        x=df_totals['year'],
        y=df_totals[col] / 1e9,
        name=col.replace('_', ' ').title(),
        text=(df_totals[col] / 1e9).round(2),
        hoverinfo='text+x+y',
        orientation='v'
    )
    for col in columns
]

# Bar chart figure
bar_fig = go.Figure(data=bar_traces)
bar_fig.update_layout(
    title="Total UK Card Spending - Bar Chart (Billions GBP)",
    **billions_layout
)

# Display the bar chart
bar_fig.show()

# One line trace per source, same scaling as the bars
line_traces = [
    go.Scatter(
        x=df_totals['year'],
        y=df_totals[col] / 1e9,
        mode='lines+markers',
        name=col.replace('_', ' ').title(),
        line=dict(width=2),
        marker=dict(symbol='circle')
    )
    for col in columns
]

# Line chart figure
line_fig = go.Figure(data=line_traces)
line_fig.update_layout(
    title="Total UK Card Spending - Line Chart (Billions GBP)",
    **billions_layout
)

# Display the line chart
line_fig.show()



In [None]:
# Bar & Line Charts for Data Sources (values shown in trillions)

import pandas as pd
import plotly.graph_objects as go

# Load the yearly totals; each CSV's value column is used under its existing name
df_total_boe = pd.read_csv("updated_total_boe.csv")
df_total_global = pd.read_csv("total_global.csv")
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")
df_visa_total_boe = pd.read_csv("visa_total_boe.csv")
df_visa_total_global = pd.read_csv("visa_total_global.csv")

# Inner-join all sources on 'year'
df_totals = df_total_boe
for totals_frame in (df_total_global, df_total_uk_finance, df_visa_total_boe, df_visa_total_global):
    df_totals = df_totals.merge(totals_frame, on='year', how='inner')

# Series to plot, in legend order
columns = ['total_uk_finance', 'visa_total_global', 'total_global', 'visa_total_boe', 'total_boe']

# Layout settings shared by both figures (only the title differs)
trillions_layout = dict(
    xaxis_title="Year",
    yaxis_title="Amount in Trillions GBP",
    template="plotly_dark",
    showlegend=True,
    xaxis=dict(tickmode='array', tickvals=df_totals['year']),
    yaxis=dict(
        title="Amount in Trillions GBP",
        showgrid=True,
        zeroline=False,
        tickformat='.2f'
    ),
)

# One bar trace per source; values are divided by 1e12 to express them in trillions
bar_traces = [
    go.Bar(
        x=df_totals['year'],
        y=df_totals[col] / 1e12,
        name=col.replace('_', ' ').title(),
        text=(df_totals[col] / 1e12).round(2),
        hoverinfo='text+x+y',
        orientation='v'
    )
    for col in columns
]

# Bar chart figure
bar_fig = go.Figure(data=bar_traces)
bar_fig.update_layout(
    title="Total UK Card Spending - Bar Chart (Trillions GBP)",
    **trillions_layout
)
bar_fig.show()

# One line trace per source, same scaling as the bars
line_traces = [
    go.Scatter(
        x=df_totals['year'],
        y=df_totals[col] / 1e12,
        mode='lines+markers',
        name=col.replace('_', ' ').title(),
        line=dict(width=2),
        marker=dict(symbol='circle')
    )
    for col in columns
]

# Line chart figure
line_fig = go.Figure(data=line_traces)
line_fig.update_layout(
    title="Total UK Card Spending - Line Chart (Trillions GBP)",
    **trillions_layout
)
line_fig.show()



In [None]:
import pandas as pd
import plotly.graph_objects as go

# Grouped bar chart of the per-source household abroad online estimates.
# df_combined_result is produced by an earlier cell.
spending_columns = [
    'Household_Abroad_Online_Spending_visa_total_boe',
    'Household_Abroad_Online_Spending_total_boe',
    'Household_Abroad_Online_Spending_total_global',
    'Household_Abroad_Online_Spending_visa_total_global',
    'Household_Abroad_Online_Spending_total_uk_finance'
]

# Work on a copy so the source table keeps its original values, then rescale
# every estimate by 1e9 to show billions
df_plot = df_combined_result.copy()
df_plot[spending_columns] = df_plot[spending_columns].div(1e9)

# One bar trace per data source, labelled with a title-cased column name
bar_traces = [
    go.Bar(
        x=df_plot['year'],
        y=df_plot[category],
        name=category.replace('_', ' ').title(),
        text=df_plot[category].round(2),
        hoverinfo='text+x+y',
        orientation='v'
    )
    for category in spending_columns
]

# Assemble the figure; barmode='group' puts the sources side by side per year
fig_bar = go.Figure(bar_traces)
y_axis_settings = dict(
    title="Amount (in Billions GBP)",
    showgrid=True,
    zeroline=False,
    tickformat=".2f",
    tickprefix="£"
)
fig_bar.update_layout(
    title="Household Abroad Online Spending by Data Source",
    barmode='group',
    xaxis_title="Year",
    yaxis_title="Spending Amount (in Billions GBP)",
    template="plotly_dark",
    xaxis=dict(tickmode='array', tickvals=df_plot['year']),
    yaxis=y_axis_settings,
    showlegend=True
)

# Display Bar Chart
fig_bar.show()



In [None]:
# Bar chart for Household Abroad Online Spending by Data Sources 2019 - 2024 in Billion

import plotly.graph_objects as go
import pandas as pd

# Values come from the CSV written by the "Rounded to Billion" cell
df = pd.read_csv('df_combined_result_billions.csv')

# One bar series per data source
categories = [
    'Household_Abroad_Online_Spending_visa_total_boe',
    'Household_Abroad_Online_Spending_total_boe',
    'Household_Abroad_Online_Spending_total_global',
    'Household_Abroad_Online_Spending_visa_total_global',
    'Household_Abroad_Online_Spending_total_uk_finance'
]

# Build the traces: the value is used both as bar height and as the bar label;
# orientation 'v' draws vertical bars
bar_traces = [
    go.Bar(
        x=df['year'],
        y=df[category],
        name=category,
        text=df[category],
        hoverinfo='text+x+y',
        orientation='v'
    )
    for category in categories
]

# Assemble the figure; barmode='group' puts the sources side by side per year
fig_bar = go.Figure(bar_traces)
y_axis_settings = dict(
    title="Amount (in Billions GBP)",
    showgrid=True,
    zeroline=False,
    tickformat=".0f",   # whole numbers on the axis
    tickprefix="£",     # currency symbol before each tick
)
fig_bar.update_layout(
    title="Household Abroad Online Spending by Data Source",
    barmode='group',
    xaxis_title="Year",
    yaxis_title="Spending Amount (in Billions GBP)",
    template="plotly_dark",
    xaxis=dict(tickmode='array', tickvals=df['year']),  # one tick per year present in the data
    yaxis=y_axis_settings,
    showlegend=True
)

# Display Bar Chart
fig_bar.show()



In [None]:
# Line chart of the same per-source series as the preceding bar chart.
# Relies on `df` (loaded from 'df_combined_result_billions.csv'), `categories` and
# `go` from the preceding bar-chart cell, so that cell must be run first.

# Create the line traces, one per data source
line_traces = []
for category in categories:
    line_traces.append(go.Scatter(
        x=df['year'],
        y=df[category],
        mode='lines+markers',
        name=category
    ))

# Plotting the Line Chart
fig_line = go.Figure(line_traces)

fig_line.update_layout(
    title="Household Abroad Online Spending Trend by Category",
    xaxis_title="Year",
    # The plotted values are already expressed in billions (they come from the
    # billions CSV), so the axis label states that unit rather than plain GBP.
    yaxis_title="Spending Amount (in Billions GBP)",
    template="plotly_dark",
    showlegend=True
)

# Display Line Chart
fig_line.show()


In [None]:
# Line chart for UK Finance Household Abroad Online Total

import pandas as pd
import plotly.express as px

# Values come from the CSV written by the "Rounded to Billion" cell
df = pd.read_csv('df_combined_result_billions.csv')

# Column holding the UK Finance based estimate
uk_finance_online_col = 'Household_Abroad_Online_Spending_total_uk_finance'

# Draw the series, then thicken the line/markers and force one x tick per year;
# the y-axis title set in update_layout replaces the label given to px.line
fig_line = (
    px.line(
        df,
        x='year',
        y=uk_finance_online_col,
        title='UK Finance Household Abroad Online Total (Excl B2B)',
        labels={uk_finance_online_col: 'Abroad Online Total (£)', 'year': 'Year'},
        markers=True,
    )
    .update_traces(line=dict(width=3), marker=dict(size=7))
    .update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(title='Value (£ bn)'))
)

# Show the line chart
fig_line.show()



In [None]:
# Calculate UK Finance Abroad Household All
# Applies the household abroad spend ratio (a percentage) to the UK Finance total.

import pandas as pd

# Load the two CSV files
df_finance = pd.read_csv("total_uk_finance.csv")
df_ratio = pd.read_csv("Abroad_Household_Spend_Ratio 2.csv")

# Print each file's columns so the ratio column can be checked by eye
for csv_name, loaded_frame in (
    ("total_uk_finance.csv", df_finance),
    ("Abroad_Household_Spend_Ratio 2.csv", df_ratio),
):
    print(f"Columns in {csv_name}:", loaded_frame.columns.tolist())

# The ratio is taken positionally: second column of the ratio file
# NOTE(review): this depends on the CSV's column order -- confirm it is stable
ratio_column = df_ratio.columns[1]

# Join on 'year' and scale the total by the percentage ratio
df_merged = df_finance.merge(df_ratio, on="year")
df_merged["UK_Finance_Abroad_Household_Total"] = (
    df_merged["total_uk_finance"] * df_merged[ratio_column] / 100
)

# Column selection kept from the original cell; as it is not the cell's last
# expression, Jupyter does not render it
df_merged[["year", "total_uk_finance", ratio_column, "UK_Finance_Abroad_Household_Total"]]


# Save the full merged table to a new CSV file
output_file = "total_uk_finance_abroad_household_total.csv"
df_merged.to_csv(output_file, index=False)

output_file


In [None]:
# Compare UK Finance Total, and Household Abroad Online Total (Excl B2B)

import pandas as pd
import plotly.express as px

# Load the per-source estimates and the UK Finance totals
df_combined = pd.read_csv("df_combined_result.csv")
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")

# Column holding the UK Finance based estimate
uk_finance_online_col = 'Household_Abroad_Online_Spending_total_uk_finance'

# Join on 'year' and give both series plot-friendly names
df_merged = (
    df_total_uk_finance
    .merge(df_combined[['year', uk_finance_online_col]], on='year')
    .rename(columns={
        'total_uk_finance': 'UK_Total',
        uk_finance_online_col: 'Household_Abroad_Online_Total (Excl B2B)',
    })
)

# Two-series line chart with thicker lines/markers and one x tick per year
fig_line = (
    px.line(
        df_merged,
        x='year',
        y=['UK_Total', 'Household_Abroad_Online_Total (Excl B2B)'],
        title='UK Finance Total and UK Finance Household Abroad Online Total (Excl B2B)',
        labels={
            'year': 'Year',
            'UK_Total': 'Total (£)',
            'Household_Abroad_Online_Total (Excl B2B)': 'Household Abroad Online Total (£)',
        },
        markers=True,
    )
    .update_traces(line=dict(width=3), marker=dict(size=7))
    .update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(title='Value (£)'))
)

# Show the line chart
fig_line.show()




In [None]:
# Compare UK Finance Total, Abroad Household Total and Household Abroad Online Total (Excl B2B)

import pandas as pd
import plotly.express as px

# Load the per-source estimates, the UK Finance totals and the abroad household totals
df_combined = pd.read_csv("df_combined_result.csv")
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")
df_abroad_household_total = pd.read_csv("total_uk_finance_abroad_household_total.csv")

# Column holding the UK Finance based online estimate
uk_finance_online_col = 'Household_Abroad_Online_Spending_total_uk_finance'

# Join the three tables on 'year', then give the series plot-friendly names
df_merged = (
    df_total_uk_finance
    .merge(df_combined[['year', uk_finance_online_col]], on='year')
    .merge(df_abroad_household_total[['year', 'UK_Finance_Abroad_Household_Total']], on='year')
    .rename(columns={
        'total_uk_finance': 'UK_Total',
        uk_finance_online_col: 'Abroad_Household_Online_Total (Excl B2B)',
        'UK_Finance_Abroad_Household_Total': 'Abroad_Household_Total',
    })
)

# Three-series line chart with thicker lines/markers and one x tick per year
fig_line = (
    px.line(
        df_merged,
        x='year',
        y=['UK_Total', 'Abroad_Household_Total', 'Abroad_Household_Online_Total (Excl B2B)'],
        title='UK Finance Total, Abroad Household Total, and Abroad Household Online Total (Excl B2B)',
        labels={
            'year': 'Year',
            'UK_Total': 'Total (£)',
            'Abroad_Household_Total': 'Abroad Household Total (£)',
            'Abroad_Household_Online_Total (Excl B2B)': 'Household Abroad Online Total (£)',
        },
        markers=True,
    )
    .update_traces(line=dict(width=3), marker=dict(size=7))
    .update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(title='Value (£)'))
)

# Show the line chart
fig_line.show()



In [None]:
# UK Finance Abroad Total and Online Abroad Total Comparison

import pandas as pd
import plotly.express as px

# Load the UK Finance totals and the two abroad ratios
df_total_uk_finance = pd.read_csv("total_uk_finance.csv")
df_abroad_ratio = pd.read_csv("UK_Abroad_Spending_Ratio.csv")
df_abroad_online_ratio = pd.read_csv("UK_Abroad_online_Spending_Ratio.csv")

# Join the three tables on 'year'
df_merged = (
    df_total_uk_finance
    .merge(df_abroad_ratio, on='year')
    .merge(df_abroad_online_ratio, on='year')
)

# Apply each ratio to the UK Finance total.
# NOTE(review): the ratios are multiplied in as-is (no division by 100), so this
# assumes both CSVs store fractions rather than percentages -- confirm.
uk_finance_total = df_merged['total_uk_finance']
df_merged['UK_Finance_Abroad_Total'] = uk_finance_total * df_merged['UK_Abroad_Spending_Ratio']
df_merged['UK_Finance_Abroad_Online_Total'] = uk_finance_total * df_merged['UK_Abroad_online_Spending_Ratio']

# Two-series line chart with thicker lines/markers and one x tick per year
fig = (
    px.line(
        df_merged,
        x='year',
        y=['UK_Finance_Abroad_Total', 'UK_Finance_Abroad_Online_Total'],
        title='UK Finance Abroad Total vs UK Finance Abroad Online Total',
        labels={
            'year': 'Year',
            'UK_Finance_Abroad_Total': 'Abroad Total (£)',
            'UK_Finance_Abroad_Online_Total': 'Abroad Online Total (£)',
        },
        markers=True,
    )
    .update_traces(line=dict(width=3), marker=dict(size=7))
    .update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(title='Value (£)'))
)

# Show the chart
fig.show()



In [None]:
# Comparison chart between:
#   - Abroad_Household_Online_Total (Excl B2B)
#   - Abroad_Household_Total

import pandas as pd
import plotly.express as px

# Load the per-source estimates and the abroad household totals
df_combined = pd.read_csv("df_combined_result.csv")
df_abroad_household_total = pd.read_csv("total_uk_finance_abroad_household_total.csv")

# Column holding the UK Finance based online estimate
uk_finance_online_col = 'Household_Abroad_Online_Spending_total_uk_finance'

# Join the two series on 'year' and give them plot-friendly names
df_merged = (
    df_combined[['year', uk_finance_online_col]]
    .merge(df_abroad_household_total[['year', 'UK_Finance_Abroad_Household_Total']], on='year')
    .rename(columns={
        uk_finance_online_col: 'Abroad_Household_Online_Total (Excl B2B)',
        'UK_Finance_Abroad_Household_Total': 'Abroad_Household_Total',
    })
)

# Two-series line chart with thicker lines/markers and one x tick per year
fig_line = (
    px.line(
        df_merged,
        x='year',
        y=['Abroad_Household_Online_Total (Excl B2B)', 'Abroad_Household_Total'],
        title='Comparison of Abroad Household Online Total (Excl B2B) and Abroad Household Total',
        labels={
            'year': 'Year',
            'Abroad_Household_Online_Total (Excl B2B)': 'Abroad Household Online Total (£)',
            'Abroad_Household_Total': 'Abroad Household Total (£)',
        },
        markers=True,
    )
    .update_traces(line=dict(width=3), marker=dict(size=7))
    .update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(title='Value (£)'))
)

# Show the line chart
fig_line.show()



In [None]:
# Bar Chart for UK Cardholder Abroad Online Spend Ratio (%)

import pandas as pd
import plotly.express as px

# Load the ratio saved by the online-ratio cell; it was multiplied by 100 before
# being written, so the values are already percentages and need no conversion
df_abroad_online_spend_ratio = pd.read_csv("Abroad_Household_Online_Spend_Ratio.csv")
ratio_col = 'Abroad_Household_Online_Spend_Ratio'

# Upper limit slightly above the tallest bar.
# NOTE(review): this value is computed but never applied -- the y-axis range
# below is fixed at [0, 100]. Confirm which of the two is intended.
max_value = df_abroad_online_spend_ratio[ratio_col].max()
y_axis_upper_limit = max_value + 10

# One bar per year, coloured by year and labelled with its own value
fig_bar = px.bar(
    df_abroad_online_spend_ratio,
    x='year',
    y=ratio_col,
    title='Household Abroad Online Spend Ratio (2019 - 2025)',
    labels={ratio_col: 'Abroad Online Spend Ratio (%)', 'year': 'Year'},
    color='year',
    text=ratio_col,
)

# Two-decimal percentage labels above the bars, thin black bar outlines,
# one x tick per year and a fixed 0-100 y-axis
bar_outline = dict(line=dict(width=1, color='black'))
fig_bar.update_traces(texttemplate='%{text:.2f}%', textposition='outside', marker=bar_outline)
fig_bar.update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(range=[0, 100]))

# Show the bar chart
fig_bar.show()

