In [None]:
# Directory appended to sys.path below, presumably so the project-local packages
# (fintrans_toolbox, ft_digital_trade) can be imported.
# NOTE(review): absolute, environment-specific path — consider making it configurable.
project_path = "/home/jupyter"
import os
import sys

# Must run before the project-local imports further down.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

# BigQuery client handed to bq.read_bq_table_sql by every query cell.
# Constructed with no arguments, so project and credentials come from the environment.
client = bigquery.Client()

In [None]:
# Total spend by UK cardholders, per quarter.
# The query keeps rows where time_period = 'Quarter', where mcg, mcc, merchant_channel and
# cardholder_origin_country all equal 'All', and where cardholder_origin = 'UNITED KINGDOM';
# spend is then summed per time_period_value.
# There is no destination_country filter, so rows for every destination value are summed.
# NOTE(review): if the table also stores an aggregate destination_country row, this total
# would double count — confirm against the table schema.
total_spend = '''SELECT time_period_value, sum(spend) as total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_spend (merged on time_period_value later).
spoc_spend_total = bq.read_bq_table_sql(client, total_spend)
#spoc_spend_total.head()

In [None]:
# Total spend in the UK by UK cardholders, per quarter.
# Same filters as the overall-total query (quarterly rows, 'All' for mcg, mcc,
# merchant_channel and cardholder_origin_country, cardholder_origin = 'UNITED KINGDOM'),
# plus destination_country = 'UNITED KINGDOM'.
total_spend_domestic = '''SELECT time_period_value, sum(spend) as total_spend_domestic
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
  AND destination_country = 'UNITED KINGDOM'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_spend_domestic.
spoc_spend_domestic = bq.read_bq_table_sql(client, total_spend_domestic)
#spoc_spend_domestic.head()

In [None]:
# Total spend in foreign countries by UK cardholders, per quarter.
# Same filters as the domestic query except that destination_country must differ from
# 'UNITED KINGDOM'. In SQL, `!=` is not true for NULL, so rows with a NULL
# destination_country are left out of this sum.
# NOTE(review): any aggregate destination_country value (if the table has one) would be
# counted as "abroad" here — confirm against the table schema.
total_spend_abroad = '''SELECT time_period_value, sum(spend) as total_spend_abroad
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter'
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country != 'UNITED KINGDOM' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_spend_abroad.
spoc_spend_abroad = bq.read_bq_table_sql(client, total_spend_abroad)
#spoc_spend_abroad.head()

In [None]:
# Total online spend by UK cardholders, per quarter.
# Identical to the overall-total query except merchant_channel = 'Online' instead of 'All'.
# There is no destination_country filter, so every destination row is summed.
total_online_spend = '''SELECT time_period_value, sum(spend) as total_online_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_online_spend.
spoc_online_spend = bq.read_bq_table_sql(client, total_online_spend)
#spoc_online_spend.head()

In [None]:
# Sanity check: for every quarter, domestic + abroad spend should reproduce the overall
# total held in spoc_spend_total.
# The overall total is merged in so the comparison is visible in the table; the online
# total is kept alongside it for reference.
merged_total_spending = (
    spoc_spend_domestic
    .merge(spoc_spend_abroad, on='time_period_value')
    .merge(spoc_online_spend, on='time_period_value')
    .merge(spoc_spend_total, on='time_period_value')
)
merged_total_spending['Total_Spend'] = (
    merged_total_spending['total_spend_domestic'] + merged_total_spending['total_spend_abroad']
)
# Zero (up to rounding) in every row means the two sources agree.
merged_total_spending['difference_from_total_spend'] = (
    merged_total_spending['Total_Spend'] - merged_total_spending['total_spend']
)
merged_total_spending

In [None]:
# Total online spend in the UK by UK cardholders, per quarter.
# Filters: quarterly rows, 'All' for mcg, mcc and cardholder_origin_country,
# merchant_channel = 'Online', cardholder_origin = 'UNITED KINGDOM' and
# destination_country = 'UNITED KINGDOM'.
total_online_spend_domestic = '''SELECT time_period_value, sum(spend) as total_online_spend_domestic
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country = 'UNITED KINGDOM' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_online_spend_domestic.
spoc_online_spend_domestic = bq.read_bq_table_sql(client, total_online_spend_domestic)
#spoc_online_spend_domestic.head()

In [None]:
# Total online spend in foreign countries by UK cardholders, per quarter.
# Same as the domestic online query except that destination_country must differ from
# 'UNITED KINGDOM'; rows with a NULL destination_country do not satisfy `!=` and are
# therefore excluded.
total_online_spend_abroad = '''SELECT time_period_value, sum(spend) as total_online_spend_abroad
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All'
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country != 'UNITED KINGDOM' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_online_spend_abroad.
spoc_online_spend_abroad = bq.read_bq_table_sql(client, total_online_spend_abroad)
# Preview of the first rows (the other query cells leave this preview commented out).
spoc_online_spend_abroad.head()

In [None]:
# Total face-to-face (f2f) spend in foreign countries by UK cardholders, per quarter.
# Same as the abroad online query except merchant_channel = 'Face to Face'.
total_f2f_spend_abroad = '''SELECT time_period_value, sum(spend) as total_f2f_spend_abroad
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country != 'UNITED KINGDOM' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Result columns: time_period_value, total_f2f_spend_abroad.
spoc_f2f_spend_abroad = bq.read_bq_table_sql(client, total_f2f_spend_abroad)
#spoc_f2f_spend_abroad

In [None]:
# Sanity check: for every quarter, abroad online + abroad face-to-face spend should
# reproduce the abroad total held in spoc_spend_abroad.
# The abroad total is merged in so the comparison is visible in the table.
merged_abroad_spending = (
    spoc_online_spend_abroad
    .merge(spoc_f2f_spend_abroad, on='time_period_value')
    .merge(spoc_spend_abroad, on='time_period_value')
)
merged_abroad_spending['Total_Spend_Abroad'] = (
    merged_abroad_spending['total_online_spend_abroad']
    + merged_abroad_spending['total_f2f_spend_abroad']
)
# Zero (up to rounding) in every row means online + f2f accounts for all abroad spend.
merged_abroad_spending['difference_from_total_spend_abroad'] = (
    merged_abroad_spending['Total_Spend_Abroad'] - merged_abroad_spending['total_spend_abroad']
)
merged_abroad_spending

In [None]:
# One wide quarterly table: every spend series joined on the quarter label.
# Outer joins keep a quarter even when one of the series has no row for it.
complete_df = (
    spoc_spend_total
    .merge(spoc_spend_domestic, on='time_period_value', how='outer')
    .merge(spoc_spend_abroad, on='time_period_value', how='outer')
    .merge(spoc_online_spend, on='time_period_value', how='outer')
    .merge(spoc_online_spend_domestic, on='time_period_value', how='outer')
    .merge(spoc_online_spend_abroad, on='time_period_value', how='outer')
    .merge(spoc_f2f_spend_abroad, on='time_period_value', how='outer')
)

In [None]:
# Quarterly ratios, expressed as percentages.
# assign() returns a new frame, so the ratio columns are added to quarterly_totals only and
# complete_df keeps just the raw spend columns for any later aggregation.
quarterly_totals = complete_df.assign(**{
    # Share of all spend that happens abroad.
    'Abroad_Ratio %': complete_df['total_spend_abroad'] / complete_df['total_spend'] * 100,
    # Share of all spend that is both online and abroad.
    'Online+Abroad_Ratio %': complete_df['total_online_spend_abroad'] / complete_df['total_spend'] * 100,
    # Share of abroad spend that is online.
    'Ratio of Abroad Spending Online %': complete_df['total_online_spend_abroad'] / complete_df['total_spend_abroad'] * 100,
    # Share of domestic spend that is online.
    'Ratio of Domestic Spending Online %': complete_df['total_online_spend_domestic'] / complete_df['total_spend_domestic'] * 100,
})
quarterly_totals

In [None]:
# Yearly spend totals: sum the quarterly spend columns per calendar year.
# The year is the first four characters of the quarter label.
complete_df['Year'] = complete_df['time_period_value'].str[:4]
# Any percentage columns present on complete_df are excluded from the sum: adding up
# quarterly percentages does not produce a yearly ratio.
ratio_columns = [column for column in complete_df.columns if column.endswith('%')]
yearly_totals = (
    complete_df
    .drop(columns=ratio_columns)
    .groupby('Year')
    .sum(numeric_only=True)
    .reset_index()
    .drop(columns=['time_period_value'], errors='ignore')
)
yearly_totals

In [None]:
# Yearly ratios, expressed as percentages and computed from the yearly spend sums.
# All four ratios reported at quarterly level are (re)computed here, so no column can carry
# a value obtained by summing quarterly percentages.
yearly_totals = yearly_totals.assign(**{
    # Share of all spend that happens abroad.
    'Abroad_Ratio %': yearly_totals['total_spend_abroad'] / yearly_totals['total_spend'] * 100,
    # Share of all spend that is both online and abroad.
    'Online+Abroad_Ratio %': yearly_totals['total_online_spend_abroad'] / yearly_totals['total_spend'] * 100,
    # Share of abroad spend that is online.
    'Ratio of Abroad Spending Online %': yearly_totals['total_online_spend_abroad'] / yearly_totals['total_spend_abroad'] * 100,
    # Share of domestic spend that is online.
    'Ratio of Domestic Spending Online %': yearly_totals['total_online_spend_domestic'] / yearly_totals['total_spend_domestic'] * 100,
})
yearly_totals

In [None]:
# Write the quarterly table to a CSV file; the relative path resolves against the
# current working directory. to_csv is called with its defaults, so the DataFrame index
# is written as the first column.
# The yearly export below is currently disabled.
#yearly_totals.to_csv('spoc_totals_yearly.csv')
quarterly_totals.to_csv('spoc_totals_quarterly.csv')

# Combining with Marketshare values

In [None]:
# Option 1 (disabled): choose different merchant-spend locations for face-to-face and online.
# For more information run the following:
##help(read_utils.read_visa)
# NOTE(review): the help target for this option is presumably read_utils.read_f2f_online — confirm.
#visa_data = read_utils.read_f2f_online(cardholder_origin = "all", f2f =  "uk", online =  "all")

# Option 2 (active): apply the same filters to face-to-face and online data.
# For more information run the following:
##help(read_utils.read_visa)
visa_data = read_utils.read_visa(cardholder_origin = "uk", cardholders_location = "uk", spend_location = "all")

In [None]:
# Derive the Visa figures from the raw read, then pass them through rename_columns with the
# '_spoc' suffix (later cells read 'spend_spoc' and 'cardholders_spoc' from this frame).
visa = clean_utils.rename_columns(
    df=calc_utils.calculate_visa(visa_data),
    suffix='_spoc',
)
visa

In [None]:
# Global card counts: read -> clean -> calculate (mode 'card').
# The 'visa_marketshare' column is then given a more descriptive label.
global_cards = calc_utils.calculate_global(
    clean_utils.clean_global(read_utils.read_global_cards()),
    'card',
).rename(columns={'visa_marketshare': 'GDR - Visa Card Marketshare'})
global_cards

In [None]:
# Global spend: read -> clean -> calculate (mode 'spend').
# The 'visa_marketshare' column is then given a more descriptive label.
# NOTE(review): later cells select 'visa_marketshare_spend_global' and other
# '*_spend_global' columns from a frame built from global_spend — confirm these names
# still exist after this rename.
global_spend = calc_utils.calculate_global(
    clean_utils.clean_global(read_utils.read_global_spend()),
    'spend',
).rename(columns={'visa_marketshare': 'GDR - Visa Spend Marketshare'})

# Market-share-only table: the card table plus the spend market share for the same year,
# with the underlying count columns removed.
marketshares = global_cards.merge(
    global_spend[['year', 'GDR - Visa Spend Marketshare']],
    on='year',
    how='left',
).drop(columns=['debit', 'credit', 'visa_total', 'total'])
global_spend

In [None]:
# UK Finance figures: read -> clean -> calculate, then keep only the columns used for
# comparison and pass them through rename_columns with the '_uk_finance' suffix.
uk_finance = calc_utils.calculate_uk_finance(
    clean_utils.clean_uk_finance(read_utils.read_uk_finance())
)
uk_finance = clean_utils.rename_columns(
    df=uk_finance[['year', 'cardholders', 'total value of purchases', 'total volume of purchases']],
    suffix='_uk_finance',
)

In [None]:
# Bank of England figures: read -> clean -> calculate, then pass through rename_columns
# with the '_boe' suffix.
boe = clean_utils.rename_columns(
    df=calc_utils.calculate_boe(clean_utils.clean_boe(read_utils.read_boe())),
    suffix='_boe',
)
boe

In [None]:
# Load the LINK data via the project reader.
# NOTE(review): `link` is not referenced by any other cell visible in this notebook —
# confirm whether it is still needed.
link = read_utils.read_link()

In [None]:
# Combine all yearly sources into one wide frame keyed on 'year'.
# Outer joins keep a year even when only some sources report it.
# NOTE(review): global_cards is not merged here, yet a later cell selects
# '*_cards_global' columns from `merged` — confirm where those columns come from.
merged = (
    visa
    .merge(uk_finance, how='outer', on='year')
    .merge(boe, how='outer', on='year')
    .merge(global_spend, how='outer', on='year')
)

In [None]:
# Cardholder comparison table.
# Market shares are the SPOC cardholder count as a percentage of each reference total.
# NOTE(review): the '*_cards_global' columns must already exist on `merged` — confirm.
cardholders = merged[[
    'year',
    'cardholders_spoc',
    'cardholders_uk_finance',
    'visa_total_cards_global',
    'total_cards_global',
    'visa_marketshare_cards_global',
]].assign(
    uk_finance_marketshare=lambda frame: frame['cardholders_spoc'] / frame['cardholders_uk_finance'] * 100,
    global_marketshare=lambda frame: frame['cardholders_spoc'] / frame['total_cards_global'] * 100,
)
# Long format (one row per year and data source) for charting, then indexed by the helper.
cardholders = calc_utils.calculate_index(
    df=cardholders.melt(id_vars='year', var_name='Data source', value_name='value')
)

In [None]:
# Spend comparison table.
# spend_copy keeps the wide layout (it is read later for the base market share);
# `spend` is the long, indexed version used for charts.
# NOTE(review): the '*_spend_global' columns must already exist on `merged` — confirm.
spend_copy = (
    merged[[
        'year',
        'spend_spoc',
        'total value of purchases_uk_finance',
        'Mastercard values_boe',
        'Visa Europe values_boe',
        'Mastercard and Visa values_boe',
        'Visa proportion_boe',
        'debit_spend_global',
        'credit_spend_global',
        'visa_total_spend_global',
        'total_spend_global',
        'visa_marketshare_spend_global',
    ]]
    # Blank out the 2024 values of the SPOC and UK Finance spend columns.
    .assign(**{
        'spend_spoc': lambda frame: np.where(
            frame['year'] == 2024, np.nan, frame['spend_spoc']
        ),
        'total value of purchases_uk_finance': lambda frame: np.where(
            frame['year'] == 2024, np.nan, frame['total value of purchases_uk_finance']
        ),
    })
    # SPOC spend as a percentage of each reference total.
    .assign(
        uk_finance_marketshare=lambda frame: frame['spend_spoc'] / frame['total value of purchases_uk_finance'] * 100,
        global_marketshare=lambda frame: frame['spend_spoc'] / frame['total_spend_global'] * 100,
        boe_marketshare=lambda frame: frame['spend_spoc'] / frame['Mastercard and Visa values_boe'] * 100,
    )
)
# Long format (one row per year and data source) for charting, then indexed by the helper.
spend = calc_utils.calculate_index(
    df=spend_copy.melt(id_vars='year', var_name='Data source', value_name='value')
)
spend

In [None]:
# Market-share threshold, read from the first row of spend_copy.
# Alternative columns for the threshold: 'visa_marketshare_spend_global' or 'global_marketshare'.
# NOTE(review): this is positional — it is the 2019 value only if 2019 is the first row
# of spend_copy; confirm the row order after the outer merges.
marketshare_2019 = spend_copy['uk_finance_marketshare'].iloc[0]
marketshare_2019

In [None]:
# Estimate total card spend from the SPOC (Visa) spend and compare it with the other sources.
#   1. Drop 2024 because its data is incomplete.
#   2. Index SPOC cardholders to the first remaining row (= 100) and divide SPOC spend by
#      that index, i.e. hold the cardholder base at its first-row level.
#   3. Scale the adjusted spend up by the base market share (a percentage).
# The cardholder index is computed with plain Series arithmetic and the new columns are
# added through assign(), so nothing is written onto a filtered slice of `merged`.
# NOTE(review): "first row" is assumed to be 2019 — confirm the row order of `merged`.
df = (
    merged.loc[merged['year'] != 2024]
    .assign(
        visa_adj_spend_spoc=lambda frame: (
            frame['spend_spoc']
            / (frame['cardholders_spoc'] / frame['cardholders_spoc'].iloc[0] * 100)
        ) * 100
    )
    .assign(
        total_spoc=lambda frame: frame['visa_adj_spend_spoc'] / marketshare_2019 * 100
    )
    # Give every source the same '<measure>_<source>' naming.
    .rename(columns={
        'total value of purchases_uk_finance': 'total_uk_finance',
        'visa_total_spend_global': 'visa_total_global',
        'total_spend_global': 'total_global',
        'Visa Europe values_boe': 'visa_total_boe',
        'Mastercard and Visa values_boe': 'total_boe',
    })
)
# Keep only the series that are compared.
df = df[[
    'year',
    'visa_adj_spend_spoc',
    'total_spoc',
    'visa_total_global',
    'total_global',
    'total_uk_finance',
    'visa_total_boe',
    'total_boe',
]]
# Long format (one row per year and data source), then indexed by the helper.
df = calc_utils.calculate_index(
    df=df.melt(id_vars='year', var_name='Data source', value_name='value')
)