In [None]:
# Add the project root to sys.path so the ft_digital_trade package imports
# further down this cell can be resolved.
# NOTE(review): absolute, machine-specific path — confirm it matches the
# environment the notebook is run in.
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)

from google.cloud import bigquery
import importlib
import plotly.express as px

import numpy as np
import pandas as pd
from datetime import datetime

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

In [None]:
# BigQuery client created with no explicit arguments.
# NOTE(review): none of the read_utils calls in the following cells receive
# this object, and `client` is re-created further down before its first
# visible use — confirm whether this cell is still needed.
client = bigquery.Client()

In [None]:
# Option A (disabled): choose the merchant spend location separately for
# face-to-face and online transactions.
# For the available arguments run: help(read_utils.read_f2f_online)
#visa_data = read_utils.read_f2f_online(cardholder_origin = "all", f2f =  "uk", online =  "all")

# Option B (active): apply the same filters to face-to-face and online data.
# For the available arguments run: help(read_utils.read_visa)
visa_data = read_utils.read_visa(cardholder_origin = "uk", cardholders_location = "uk", spend_location = "uk")

In [None]:
# Run the Visa calculation on the raw extract, then apply the '_spoc' suffix
# through rename_columns so the columns stay identifiable after the later merge.
visa_calculated = calc_utils.calculate_visa(visa_data)
visa = clean_utils.rename_columns(df=visa_calculated, suffix='_spoc')

In [None]:
# Global card counts: read -> clean -> calculate, one named stage per step.
# The 'card' argument is passed straight to calculate_global
# (presumably selects the cards variant — TODO confirm).
global_cards_raw = read_utils.read_global_cards()
global_cards_clean = clean_utils.clean_global(global_cards_raw)
global_cards = calc_utils.calculate_global(global_cards_clean, 'card')

In [None]:
# Global spend: read -> clean -> calculate, one named stage per step.
# The 'spend' argument is passed straight to calculate_global
# (presumably selects the spend variant — TODO confirm).
global_spend_raw = read_utils.read_global_spend()
global_spend_clean = clean_utils.clean_global(global_spend_raw)
global_spend = calc_utils.calculate_global(global_spend_clean, 'spend')

In [None]:
# Join the cards and spend tables on year (inner join); overlapping column
# names are disambiguated with '_cards' / '_spend', then the '_global'
# suffix is applied through rename_columns.
global_cards_and_spend = global_cards.merge(
    global_spend,
    on='year',
    how='inner',
    suffixes=('_cards', '_spend'),
)
global_df = clean_utils.rename_columns(df=global_cards_and_spend, suffix='_global')

In [None]:
# UK Finance: read -> clean -> calculate, keep only the columns used in the
# comparison, then apply the '_uk_finance' suffix through rename_columns.
uk_finance_raw = read_utils.read_uk_finance()
uk_finance_clean = clean_utils.clean_uk_finance(uk_finance_raw)
uk_finance_calculated = calc_utils.calculate_uk_finance(uk_finance_clean)
uk_finance_columns = [
    'year',
    'cardholders',
    'total value of purchases',
    "total volume of purchases",
]
uk_finance = clean_utils.rename_columns(
    df=uk_finance_calculated[uk_finance_columns],
    suffix='_uk_finance',
)

In [None]:
# Bank of England data: read -> clean -> calculate, then apply the '_boe'
# suffix through rename_columns.
boe_raw = read_utils.read_boe()
boe_clean = clean_utils.clean_boe(boe_raw)
boe_calculated = calc_utils.calculate_boe(boe_clean)
boe = clean_utils.rename_columns(df=boe_calculated, suffix='_boe')

In [None]:
# Load the LINK data via read_utils.
# NOTE(review): `link` is not referenced by any other visible cell — confirm
# whether it is still needed.
link = read_utils.read_link()

In [None]:
# Combine every source on 'year'. Outer joins keep years that appear in only
# some of the sources.
merged = (
    visa
    .merge(uk_finance, on='year', how='outer')
    .merge(boe, on='year', how='outer')
    .merge(global_df, on='year', how='outer')
)

In [None]:
# Cardholder comparison: select the cardholder columns, add two market-share
# percentages (cardholders_spoc as a share of the UK Finance and global
# totals), then reshape to long format for the charts.
cardholder_columns = [
    'year',
    'cardholders_spoc',
    'cardholders_uk_finance',
    'visa_total_cards_global',
    'total_cards_global',
    'visa_marketshare_cards_global',
]
cardholders = (
    merged[cardholder_columns]
    .assign(
        uk_finance_marketshare=lambda frame: frame['cardholders_spoc'] / frame['cardholders_uk_finance'] * 100,
        global_marketshare=lambda frame: frame['cardholders_spoc'] / frame['total_cards_global'] * 100,
    )
    .melt(id_vars='year', var_name='Data source', value_name='value')
)
cardholders = calc_utils.calculate_index(df=cardholders)

In [None]:
# Spend comparison: columns carried over from the merged frame.
spend_columns = [
    'year',
    'spend_spoc',
    'total value of purchases_uk_finance',
    'Mastercard values_boe',
    'Visa Europe values_boe',
    'Mastercard and Visa values_boe',
    'Visa proportion_boe',
    'debit_spend_global',
    'credit_spend_global',
    'visa_total_spend_global',
    'total_spend_global',
    'visa_marketshare_spend_global',
]
spend = merged[spend_columns].copy()

# Replace the 2024 values of these two columns with NaN
# (presumably because 2024 is incomplete — TODO confirm).
is_2024 = spend['year'] == 2024
for blanked_column in ('spend_spoc', 'total value of purchases_uk_finance'):
    spend[blanked_column] = np.where(is_2024, np.nan, spend[blanked_column])

# Market share (%) of spend_spoc against each reference total.
share_denominators = {
    'uk_finance_marketshare': 'total value of purchases_uk_finance',
    'global_marketshare': 'total_spend_global',
    'boe_marketshare': 'Mastercard and Visa values_boe',
}
for share_name, denominator in share_denominators.items():
    spend[share_name] = spend['spend_spoc'] / spend[denominator] * 100

# Wide-format snapshot taken before melting (original note: used for getting
# the 2019 market share).
spend_copy = spend.copy()

# Long format for the charts, then index calculation.
spend = calc_utils.calculate_index(
    df=spend.melt(id_vars='year', var_name='Data source', value_name='value')
)

In [None]:
# Chart built from the long-format, indexed `cardholders` frame prepared above.
plot_utils.plot_total_cardholders(df = cardholders)

In [None]:
# Second chart from the same long-format `cardholders` frame.
plot_utils.plot_visa_cardholders(df = cardholders)

In [None]:
# Same path setup as the first cell of the notebook, repeated here so the
# fintrans_toolbox import below can be resolved.
# NOTE(review): absolute, machine-specific path; each run appends a duplicate
# entry to sys.path.
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

In [None]:
# BigQuery client passed to the bq.read_bq_table_sql calls below
# (rebinds the `client` created near the top of the notebook).
client = bigquery.Client()

In [None]:
# Quarterly spend per destination country for rows where cardholder_origin is
# 'UNITED KINGDOM' and destination_country is not 'UNITED KINGDOM', with mcc,
# mcg, merchant_channel and cardholder_origin_country all set to 'All'.
# NOTE(review): the GROUP BY lists every selected column and there is no
# aggregate, so it only collapses exact duplicate rows — confirm that is intended.
UK_spending_by_country3b = '''SELECT time_period_value, destination_country, spend FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and mcc = 'All' and mcg = 'All' and merchant_channel = 'All' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country != 'UNITED KINGDOM' GROUP BY destination_country, 
time_period_value, spend ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame by the following cells.
df_by_country3b = bq.read_bq_table_sql(client, UK_spending_by_country3b)
# Preview the first rows as the cell output.
df_by_country3b.head()

In [None]:
import pandas as pd

# Save the quarterly extract fetched in the previous cell.
# The frame is reused rather than re-running the identical BigQuery query,
# which only repeated the scan. The three query columns are selected
# explicitly so the file looks the same even if this cell is re-run after a
# later cell has added a 'year' column to the frame.
df_by_country3b[['time_period_value', 'destination_country', 'spend']].to_csv(
    'uk_spending_by_country3b.csv',
    index=False,
)


In [None]:
import pandas as pd

# Derive the calendar year from the first four characters of
# time_period_value (assumes values such as '2023-Q1' — TODO confirm format).
df_by_country3b['year'] = df_by_country3b['time_period_value'].str.slice(0, 4).astype(int)

# Total spend per year across all destination countries, ordered by year.
df_yearly_spend2 = (
    df_by_country3b
    .groupby('year')['spend']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Show the yearly totals.
print(df_yearly_spend2)

In [None]:
# Save the yearly totals. index=False matches the other CSV export of the
# country-level data and keeps the pandas row index out of the file;
# otherwise every later pd.read_csv of this file gains an 'Unnamed: 0' column.
df_yearly_spend2.to_csv('UK_yearly_spend_country3b.csv', index=False)

In [None]:
import pandas as pd
import plotly.express as px

# Reload the yearly totals written by the previous cell.
df_yearly_spend = pd.read_csv('UK_yearly_spend_country3b.csv')

# Bar chart of yearly spend: bars coloured by year, value labels shown outside
# each bar with two decimals, black bar outlines, one x tick per year, and a
# y axis running from 0 to 10% above the largest yearly total.
fig_bar = (
    px.bar(
        df_yearly_spend,
        x='year',
        y='spend',
        color='year',
        text='spend',
        title='Yearly Total Spend Outside UK (2019-2024)',
        labels={'spend': 'Total Spend (£)', 'year': 'Year'},
    )
    .update_traces(
        texttemplate='%{text:.2f}',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={
            'title': 'Total Spend (£)',
            'range': [0, df_yearly_spend['spend'].max() * 1.1],
        },
    )
)

fig_bar.show()


In [None]:
# Line chart of the same yearly totals (df_yearly_spend from the previous
# cell): 3px line with red markers of size 8 on every data point.
fig_line = (
    px.line(
        df_yearly_spend,
        x='year',
        y='spend',
        markers=True,
        title='Yearly Total Spend Outside UK (2019-2024) - Line Chart',
        labels={'spend': 'Total Spend (£)', 'year': 'Year'},
    )
    .update_traces(
        line={'width': 3},
        marker={'size': 8, 'color': 'red'},
    )
)

fig_line.show()


In [None]:
# Quarterly online spend per destination country: same filters as the
# all-channel query above except merchant_channel = 'Online'
# (cardholder_origin 'UNITED KINGDOM', destination_country not 'UNITED KINGDOM').
# NOTE(review): the GROUP BY lists every selected column and there is no
# aggregate, so it only collapses exact duplicate rows — confirm that is intended.
UK_spending_by_country_online = '''SELECT time_period_value, destination_country, spend FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and mcc = 'All' and mcg = 'All' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country != 'UNITED KINGDOM' GROUP BY destination_country, 
time_period_value, spend ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper.
df_by_country = bq.read_bq_table_sql(client, UK_spending_by_country_online)
# Rename the value column so it can sit next to the all-channel 'spend' later.
df_by_country = df_by_country.rename(columns={'spend': 'online_spend'})
# Preview the first rows as the cell output.
df_by_country.head()

In [None]:
import pandas as pd

# Save the quarterly online extract fetched in the previous cell.
# The frame is reused rather than re-running the identical BigQuery query;
# the second fetch only repeated the scan and threw away the
# 'spend' -> 'online_spend' rename already applied to df_by_country.
# The value column is written under its original 'spend' header and the three
# query columns are selected explicitly, so the file keeps the same layout as
# before, whatever columns later cells add to the frame.
(
    df_by_country
    .rename(columns={'online_spend': 'spend'})
    [['time_period_value', 'destination_country', 'spend']]
    .to_csv('uk_spending_by_country_online.csv', index=False)
)

In [None]:
# Yearly online-spend totals.
# NOTE(review): the original heading read "New Total without mcg = 'BUSINESS TO
# BUSINESS'", but the query that feeds df_by_country filters mcg = 'All' —
# confirm where the B2B exclusion is meant to happen.

# Make sure the value column is called 'online_spend' and show the columns.
df_by_country = df_by_country.rename(columns={'spend': 'online_spend'})
print(df_by_country.columns)

# Year = first four characters of time_period_value
# (assumes values such as '2023-Q1' — TODO confirm format).
df_by_country = df_by_country.assign(
    year=lambda frame: frame['time_period_value'].str[:4].astype(int)
)

# Sum online spend per year, ordered by year, and show the result.
df_yearly_spend_total = (
    df_by_country
    .groupby('year')['online_spend']
    .sum()
    .reset_index()
    .sort_values(by='year')
)
print(df_yearly_spend_total)


In [None]:
# Recomputes the same yearly online totals as the preceding cell (year from
# the first four characters of time_period_value, then a per-year sum).
# NOTE(review): this duplicates the previous cell — candidate for removal.
year_from_period = df_by_country['time_period_value'].str.slice(0, 4).astype(int)
df_by_country['year'] = year_from_period

yearly_online_totals = df_by_country.groupby('year')['online_spend'].sum()
df_yearly_spend_total = yearly_online_totals.reset_index().sort_values(by='year')

# Show the result.
print(df_yearly_spend_total)


In [None]:
# Save the yearly online-spend totals (the source query excludes
# destination_country = 'UNITED KINGDOM').
# index=False keeps the pandas row index out of the file so it does not come
# back as an 'Unnamed: 0' column when the CSV is read again.
df_yearly_spend_total.to_csv('UK_yearly_spend_Online_Non-UKadj.csv', index=False)

In [None]:
import pandas as pd

# Read the online spend CSV
df_yearly_abroad_online = pd.read_csv('UK_yearly_spend_Online_Non-UKadj.csv')

# Read the total spend CSV
df_total_spend = pd.read_csv('UK_yearly_spend_country3b.csv')

# Preview the first rows of the two frames that were just loaded. The cell
# previously printed the in-memory df_yearly_spend_total, not the
# online-spend frame read from disk, so the file contents were never checked.
print(df_yearly_abroad_online.head())
print(df_total_spend.head())

In [None]:
import pandas as pd
import plotly.express as px

# Load the online-spend CSV into df_yearly_spend.
# NOTE(review): 'UK_yearly_spend_Online_Non-UK.csv' is not written by any
# visible cell (the notebook writes '..._Non-UKadj.csv'), and this frame is not
# the one plotted below — confirm the file's origin and intended use.
df_yearly_spend = pd.read_csv('UK_yearly_spend_Online_Non-UK.csv')

# Bar chart of the in-memory yearly online totals (df_yearly_spend_total):
# bars coloured by year, value labels shown outside each bar with two
# decimals, black bar outlines, one x tick per year, and a y axis running
# from 0 to 10% above the largest yearly total.
fig_bar = (
    px.bar(
        df_yearly_spend_total,
        x='year',
        y='online_spend',
        color='year',
        text='online_spend',
        title='Yearly Total Online Spend Outside UK (2019-2024)',
        labels={'online_spend': 'Online Spend (£)', 'year': 'Year'},
    )
    .update_traces(
        texttemplate='%{text:.2f}',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={
            'title': 'Online Spend (£)',
            'range': [0, df_yearly_spend_total['online_spend'].max() * 1.1],
        },
    )
)

fig_bar.show()


In [None]:
# Line chart of yearly online spend: 3px line with red markers of size 8.
# NOTE(review): this plots df_yearly_spend (loaded from
# 'UK_yearly_spend_Online_Non-UK.csv' in the previous cell), whereas the bar
# chart in that cell plots df_yearly_spend_total — confirm which frame is intended.
fig_line = (
    px.line(
        df_yearly_spend,
        x='year',
        y='online_spend',
        markers=True,
        title='Yearly Total Online Spend Outside UK (2019-2024) - Line Chart',
        labels={'online_spend': 'Online Spend (£)', 'year': 'Year'},
    )
    .update_traces(
        line={'width': 3},
        marker={'size': 8, 'color': 'red'},
    )
)

fig_line.show()


In [None]:
import pandas as pd

# Load the yearly online-spend and total-spend CSVs.
# NOTE(review): 'UK_yearly_spend_Online_Non-UK.csv' is not written by any
# visible cell — confirm where it comes from.
df_online_spend = pd.read_csv('UK_yearly_spend_Online_Non-UK.csv')
df_total_spend = pd.read_csv('UK_yearly_spend_country3b.csv')

# Preview the first rows of both frames, one after the other.
print(df_online_spend.head(), df_total_spend.head(), sep='\n')

In [None]:
import pandas as pd

# Hard-coded yearly figures for 2019-2023.
# NOTE(review): these literals replace the df_online_spend / df_total_spend
# frames loaded from CSV in the previous cell; presumably copied from an
# earlier run — confirm the source of the values.
online_spend_data = {
    'year': [2019, 2020, 2021, 2022, 2023],
    'online_spend': [2.968316e+10, 3.057935e+10, 2.963259e+10, 2.219765e+10, 2.192451e+10],
}
total_spend_data = {
    'year': [2019, 2020, 2021, 2022, 2023],
    'spend': [4.162683e+10, 3.659158e+10, 3.586756e+10, 3.485071e+10, 3.577281e+10],
}
df_online_spend = pd.DataFrame(online_spend_data)
df_total_spend = pd.DataFrame(total_spend_data)

# Join the two series on year and express online spend as a percentage of
# total spend.
merged_spend = (
    df_online_spend[['year', 'online_spend']]
    .merge(df_total_spend[['year', 'spend']], how='inner', on='year')
    .assign(online_spend_ratio=lambda frame: (frame['online_spend'] / frame['spend']) * 100)
)

# Show the yearly ratio.
print(merged_spend[['year', 'online_spend_ratio']])

In [None]:
import pandas as pd
import plotly.express as px

# Hard-coded online spend ratio (%) per year, 2019-2023.
data = {
    'year': [2019, 2020, 2021, 2022, 2023],
    'online_spend_ratio': [71.307760, 83.569362, 82.616688, 63.693537, 61.288196],
}
df = pd.DataFrame(data)

# Bar chart of the ratio: bars coloured by year, percentage labels shown
# outside each bar with two decimals, black bar outlines, one x tick per
# year, and a y axis fixed to 0-100.
fig_bar = (
    px.bar(
        df,
        x='year',
        y='online_spend_ratio',
        color='year',
        text='online_spend_ratio',
        title='Online Spend Ratio by Year (2019 - 2023)',
        labels={'online_spend_ratio': 'Online Spend Ratio (%)', 'year': 'Year'},
    )
    .update_traces(
        texttemplate='%{text:.2f}%',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={'range': [0, 100]},
    )
)

fig_bar.show()

In [None]:
import pandas as pd
import plotly.express as px

# Hard-coded online spend ratio (%) per year, now including 2024.
data = {
    'year': [2019, 2020, 2021, 2022, 2023, 2024],
    'online_spend_ratio': [71.307760, 83.569362, 82.616688, 63.693537, 61.288196, 60.58829345]
}

# Create dataframe
df = pd.DataFrame(data)

# Build the year range shown in the title from the data itself. The title
# previously still read "(2019 - 2023)" although the chart includes 2024.
ratio_title = f"Online Spend Ratio by Year ({df['year'].min()} - {df['year'].max()})"

# Create the Bar Chart
fig_bar = px.bar(df,
                 x='year',
                 y='online_spend_ratio',
                 title=ratio_title,
                 labels={'online_spend_ratio': 'Online Spend Ratio (%)', 'year': 'Year'},
                 color='year',
                 text='online_spend_ratio')

# Percentage labels outside the bars, black bar outlines, one x tick per
# year, y axis fixed to 0-100.
fig_bar.update_traces(texttemplate='%{text:.2f}%', textposition='outside', marker=dict(line=dict(width=1, color='black')))
fig_bar.update_layout(xaxis=dict(tickmode='linear'), yaxis=dict(range=[0, 100]))

# Show the bar chart
fig_bar.show()

In [None]:
# Inner-join online spend and total spend on year, keeping only the key and
# value columns, then preview the result.
merged_spend = df_online_spend[['year', 'online_spend']].merge(
    df_total_spend[['year', 'spend']],
    how='inner',
    on='year',
)
print(merged_spend.head())


In [None]:
# "Adjusted to Non B2B" variant: same join as the previous cell, but the
# online side is df_yearly_spend_total (computed in this notebook) rather
# than df_online_spend. This overwrites merged_spend.
merged_spend = df_yearly_spend_total[['year', 'online_spend']].merge(
    df_total_spend[['year', 'spend']],
    how='inner',
    on='year',
)
print(merged_spend.head())


In [None]:
# Online spend as a percentage of total spend for each year in merged_spend.
merged_spend = merged_spend.assign(
    online_spend_ratio=lambda frame: (frame['online_spend'] / frame['spend']) * 100
)

# Show the yearly ratio.
print(merged_spend[['year', 'online_spend_ratio']])


In [None]:
# Put both online-spend series side by side, then attach total spend.
# Both inputs carry an 'online_spend' column, so the merge suffixes them:
# '_x' is df_online_spend, '_y' is df_yearly_spend_total (pandas defaults,
# spelled out here).
online_pair = df_online_spend[['year', 'online_spend']].merge(
    df_yearly_spend_total[['year', 'online_spend']],
    how='inner',
    on='year',
    suffixes=('_x', '_y'),
)
merged_spend = online_pair.merge(df_total_spend[['year', 'spend']], how='inner', on='year')

# Preview the merged frame.
print(merged_spend.head())

# B2B ratio: difference between the two online series as a percentage of
# total spend.
online_spend_gap = merged_spend['online_spend_x'] - merged_spend['online_spend_y']
merged_spend['b2b_ratio'] = online_spend_gap / merged_spend['spend'] * 100

# Show the yearly B2B ratio.
print(merged_spend[['year', 'b2b_ratio']])


In [None]:
# Save the current merged_spend frame to CSV (row index omitted).
ratio_csv_path = 'UK_yearly_online_spend_abroad_ratio.csv'
merged_spend.to_csv(ratio_csv_path, index=False)

# Report what was actually written. The fixed message used to claim the
# "online spend ratio" had been saved, but when the cells run top to bottom
# merged_spend holds the b2b_ratio column at this point, so the columns are
# listed instead.
print(f"Saved columns {list(merged_spend.columns)} to '{ratio_csv_path}'.")


In [None]:
import plotly.express as px
import pandas as pd

# Hard-coded yearly figures for 2019-2024.
# NOTE(review): presumably copied from earlier query results — confirm source.
data_abroad_spend = {
    'year': [2019, 2020, 2021, 2022, 2023, 2024],
    'abroad_spend': [4.162683e+10, 3.659158e+10, 3.586756e+10, 3.485071e+10, 3.577281e+10, 3.833597e+10],
}
data_online_spend = {
    'year': [2019, 2020, 2021, 2022, 2023, 2024],
    'online_spend': [2.968316e+10, 3.057935e+10, 2.963259e+10, 2.219765e+10, 2.192451e+10, 2.322711e+10],
}
df_abroad_spend = pd.DataFrame(data_abroad_spend)
df_online_spend = pd.DataFrame(data_online_spend)

# One row per year with both series side by side.
df = df_abroad_spend.merge(df_online_spend, on='year')

# Two-series line chart: 3px lines, red markers of size 7 on every trace,
# one x tick per year, shared y-axis title.
fig_line = (
    px.line(
        df,
        x='year',
        y=['abroad_spend', 'online_spend'],
        markers=True,
        title='UK Yearly Abroad Spend vs Abroad Online Spend 2019 - 2024',
        labels={'year': 'Year', 'abroad_spend': 'Abroad Spend (£)', 'online_spend': 'Online Spend (£)'},
    )
    .update_traces(
        line={'width': 3},
        marker={'size': 7, 'color': 'red'},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={'title': 'Spend (£)'},
    )
)

fig_line.show()