In [None]:
# Directory that is appended to the import search path below.
project_path = "/home/jupyter"
import os
import sys

# Must run before the fintrans_toolbox / ft_digital_trade imports further down.
# Presumably both packages are checked out under project_path — TODO confirm.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

# BigQuery client handed to bq.read_bq_table_sql by the query cells below.
client = bigquery.Client()

In [None]:
# Visa market-share drop-off, estimated from the change in cardholder numbers.
# The quarterly total of UK cardholders in the dataset gives a per-quarter
# factor that can be used to scale each category of spend.
# NOTE(review): unlike the spend queries, this query has no destination_country
# filter — confirm cardholders are not being summed once per destination.

cardholders = (
    "SELECT time_period_value, sum(cardholders) as total_cardholders\n"
    "FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`\n"
    "WHERE time_period = 'Quarter' \n"
    "  AND mcg = 'All'\n"
    "  AND mcc = 'All'\n"
    "  AND merchant_channel = 'All'\n"
    "  AND cardholder_origin = 'UNITED KINGDOM'\n"
    "  AND cardholder_origin_country = 'All'\n"
    "  GROUP BY time_period_value\n"
    "  ORDER BY time_period_value ASC\n"
)
cardholders_total = bq.read_bq_table_sql(client, cardholders)

# Base period = first row under the query's ascending time_period_value order.
base_cardholders = cardholders_total['total_cardholders'].iloc[0]

# Factor is base count / quarter count; multiplying a quarter's spend by it
# adjusts that spend for Visa's market share.
cardholders_total['Change from Base'] = (
    base_cardholders / cardholders_total['total_cardholders']
)
cardholders_total

In [None]:
# Quarterly online spend by UK cardholders where the destination country is
# not the UK, across all MCGs and MCCs.
total_online_spend_abroad = (
    "SELECT time_period_value, sum(spend) as total_online_spend_abroad\n"
    "FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`\n"
    "WHERE time_period = 'Quarter' \n"
    "  AND mcg = 'All'\n"
    "  AND mcc = 'All'\n"
    "  AND merchant_channel = 'Online'\n"
    "  AND cardholder_origin = 'UNITED KINGDOM'\n"
    "  AND cardholder_origin_country = 'All'\n"
    "  AND destination_country != 'UNITED KINGDOM' \n"
    "GROUP BY time_period_value\n"
    "ORDER BY time_period_value ASC"
)
online_spend_abroad = bq.read_bq_table_sql(client, total_online_spend_abroad)
online_spend_abroad.head()

In [None]:
# Scale each quarter's spend by that quarter's cardholder adjustment factor.
# The factor is looked up by time_period_value instead of by row position, so
# the two query results do not have to share the same index or row order; a
# quarter with no factor yields NaN rather than borrowing another quarter's.
adjustment_by_quarter = cardholders_total.set_index("time_period_value")["Change from Base"]
online_spend_abroad["Adjusted Total"] = (
    online_spend_abroad["total_online_spend_abroad"]
    * online_spend_abroad["time_period_value"].map(adjustment_by_quarter)
)
online_spend_abroad["mcg"] = 'All'  # Added so the function borrowed from the section 5 analysis doesn't need changing
online_spend_abroad

In [None]:
# Same quarterly online spend with a non-UK destination, split out per MCC.
# NOTE(review): this query has no mcg filter, unlike the other queries —
# confirm that rows are not counted once per MCG level.
by_mcc = (
    "SELECT time_period_value, sum(spend) as total_online_spend_abroad, mcc\n"
    "FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`\n"
    "WHERE time_period = 'Quarter' \n"
    "  AND mcc != 'All'\n"
    "  AND merchant_channel = 'Online'\n"
    "  AND cardholder_origin = 'UNITED KINGDOM'\n"
    "  AND cardholder_origin_country = 'All'\n"
    "  AND destination_country != 'UNITED KINGDOM' \n"
    "GROUP BY time_period_value, mcc\n"
    "ORDER BY time_period_value ASC"
)
online_abroad_by_mcc = bq.read_bq_table_sql(client, by_mcc)
online_abroad_by_mcc

In [None]:
# Creating an analysis function to apply to both datasets


def _print_yoy_movers(annual_data, heading, largest):
    """Print up to ten rows of ``annual_data`` with the most extreme year-on-year % change.

    Rows without a ``yoy_spend_change`` value (the first available year of each
    MCC) are skipped. ``largest`` selects the highest values when true and the
    lowest values otherwise.
    """
    movers = annual_data.dropna(subset=['yoy_spend_change'])
    pick = movers.nlargest if largest else movers.nsmallest
    selected = pick(10, 'yoy_spend_change')[['mcc', 'year', 'yoy_spend_change']]

    print(heading)
    # itertuples keeps each column's own dtype, so the year never prints as a float.
    for row in selected.itertuples(index=False):
        print(f"{row.mcc} ({row.year}): {row.yoy_spend_change:+.1f}%")
    print()


def analyse_online_spend(df, total_spend, base_year=2019, final_year=2024,
                         excluded_years=(2025,)):
    """Print a summary of online spend abroad broken down by MCC.

    The report contains, in order: the three largest MCCs in the latest
    quarter, annual spend per MCC with year-on-year changes, the five largest
    gains and falls between ``base_year`` and ``final_year``, the ten largest
    year-on-year % increases and decreases, each MCC's annual total next to the
    all-MCC annual total, and the change in each MCC's share of annual spend
    between ``base_year`` and ``final_year``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per quarter and MCC with columns ``time_period_value`` (string
        whose first four characters are the year), ``mcc`` and
        ``total_online_spend_abroad``.
    total_spend : pandas.DataFrame
        Accepted so existing calls keep working; it is not read. Annual shares
        are taken against the sum of the per-MCC rows in ``df``.
    base_year, final_year : int
        The two years compared in the growth and share tables.
    excluded_years : iterable of int
        Years dropped from every annual figure (by default 2025, presumably
        because that year is incomplete — TODO confirm).

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    ValueError
        If ``df`` has no rows.

    Notes
    -----
    NOTE(review): the printed headings say "MCGs" and "adjusted" (inherited from
    the section 5 analysis), but the values in ``df`` are used exactly as
    passed in; no market-share adjustment is applied here.
    """
    if df.empty:
        raise ValueError("df has no rows to analyse")

    # --- Largest MCCs in the most recent quarter -------------------------
    # max() does not depend on the row order of df; labels that start with the
    # year compare chronologically as strings, matching the query's ORDER BY.
    latest_quarter = df['time_period_value'].max()
    most_recent = df[df['time_period_value'] == latest_quarter]
    top_3_spend = most_recent.nlargest(3, 'total_online_spend_abroad')

    table = top_3_spend[['mcc', 'total_online_spend_abroad']].rename(columns={
        'mcc': 'MCC',
        'total_online_spend_abroad': 'Adjusted Spend'})

    print(f"\nTop 3 MCGs in {latest_quarter} by Adjusted Online+Abroad Spend:\n")
    print(table.to_string(index=False))
    print()

    # --- Annual totals and year-on-year changes per MCC ------------------
    df_copy = df.copy()
    df_copy['year'] = df_copy['time_period_value'].str[:4].astype(int)

    annual_data = df_copy.groupby(['year', 'mcc']).agg({
        'total_online_spend_abroad': 'sum'}).reset_index()
    annual_data = annual_data[~annual_data['year'].isin(list(excluded_years))]
    annual_data = annual_data.sort_values(['mcc', 'year'])

    # Changes are between consecutive available years of the same MCC.
    spend_by_mcc = annual_data.groupby('mcc')['total_online_spend_abroad']
    annual_data = annual_data.assign(
        yoy_spend_change=spend_by_mcc.pct_change() * 100,
        yoy_nominal_change=spend_by_mcc.diff(),
    )
    print(annual_data)

    # --- Growth between the base and final year --------------------------
    base_col = f'spend_{base_year}'
    final_col = f'spend_{final_year}'
    spend_cols = ['mcc', 'total_online_spend_abroad']
    data_base = annual_data.loc[annual_data['year'] == base_year, spend_cols].rename(
        columns={'total_online_spend_abroad': base_col})
    data_final = annual_data.loc[annual_data['year'] == final_year, spend_cols].rename(
        columns={'total_online_spend_abroad': final_col})

    # Inner merge: only MCCs present in both years are compared.
    growth_data = pd.merge(data_base, data_final, on='mcc')
    growth_data['absolute_change'] = growth_data[final_col] - growth_data[base_col]
    growth_data['percent_change'] = (growth_data['absolute_change'] / growth_data[base_col]) * 100

    top_gainers = growth_data.sort_values(by='absolute_change', ascending=False).head(5)
    top_losers = growth_data.sort_values(by='absolute_change').head(5)

    print(f"Top 5 MCGs with the highest increase in adjusted online spend from {base_year} to {final_year}:")
    print(top_gainers.to_string(index=False))

    print(f"\nTop 5 MCGs with the highest decrease in adjusted online spend from {base_year} to {final_year}:")
    print(top_losers.to_string(index=False))
    print()

    # --- Largest year-on-year movements across the time series -----------
    _print_yoy_movers(annual_data, "Top 10 biggest % increases:", largest=True)
    _print_yoy_movers(annual_data, "Top 10 biggest % decreases:", largest=False)

    # --- Share of annual spend per MCC -----------------------------------
    # The total gets its own column name; merging two frames that both carry
    # 'total_online_spend_abroad' would otherwise produce _x/_y suffixes.
    total_annual_spend = (
        annual_data.groupby('year')['total_online_spend_abroad']
        .sum()
        .rename('total_annual_online_spend_abroad')
        .reset_index()
    )
    annual_df_merged = annual_data.merge(total_annual_spend, on='year', how='left')
    print(annual_df_merged)

    annual_df_merged['mcc_share'] = (
        annual_df_merged['total_online_spend_abroad']
        / annual_df_merged['total_annual_online_spend_abroad']
    ) * 100

    endpoint_shares = annual_df_merged[annual_df_merged['year'].isin([base_year, final_year])]

    # Shares side by side; reindex guarantees both year columns exist (as NaN)
    # even when one of the two years has no data.
    pivot_df = (
        endpoint_shares.pivot(index='mcc', columns='year', values='mcc_share')
        .reindex(columns=[base_year, final_year])
        .reset_index()
    )

    change_col = f'change_{base_year}_to_{final_year}'
    pivot_df[change_col] = pivot_df[final_year] - pivot_df[base_year]
    print(pivot_df[['mcc', change_col]])
    
# Run the report on the per-MCC quarterly spend; the second argument is the all-category frame built in the cells above.
analyse_online_spend(online_abroad_by_mcc, online_spend_abroad)