In [None]:
project_path = "/home/jupyter"
import os
import sys

sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

# BigQuery client that is passed to each bq.read_bq_table_sql(...) call below.
client = bigquery.Client()

In [None]:
# Visa market-share adjustment: track how the total number of UK cardholders in the
# dataset changes month by month, and turn that into a scaling factor for spend.

cardholders = '''SELECT time_period_value, sum(cardholders) as total_cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All'
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
cardholders_total = bq.read_bq_table_sql(client, cardholders)

# The base is the first returned row (the query orders by time_period_value ascending).
monthly_cardholders = cardholders_total['total_cardholders']
base_cardholders = monthly_cardholders.iloc[0]

# Factor = base count / that month's count. Multiplying a month's spend by this factor
# rescales it to the base month's cardholder population.
cardholders_total['Change from Base'] = base_cardholders / monthly_cardholders
cardholders_total

In [None]:
# Calculating Total online spend ratio over time

In [None]:
# Total spend by UK cardholders
# One row per month: spend summed over the 'All' mcg / mcc / merchant-channel rows,
# restricted to cardholder_origin = 'UNITED KINGDOM'.
total_spend = '''SELECT time_period_value, sum(spend) as total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Run the query; the result is later merged with the online-only totals on time_period_value.
spend_total = bq.read_bq_table_sql(client, total_spend)
#spend_total.head()

In [None]:
# Total online spend by UK cardholders
# Same filters as the all-channel total, except merchant_channel is restricted to 'Online'.
total_online_spend = '''SELECT time_period_value, sum(spend) as total_online_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
# Run the query and preview the first rows.
online_spend = bq.read_bq_table_sql(client, total_online_spend)
online_spend.head()

In [None]:
# Join the all-channel and online-only monthly totals on the period key
online_ratio = spend_total.merge(online_spend, on='time_period_value')

# Share of each month's spend that is online, expressed in percent
online_share = online_ratio["total_online_spend"] / online_ratio["total_spend"]
online_ratio["Online %"] = online_share * 100

online_ratio

In [None]:
# Smooth the monthly online share with a trailing 6-row rolling mean
# (the first 5 rows have no full window and are therefore NaN).
online_pct = online_ratio["Online %"]
online_ratio['6-month Moving Average'] = online_pct.rolling(window=6).mean()
online_ratio

In [None]:
# Line chart of the monthly online share together with its 6-month moving average
plotted_columns = ["Online %", "6-month Moving Average"]
fig = px.line(online_ratio, x="time_period_value", y=plotted_columns)
fig

In [None]:
# Change in the smoothed online share between 201906 and 202506 (percentage points)
starting_month = "201906"
finishing_month = "202506"

# Alias of online_ratio; not used in this cell, kept in case other cells reference it.
online_start = online_ratio

# The smoothed series lives in the '6-month Moving Average' column created earlier in the
# notebook. No '12-month Moving Average' column is ever created, so looking that name up
# raised a KeyError.
moving_average_column = '6-month Moving Average'

# Extract values directly (.values[0] takes the single row matching each month)
online_start_value = online_ratio.loc[online_ratio['time_period_value'] == starting_month, moving_average_column].values[0]
online_end_value = online_ratio.loc[online_ratio['time_period_value'] == finishing_month, moving_average_column].values[0]

difference = online_end_value - online_start_value
difference

In [None]:
# Average online % from 2022 onwards
# Work on a copy so online_ratio itself is left untouched
temp_df = online_ratio.copy()

# The first four characters of time_period_value are read as the year
# (the notebook compares this column against strings such as "201906").
period_year = temp_df['time_period_value'].str[:4].astype(int)

# Keep every month of 2022 and later. The former extra clause
# (year == 2022 and month >= 1) was already covered by year > 2021, so no month
# parsing is needed and no helper columns have to be added and dropped again.
temp_df = temp_df[period_year > 2021]

avg_online_ratio = temp_df["Online %"].mean()
avg_online_ratio

In [None]:
# Calculating the top mcg drivers of the month-on-month change in online and face-to-face spend (the per-month averages are computed in a later cell)

In [None]:
# Getting a total online spend by mcg, by UK cardholders
# There is no filter on mcg, so the result has one row per (month, mcg) for every mcg
# value in the table; later cells rely on an mcg = 'All' row being among them.
online_by_mcg = '''SELECT time_period_value, sum(spend) as online_spend, mcg
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month'
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value, mcg
ORDER BY time_period_value  ASC'''
# Run the query; online_by_mcg_df gains an adjusted-spend column in a later cell.
online_by_mcg_df = bq.read_bq_table_sql(client, online_by_mcg)
#online_by_mcg_df

In [None]:
# Getting a total f2f spend by mcg, by UK cardholders
# Same shape as the online-by-mcg query, with merchant_channel restricted to 'Face to Face'.
f2f_by_mcg = '''SELECT time_period_value, sum(spend) as f2f_spend, mcg
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month'
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value, mcg
ORDER BY time_period_value  ASC'''
# Run the query; f2f_by_mcg_df gains an adjusted-spend column in a later cell.
f2f_by_mcg_df = bq.read_bq_table_sql(client, f2f_by_mcg)
#f2f_by_mcg_df

In [None]:
# Scale each month's online spend by that month's cardholder adjustment factor
merged_df = pd.merge(online_by_mcg_df, cardholders_total, how='left', on='time_period_value')
merged_df['adjusted_spend'] = merged_df['online_spend'].mul(merged_df['Change from Base'])

# Copy the adjusted values back onto the query result (the assignment aligns on the row index)
online_by_mcg_df['adjusted_online_spend'] = merged_df['adjusted_spend']
online_by_mcg_df

In [None]:
# Scale each month's face-to-face spend by that month's cardholder adjustment factor
merged_df_f2f = pd.merge(f2f_by_mcg_df, cardholders_total, how='left', on='time_period_value')
merged_df_f2f['adjusted_spend'] = merged_df_f2f['f2f_spend'].mul(merged_df_f2f['Change from Base'])

# Copy the adjusted values back onto the query result (the assignment aligns on the row index)
f2f_by_mcg_df['adjusted_f2f_spend'] = merged_df_f2f['adjusted_spend']

In [None]:
# Order the online rows by mcg and then by period, so that each mcg's months sit together
# in ascending period order (the per-mcg differences computed later depend on this order)
online_sort_keys = ["mcg", "time_period_value"]
monthly_totals = online_by_mcg_df.sort_values(by=online_sort_keys)

monthly_totals

In [None]:
# Order the face-to-face rows by mcg and then by period, so that each mcg's months sit
# together in ascending period order
f2f_sort_keys = ["mcg", "time_period_value"]
monthly_totals_f2f = f2f_by_mcg_df.sort_values(by=f2f_sort_keys)

In [None]:
# Month-on-month change in adjusted online spend, computed within each mcg
online_by_group = monthly_totals.groupby('mcg')['adjusted_online_spend']
online_nominal_change = online_by_group.diff()
online_percent_change = online_by_group.pct_change() * 100

# Absolute change and relative change (in percent) versus the previous row of the same mcg
monthly_totals['nominal_change'] = online_nominal_change
monthly_totals['percent_change'] = online_percent_change

monthly_totals

In [None]:
# Month-on-month change in adjusted face-to-face spend, computed within each mcg
f2f_by_group = monthly_totals_f2f.groupby('mcg')['adjusted_f2f_spend']
f2f_nominal_change = f2f_by_group.diff()
f2f_percent_change = f2f_by_group.pct_change() * 100

# Absolute change and relative change (in percent) versus the previous row of the same mcg
monthly_totals_f2f['nominal_change'] = f2f_nominal_change
monthly_totals_f2f['percent_change'] = f2f_percent_change

In [None]:
# Nominal change of the 'All' mcg per period, renamed so it can be merged back as a reference column
is_all_online = monthly_totals['mcg'] == 'All'
all_nominal = (
    monthly_totals.loc[is_all_online, ['time_period_value', 'nominal_change']]
    .rename(columns={'nominal_change': 'all_nominal_change'})
)
all_nominal

In [None]:
# Nominal change of the 'All' mcg per period (face-to-face), renamed for merging back
is_all_f2f = monthly_totals_f2f['mcg'] == 'All'
all_nominal_f2f = (
    monthly_totals_f2f.loc[is_all_f2f, ['time_period_value', 'nominal_change']]
    .rename(columns={'nominal_change': 'all_nominal_change'})
)

In [None]:
# Attach the 'All' nominal change for each month to every mcg row.
# An 'all_nominal_change' column left over from an earlier run of this cell is dropped first;
# without that, re-running the cell merges the column a second time, pandas renames the pair
# to all_nominal_change_x / _y, and the calculation below fails with a KeyError.
monthly_totals = (
    monthly_totals
    .drop(columns=['all_nominal_change'], errors='ignore')
    .merge(all_nominal, on='time_period_value', how='left')
)

# Each mcg's share (in percent) of that month's 'All' nominal change.
# Rows whose change is missing (e.g. the first period of each mcg) stay NaN.
monthly_totals['contribution_to_all_change'] = (monthly_totals['nominal_change'] / monthly_totals['all_nominal_change']) * 100
monthly_totals

In [None]:
# Attach the 'All' nominal change for each month to every face-to-face mcg row.
# An 'all_nominal_change' column left over from an earlier run of this cell is dropped first;
# without that, re-running the cell merges the column a second time, pandas renames the pair
# to all_nominal_change_x / _y, and the calculation below fails with a KeyError.
monthly_totals_f2f = (
    monthly_totals_f2f
    .drop(columns=['all_nominal_change'], errors='ignore')
    .merge(all_nominal_f2f, on='time_period_value', how='left')
)

# Each mcg's share (in percent) of that month's 'All' nominal change.
# Rows whose change is missing (e.g. the first period of each mcg) stay NaN.
monthly_totals_f2f['contribution_to_all_change'] = (monthly_totals_f2f['nominal_change'] / monthly_totals_f2f['all_nominal_change']) * 100

In [None]:
#quarterly_totals.to_csv('online_mcg_totals_quarterly.csv')
#quarterly_totals_f2f.to_csv('f2f_mcg_totals_quarterly.csv')

In [None]:
# Rank the drivers: within each period, order the mcg rows by their contribution to the
# 'All' change (ascending). All columns are kept.
driver_sort_keys = ["time_period_value", "contribution_to_all_change"]

# Online drivers
drivers_for_online_spend = monthly_totals.sort_values(by=driver_sort_keys)

# Face-to-face drivers
drivers_for_f2f_spend = monthly_totals_f2f.sort_values(by=driver_sort_keys)

In [None]:
# Write the full monthly driver tables (face-to-face, then online) to CSV in the working directory
drivers_for_f2f_spend.to_csv(path_or_buf='m_f2f_drivers.csv')
drivers_for_online_spend.to_csv(path_or_buf='m_online_drivers.csv')

In [None]:
# Keep only the online mcgs selected for reporting and export them
mcg_of_interest = ["All", "DEPARTMENT STORES", "DISCOUNT STORES", "APPAREL & ACCESSORIES", "TRAVEL SERVICES", "EDUCATION & GOVERNMENT", "AIRLINES"]
is_online_mcg_of_interest = drivers_for_online_spend['mcg'].isin(mcg_of_interest)
filtered_df = drivers_for_online_spend[is_online_mcg_of_interest]
filtered_df.to_csv(path_or_buf='m_online_spend_by_mcg.csv')

In [None]:
# Keep only the face-to-face mcgs selected for reporting and export them
f2f_mcg_of_interest = ["All", "DEPARTMENT STORES", "DISCOUNT STORES", "APPAREL & ACCESSORIES", "RESTAURANTS", "RETAIL GOODS", "FOOD & GROCERY"]
is_f2f_mcg_of_interest = drivers_for_f2f_spend['mcg'].isin(f2f_mcg_of_interest)
filtered_df_f2f = drivers_for_f2f_spend[is_f2f_mcg_of_interest]
filtered_df_f2f.to_csv(path_or_buf='m_f2f_spend_by_mcg.csv')

In [None]:
# Calculating the average growth for each MCG in each calendar month

# First period (inclusive) that feeds the averages, as year and month.
# NOTE(review): 2021 / 12 are the literals of the previous filter expression, whereas nearby
# comments talk about "2022 onwards" - confirm which start period is intended.
average_from_year = 2021
average_from_month = 12


def _average_change_by_month(drivers, from_year, from_month):
    """Average percent_change and contribution_to_all_change per (mcg, calendar month).

    Parameters
    ----------
    drivers : pandas.DataFrame
        Needs the columns 'time_period_value', 'mcg', 'percent_change' and
        'contribution_to_all_change'. The frame itself is not modified.
    from_year, from_month : int
        Rows before this year/month are excluded; the period itself is included.

    Returns
    -------
    pandas.DataFrame
        One row per (mcg, month) with the mean of both measures. 'month' is the last two
        characters of 'time_period_value', kept as text.
    """
    periods = drivers['time_period_value']
    year = periods.str[:4].astype(int)
    month = periods.str[-2:].str.replace('M', '').astype(int)

    # The mask has to be used to select rows. Previously the boolean Series itself was
    # assigned over the DataFrame, so every later column access failed. The 'year > from_year'
    # arm is what makes the filter "from this period onwards" rather than a single month.
    in_window = (year > from_year) | ((year == from_year) & (month >= from_month))
    windowed = drivers.loc[in_window].copy()

    # Grouping key: the month part only, so the same calendar month of different years is pooled
    windowed['month'] = windowed['time_period_value'].str[-2:]

    measures = ['percent_change', 'contribution_to_all_change']
    return windowed.groupby(['mcg', 'month'])[measures].mean().reset_index()


# The online and face-to-face tables go through exactly the same steps. Previously the
# face-to-face frame never received its 'month' column, so its groupby raised a KeyError.
average_df = _average_change_by_month(filtered_df, average_from_year, average_from_month)
average_df_f2f = _average_change_by_month(filtered_df_f2f, average_from_year, average_from_month)

average_df

In [None]:
# Export the per-month averages for the selected online and face-to-face drivers.

# Each table holds, per mcg and calendar month, the average % change and the average
# contribution to the total change in online or f2f spending.
# Only the periods kept by the averaging cell's start filter are included.
average_df.to_csv(path_or_buf='M_online_drivers_avg_Q_growth.csv')
average_df_f2f.to_csv(path_or_buf='M_f2f_drivers_avg_Q_growth.csv')