In [None]:
# Directory appended to sys.path below.
# NOTE(review): hard-coded absolute path — presumably where the fintrans_toolbox package lives on the notebook server; confirm before running elsewhere.
project_path = "/home/jupyter"
import os
import sys

# Extend the import search path before the fintrans_toolbox imports further down
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# BigQuery client created with default arguments; it is passed to every bq.read_bq_table_sql call in the cells below
client = bigquery.Client()

In [None]:
# Calculating Visa marketshare drop-off using change in cardholders over time
# Looks at how the total number of UK cardholders in the dataset changes over time, to scale each category of spend

# destination_country = 'UNITED KINGDOM' is included so the count only captures UK cardholders that actually used
# their card in the UK and doesn't double count cardholders that also spent money abroad
# May miss some UK cardholders that only spend money abroad, but that is likely an immaterial number of cardholders
cardholders = '''SELECT time_period_value, sum(cardholders) as total_cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country = 'UNITED KINGDOM' 
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
cardholders_total = bq.read_bq_table_sql(client, cardholders)
#cardholders_total

# Base = first row of the result; the query orders by time_period_value ascending, so this is the earliest month returned
base_cardholders = cardholders_total['total_cardholders'].iloc[0]
#base_cardholder

# Ratio of the base month's cardholders to each month's cardholders
cardholders_total['Change from Base'] = (base_cardholders / cardholders_total['total_cardholders'])
#cardholders_total # Change from Base column can now be multiplied against each month's spend values to adjust the spend for Visa's marketshare

#########################################################
#                Graphs for section 4                   #
#########################################################

# Figure 1: Average spend per cardholder online vs face-to-face

In [None]:
# Total monthly spend by UK cardholders across all merchant channels
# NOTE(review): unlike the cardholder-count queries, this query has no destination_country filter, so spend is summed
# over every destination_country row in the table — confirm that is intended and that no aggregate row is double counted
total_spend = '''SELECT time_period_value, sum(spend) as total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'All'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
spend_total = bq.read_bq_table_sql(client, total_spend)
#spend_total

In [None]:
# Total monthly online spend by UK cardholders (merchant_channel = 'Online')
# NOTE(review): no destination_country filter here, so spend is summed over every destination_country row — confirm intended
total_online_spend = '''SELECT time_period_value, sum(spend) as total_online_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
online_spend = bq.read_bq_table_sql(client, total_online_spend)
#online_spend.head()

In [None]:
# Total monthly face-to-face spend by UK cardholders (merchant_channel = 'Face to Face')
# NOTE(review): no destination_country filter here, so spend is summed over every destination_country row — confirm intended
total_f2f_spend = '''SELECT time_period_value, sum(spend) as total_f2f_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
f2f_spend = bq.read_bq_table_sql(client, total_f2f_spend)
f2f_spend.head()

In [None]:
# Line up online, face-to-face and overall monthly spend on time_period_value,
# then compare online + face-to-face against the separately queried total
online_vs_f2f = (
    online_spend
    .merge(f2f_spend, on='time_period_value')
    .merge(spend_total, on='time_period_value')
)
channel_sum = online_vs_f2f['total_online_spend'] + online_vs_f2f['total_f2f_spend']
online_vs_f2f['Total_Spend'] = channel_sum
# 'check' is the gap between the summed channels and the queried total (expected to be zero)
online_vs_f2f['check'] = online_vs_f2f['Total_Spend'] - online_vs_f2f['total_spend']
#online_vs_f2f

In [None]:
# Remove the separately queried total and the reconciliation column now that the check has been made
# NOTE: re-running this cell on its own fails because the two columns are already gone
online_vs_f2f = online_vs_f2f.drop(['total_spend', 'check'], axis=1)
#online_vs_f2f

In [None]:
# Defining a function to calculate the average spend in 2019 for each category to use as the base value

def average_spend(df, column_name, year=2019):
    """Mean of `column_name` over the rows of `df` that fall in `year`.

    A row belongs to `year` when the text form of its 'time_period_value'
    begins with the text form of `year` (e.g. "201903" for 2019).

    Parameters:
        df: DataFrame with a 'time_period_value' column and `column_name`.
        column_name: name of the column to average.
        year: year to average over, as an int or string (default 2019).

    Returns:
        The mean of the matching values; NaN when no row matches.
    """
    period_labels = df['time_period_value'].astype(str)
    in_year = period_labels.str.startswith(str(year))
    return df.loc[in_year, column_name].mean()

In [None]:
# Number of UK cardholders that spent money online in the UK, per month
# The SQL text gets its own name so that `online_cardholders` only ever refers to the query result
online_cardholders_sql = '''SELECT time_period_value, sum(cardholders) as online_cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country = 'UNITED KINGDOM'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
online_cardholders = bq.read_bq_table_sql(client, online_cardholders_sql)
#online_cardholders

In [None]:
# Number of UK cardholders that spent money face-to-face in the UK, per month
# The SQL text gets its own name so that `f2f_cardholders` only ever refers to the query result
f2f_cardholders_sql = '''SELECT time_period_value, sum(cardholders) as f2f_cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country = 'UNITED KINGDOM'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
f2f_cardholders = bq.read_bq_table_sql(client, f2f_cardholders_sql)
#f2f_cardholders

In [None]:
# Attach the online, face-to-face and total cardholder counts to the monthly spend table
spend_per_cardholder = (
    online_vs_f2f
    .merge(online_cardholders, on='time_period_value')
    .merge(f2f_cardholders, on='time_period_value')
    .merge(cardholders_total, on='time_period_value')
)

# Averages: each channel's spend is divided by that channel's own cardholder count,
# and overall spend by total_cardholders (need to check DSC method)
for spend_col, holders_col, average_col in [
    ("total_online_spend", "online_cardholders", "Average per Cardholder Online"),
    ("total_f2f_spend", "f2f_cardholders", "Average per Cardholder F2F"),
    ("Total_Spend", "total_cardholders", "Average per Cardholder Total"),
]:
    spend_per_cardholder[average_col] = spend_per_cardholder[spend_col] / spend_per_cardholder[holders_col]

#spend_per_cardholder

In [None]:
# These index values don't match DSC publication, needs double checking
# Each average is indexed to its own 2019 mean (2019 average = 100).
# Work on a copy: plain assignment would only alias spend_per_cardholder, so the
# index columns added here would silently appear on that table as well.
index_spend_per_cardholder = spend_per_cardholder.copy()
for index_col, average_col in [
    ('Avg per Cardholder Online Index', 'Average per Cardholder Online'),
    ('Avg per Cardholder F2F Index', 'Average per Cardholder F2F'),
    ('Avg per Cardholder Index', 'Average per Cardholder Total'),
]:
    base_value = average_spend(index_spend_per_cardholder, average_col)
    index_spend_per_cardholder[index_col] = (index_spend_per_cardholder[average_col] / base_value) * 100
index_spend_per_cardholder

In [None]:
# Keep only time_period_value and the three index columns by dropping every input and intermediate column
columns_to_drop = [
    "total_online_spend", "total_f2f_spend", "Total_Spend",
    "online_cardholders", "f2f_cardholders", "total_cardholders",
    "Change from Base",
    "Average per Cardholder Online", "Average per Cardholder F2F", "Average per Cardholder Total",
]
clean_index_per_cardholder = index_spend_per_cardholder.drop(columns=columns_to_drop)

clean_index_per_cardholder

In [None]:
# Reshape to long format (one row per month and index series) for plotting
melted_df = pd.melt(
    clean_index_per_cardholder,
    id_vars="time_period_value",
    value_vars=["Avg per Cardholder Online Index", "Avg per Cardholder F2F Index", "Avg per Cardholder Index"],
    var_name="Merchant Channel Type",
    value_name="Value",
)
#melted_df.to_csv("Figure 1: Indexed spend per cardholder, by merchant channel.csv")

In [None]:
# Figure 1: indexed average spend per cardholder, one line per merchant channel series
fig1 = px.line(data_frame=melted_df, x="time_period_value", y="Value", color="Merchant Channel Type",
               title="Average Spend per Cardholder by Merchant Channel")
fig1.show()

# Figure 2:  Online vs Face-to-Face Spending 

In [None]:
# Put total and online monthly spend side by side
online_ratio = spend_total.merge(online_spend, on='time_period_value')

# Share of each month's spend that is online, as a percentage
online_share = online_ratio["total_online_spend"] / online_ratio["total_spend"]
online_ratio["Online %"] = online_share * 100

#online_ratio.to_csv("Figure 2: Ratio of total spend that is online.csv")

In [None]:
# Figure 2: online share of total spend over time
fig2 = px.line(data_frame=online_ratio, x="time_period_value", y=["Online %"])
fig2

#########################################################
#                Analysis for section 4                 #
#########################################################

In [None]:
# Helpers for comparing the value of one column between two months of a table


def _value_for_month(table, month, column):
    """Return `column` from the first row of `table` whose time_period_value equals `month`.

    Both sides are compared as text, matching how average_spend reads
    time_period_value, so integer and string month labels behave the same.

    Raises:
        IndexError: if no row matches `month`.
    """
    matches = table.loc[table['time_period_value'].astype(str) == str(month), column]
    if matches.empty:
        raise IndexError(
            f"No row with time_period_value == {month!r} when looking up column {column!r}"
        )
    return matches.values[0]


def _start_value(starting_month, table, column):
    """Return the comparison base: the 2019 mean for "2019 average", otherwise that month's value."""
    if starting_month == "2019 average":
        return average_spend(table, column)
    return _value_for_month(table, starting_month, column)


def nom_growth_between_months(starting_month, finishing_month, table, column):
    """Nominal change in `column` between two months.

    Parameters:
        starting_month: a time_period_value label, or the literal "2019 average"
            to use the 2019 mean of `column` as the starting value.
        finishing_month: a time_period_value label.
        table: DataFrame with a 'time_period_value' column and `column`.
        column: name of the column to compare.

    Returns:
        finishing value minus starting value.

    Raises:
        IndexError: if either month label is not present in `table`.
    """
    start_value = _start_value(starting_month, table, column)
    end_value = _value_for_month(table, finishing_month, column)
    return end_value - start_value


def pct_growth_between_months(starting_month, finishing_month, table, column):
    """Proportional change in `column` between two months.

    Takes the same arguments as nom_growth_between_months.

    Returns:
        finishing value / starting value - 1, as a fraction (0.25 means +25%),
        not multiplied by 100.

    Raises:
        IndexError: if either month label is not present in `table`.
    """
    start_value = _start_value(starting_month, table, column)
    end_value = _value_for_month(table, finishing_month, column)
    return end_value / start_value - 1

# Average spend per cardholder analysis


## TODO: this method needs checking, as the values don't match those published by DSC

In [None]:
# Index-point difference between the 2019 average and the finishing month for total, online and f2f spend per cardholder
starting_month = "2019 average"
finishing_month = "202506"

total_difference, online_difference, f2f_difference = [
    nom_growth_between_months(starting_month, finishing_month, index_spend_per_cardholder, index_column)
    for index_column in (
        "Avg per Cardholder Index",
        "Avg per Cardholder Online Index",
        "Avg per Cardholder F2F Index",
    )
]

print(f"Difference in average spend per cardholder in {finishing_month} from {starting_month}: {total_difference}")
print(f"Difference in average spend online per cardholder spend in {finishing_month} from {starting_month}: {online_difference}")
print(f"Difference in average spend f2f per cardholder spend in {finishing_month} from {starting_month}: {f2f_difference}")

In [None]:
# Proportional change in the total, online and f2f spend-per-cardholder indices since June 2021
# NOTE(review): this cell finishes at 202306 while the other June-2021 comparisons finish at 202506 — confirm this is intended
starting_month = "202106"
finishing_month = "202306"

total_difference, online_difference, f2f_difference = [
    pct_growth_between_months(starting_month, finishing_month, index_spend_per_cardholder, index_column)
    for index_column in (
        "Avg per Cardholder Index",
        "Avg per Cardholder Online Index",
        "Avg per Cardholder F2F Index",
    )
]

print(f"Percentage difference in average spend per cardholder in {finishing_month} in comparison with {starting_month}: {total_difference}")
print(f"Percentage difference in average spend online per cardholder spend in {finishing_month} in comparison with {starting_month}: {online_difference}")
print(f"Percentage difference in average spend f2f per cardholder spend in {finishing_month} in comparison with {starting_month}: {f2f_difference}")

## Average transaction value analysis 

### TODO: these values also don't match what was published by DSC ###

In [None]:
# Number of transactions that were made online and the total online spend, per month
# The SQL text gets its own name so that `online_transactions` only ever refers to a DataFrame
# NOTE(review): no destination_country filter here, whereas online_cardholders is restricted to
# destination_country = 'UNITED KINGDOM' — confirm the two are meant to be combined
online_transactions_sql = '''SELECT time_period_value, sum(transactions) as online_transactions, sum(spend) as online_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
online_transactions = bq.read_bq_table_sql(client, online_transactions_sql)
# Outer merge keeps months that appear in only one of the two tables
online_transactions = pd.merge(online_transactions, online_cardholders, on="time_period_value", how="outer")
online_transactions

In [None]:
# Number of transactions that were made f2f and the total f2f spend, per month
# The SQL text gets its own name so that `f2f_transactions` only ever refers to a DataFrame
# NOTE(review): no destination_country filter here, whereas f2f_cardholders is restricted to
# destination_country = 'UNITED KINGDOM' — confirm the two are meant to be combined
f2f_transactions_sql = '''SELECT time_period_value, sum(transactions) as f2f_transactions, sum(spend) as f2f_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month' 
  AND mcg = 'All' 
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  GROUP BY time_period_value
  ORDER BY time_period_value ASC
'''
f2f_transactions = bq.read_bq_table_sql(client, f2f_transactions_sql)
# Outer merge keeps months that appear in only one of the two tables
f2f_transactions = pd.merge(f2f_transactions, f2f_cardholders, on="time_period_value", how="outer")
f2f_transactions

In [None]:
# Average spend per transaction (spend / number of transactions) for online and f2f
spend_per_transaction = online_transactions.merge(f2f_transactions, on="time_period_value", how="outer")

for spend_col, count_col, average_col in [
    ("online_spend", "online_transactions", "Average per Transaction Online"),
    ("f2f_spend", "f2f_transactions", "Average per Transaction F2F"),
]:
    spend_per_transaction[average_col] = spend_per_transaction[spend_col] / spend_per_transaction[count_col]

spend_per_transaction

In [None]:
# Average number of transactions per cardholder (transactions / cardholders) for online and f2f
for count_col, holders_col, average_col in [
    ("online_transactions", "online_cardholders", "Average Number of Transactions per Cardholder Online"),
    ("f2f_transactions", "f2f_cardholders", "Average Number of Transactions per Cardholder F2F"),
]:
    spend_per_transaction[average_col] = spend_per_transaction[count_col] / spend_per_transaction[holders_col]

spend_per_transaction

In [None]:
# "Average transaction value per cardholder": average spend per transaction divided by
# the average number of transactions per cardholder
# NOTE(review): this is (spend / transactions) / (transactions / cardholders); multiplying the two
# instead would give spend per cardholder — confirm which definition DSC uses
for value_col, frequency_col, result_col in [
    ("Average per Transaction Online", "Average Number of Transactions per Cardholder Online", "Average Transaction Value per Cardholder Online"),
    ("Average per Transaction F2F", "Average Number of Transactions per Cardholder F2F", "Average Transaction Value per Cardholder F2F"),
]:
    spend_per_transaction[result_col] = spend_per_transaction[value_col] / spend_per_transaction[frequency_col]

spend_per_transaction

In [None]:
# Index every per-transaction / per-cardholder measure to its own 2019 mean (2019 average = 100)
# Maps each index column to the spend_per_transaction column it is derived from; order sets the column order
index_sources = {
    "Avg per Transaction per Cardholder Online Index": 'Average Transaction Value per Cardholder Online',
    "Avg per Transaction per Cardholder F2F Index": 'Average Transaction Value per Cardholder F2F',
    'Avg Number of Transactions per Cardholder Online Index': 'Average Number of Transactions per Cardholder Online',
    'Avg Number of Transactions per Cardholder F2F Index': 'Average Number of Transactions per Cardholder F2F',
    "Average per Transaction Online Index": "Average per Transaction Online",
    "Average per Transaction F2F Index": "Average per Transaction F2F",
}

indexed_spend_per_transaction = pd.DataFrame({"time_period_value": spend_per_transaction["time_period_value"]})
for index_name, source_name in index_sources.items():
    base_value = average_spend(spend_per_transaction, source_name)
    indexed_spend_per_transaction[index_name] = (spend_per_transaction[source_name] / base_value) * 100

indexed_spend_per_transaction

In [None]:
# Reshape the transaction-value and transaction-count indices to long format for plotting
melted_df_2 = pd.melt(
    indexed_spend_per_transaction,
    id_vars="time_period_value",
    value_vars=[
        "Average per Transaction Online Index",
        "Average per Transaction F2F Index",
        "Avg Number of Transactions per Cardholder Online Index",
        "Avg Number of Transactions per Cardholder F2F Index",
    ],
    var_name="Merchant Channel Type",
    value_name="Value",
)
melted_df_2
# Index columns not included in value_vars above:
        #"Avg per Transaction per Cardholder Online Index",
        #"Avg per Transaction per Cardholder F2F Index",

In [None]:
# Indexed average transaction value and transactions per cardholder, one line per series
fig10 = px.line(data_frame=melted_df_2, x="time_period_value", y="Value", color="Merchant Channel Type",
                title="Average Spend per Transaction by Merchant Channel")
fig10.show()

In [None]:
# Proportional change in the online and f2f average-transaction-value indices versus the 2019 average
starting_month = "2019 average"
finishing_month = "202506"

online_difference, f2f_difference = [
    pct_growth_between_months(starting_month, finishing_month, indexed_spend_per_transaction, index_column)
    for index_column in (
        "Average per Transaction Online Index",
        "Average per Transaction F2F Index",
    )
]

print(f"Percentage difference in average spend online per transaction spend in {finishing_month} in comparison with {starting_month}: {online_difference}")
print(f"Percentage difference in average spend f2f per transaction spend in {finishing_month} in comparison with {starting_month}: {f2f_difference}")

In [None]:
# Proportional change in the online and f2f average-transaction-value indices since June 2021
starting_month = "202106"
finishing_month = "202506"

online_difference, f2f_difference = [
    pct_growth_between_months(starting_month, finishing_month, indexed_spend_per_transaction, index_column)
    for index_column in (
        "Average per Transaction Online Index",
        "Average per Transaction F2F Index",
    )
]

print(f"Percentage difference in average spend online per transaction spend in {finishing_month} in comparison with {starting_month}: {online_difference}")
print(f"Percentage difference in average spend f2f per transaction spend in {finishing_month} in comparison with {starting_month}: {f2f_difference}")

In [None]:
# Proportional change in the online and f2f transactions-per-cardholder indices versus the 2019 average
starting_month = "2019 average"
finishing_month = "202506"

online_difference, f2f_difference = [
    pct_growth_between_months(starting_month, finishing_month, indexed_spend_per_transaction, index_column)
    for index_column in (
        "Avg Number of Transactions per Cardholder Online Index",
        "Avg Number of Transactions per Cardholder F2F Index",
    )
]

print(f"Percentage difference in average number of transactions per cardholder online in {finishing_month} in comparison with {starting_month}: {online_difference}")
print(f"Percentage difference in average number of transactions per cardholder f2f in {finishing_month} in comparison with {starting_month}: {f2f_difference}")

In [None]:
# Proportional change in the online and f2f transactions-per-cardholder indices since June 2021
starting_month = "202106"
finishing_month = "202506"

online_difference, f2f_difference = [
    pct_growth_between_months(starting_month, finishing_month, indexed_spend_per_transaction, index_column)
    for index_column in (
        "Avg Number of Transactions per Cardholder Online Index",
        "Avg Number of Transactions per Cardholder F2F Index",
    )
]

print(f"Percentage difference in average number of transactions per cardholder online in {finishing_month} in comparison with {starting_month}: {online_difference}")
print(f"Percentage difference in average number of transactions per cardholder f2f in {finishing_month} in comparison with {starting_month}: {f2f_difference}")

# Online ratio analysis

In [None]:
# Percentage-point change in the online share of spend, June 2025 versus June 2019
starting_month = "201906"
finishing_month = "202506"

# Online share in each of the two months, read from the first matching row
is_start = online_ratio['time_period_value'] == starting_month
is_finish = online_ratio['time_period_value'] == finishing_month
start_value = online_ratio.loc[is_start, 'Online %'].values[0]
finishing_value = online_ratio.loc[is_finish, 'Online %'].values[0]

difference = nom_growth_between_months(starting_month, finishing_month, online_ratio, "Online %")

print(f"Difference in online ratio from {finishing_month} in comparison with {starting_month}: {difference}")
print(f"Online ratio in {starting_month} = {start_value} and {finishing_month} = {finishing_value}")

# MCG Driver Analysis

In [None]:
# Monthly online spend by UK cardholders, broken down by mcg
# mcg is not filtered, so one row per month and mcg value is returned; later cells rely on an mcg = 'All' row being among them
# NOTE(review): no destination_country filter, so spend is summed over every destination_country row — confirm intended
online_by_mcg = '''SELECT time_period_value, sum(spend) as online_spend, mcg
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month'
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value, mcg
ORDER BY time_period_value  ASC'''
online_by_mcg_df = bq.read_bq_table_sql(client, online_by_mcg)
#online_by_mcg_df

In [None]:
# Monthly face-to-face spend by UK cardholders, broken down by mcg
# mcg is not filtered, so one row per month and mcg value is returned; later cells rely on an mcg = 'All' row being among them
# NOTE(review): no destination_country filter, so spend is summed over every destination_country row — confirm intended
f2f_by_mcg = '''SELECT time_period_value, sum(spend) as f2f_spend, mcg
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Month'
  AND mcc = 'All'
  AND merchant_channel = 'Face to Face'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
GROUP BY time_period_value, mcg
ORDER BY time_period_value  ASC'''
f2f_by_mcg_df = bq.read_bq_table_sql(client, f2f_by_mcg)
#f2f_by_mcg_df

In [None]:
# Adjusting the monthly online spend values by the cardholder 'Change from Base' factor
# validate='many_to_one' makes the merge fail loudly if cardholders_total ever holds more than one row
# for a month; duplicate months would add rows and misalign the index-based copy-back below.
merged_df = online_by_mcg_df.merge(cardholders_total, on='time_period_value', how='left', validate='many_to_one')
merged_df['adjusted_spend'] = merged_df['online_spend'] * merged_df['Change from Base']

# Copy the adjusted values back onto the source table (aligned on the row index)
online_by_mcg_df['adjusted_online_spend'] = merged_df['adjusted_spend']
#online_by_mcg_df

In [None]:
# Adjusting the monthly f2f spend values by the cardholder 'Change from Base' factor
# validate='many_to_one' makes the merge fail loudly if cardholders_total ever holds more than one row
# for a month; duplicate months would add rows and misalign the index-based copy-back below.
merged_df_f2f = f2f_by_mcg_df.merge(cardholders_total, on='time_period_value', how='left', validate='many_to_one')
merged_df_f2f['adjusted_spend'] = merged_df_f2f['f2f_spend'] * merged_df_f2f['Change from Base']

# Copy the adjusted values back onto the source table (aligned on the row index)
f2f_by_mcg_df['adjusted_f2f_spend'] = merged_df_f2f['adjusted_spend']
f2f_by_mcg_df

In [None]:
# One table with online and f2f values per month and MCG (rows present in both tables only)
merged_df = online_by_mcg_df.merge(f2f_by_mcg_df, on=["time_period_value", "mcg"], how="inner")
merged_df

In [None]:
# Drop the unadjusted spend columns, leaving only the cardholder-adjusted values
cleaned_df = merged_df.drop(columns=["online_spend", "f2f_spend"])
cleaned_df

In [None]:
# Order the adjusted online and f2f values by MCG, then by month
# (no aggregation happens here; the table is only sorted)
monthly_totals = cleaned_df.sort_values(["mcg", "time_period_value"])

monthly_totals

In [None]:
# Row-to-row nominal and percentage change within each MCG, for online and f2f adjusted spend
for spend_col, nominal_col, percent_col in [
    ('adjusted_online_spend', 'online_nominal_change', 'online_percent_change'),
    ('adjusted_f2f_spend', 'f2f_nominal_change', 'f2f_percent_change'),
]:
    spend_within_mcg = monthly_totals.groupby('mcg')[spend_col]
    monthly_totals[nominal_col] = spend_within_mcg.diff()
    monthly_totals[percent_col] = spend_within_mcg.pct_change() * 100

monthly_totals

In [None]:
# Overall change per channel, taken from the mcg = 'All' rows instead of summing the individual MCGs
is_all_mcg = monthly_totals['mcg'] == 'All'
all_nominal_online = (
    monthly_totals.loc[is_all_mcg, ['time_period_value', 'online_nominal_change']]
    .rename(columns={'online_nominal_change': 'MoM Change Online'})
)
all_nominal_f2f = (
    monthly_totals.loc[is_all_mcg, ['time_period_value', 'f2f_nominal_change']]
    .rename(columns={'f2f_nominal_change': 'MoM Change F2F'})
)

all_nominal_f2f

In [None]:
# Attach the 'All' change for each channel to every MCG row of the same month
# NOTE: not idempotent — re-running this cell without re-running the cells above merges the 'MoM Change' columns in again
monthly_totals = (
    monthly_totals
    .merge(all_nominal_online, on='time_period_value', how='inner')
    .merge(all_nominal_f2f, on='time_period_value', how='left')
)

# Each MCG's change as a percentage of the 'All' change for that month and channel
for nominal_col, overall_col, contribution_col in [
    ('online_nominal_change', 'MoM Change Online', 'contribution_to_online_change'),
    ('f2f_nominal_change', 'MoM Change F2F', 'contribution_to_f2f_change'),
]:
    monthly_totals[contribution_col] = (monthly_totals[nominal_col] / monthly_totals[overall_col]) * 100
monthly_totals

In [None]:
# Online driver table: one row per month and MCG, ordered by month then by contribution to the overall online change
wanted_columns = ['time_period_value', 'mcg', 'contribution_to_online_change','MoM Change Online', 'online_nominal_change', "online_percent_change"]
drivers_for_online_spend = (
    monthly_totals[wanted_columns]
    .sort_values(by=["time_period_value", "contribution_to_online_change"])
)

drivers_for_online_spend

In [None]:
# F2F driver table: one row per month and MCG, ordered by month then by contribution to the overall f2f change
wanted_columns = ['time_period_value', 'mcg', 'contribution_to_f2f_change', 'MoM Change F2F', 'f2f_nominal_change', "f2f_percent_change"]
drivers_for_f2f_spend = (
    monthly_totals[wanted_columns]
    .sort_values(by=["time_period_value", "contribution_to_f2f_change"])
)
drivers_for_f2f_spend

In [None]:
# Restrict the online driver table to the MCGs selected for the online analysis
mcg_of_interest = [
    "All",
    "DEPARTMENT STORES",
    "DISCOUNT STORES",
    "APPAREL & ACCESSORIES",
    "TRAVEL SERVICES",
    "EDUCATION & GOVERNMENT",
    "AIRLINES",
]
is_online_mcg_of_interest = drivers_for_online_spend['mcg'].isin(mcg_of_interest)
filtered_df = drivers_for_online_spend.loc[is_online_mcg_of_interest]
filtered_df

In [None]:
# Restrict the f2f driver table to the MCGs selected for the face-to-face analysis
f2f_mcg_of_interest = [
    "All",
    "DEPARTMENT STORES",
    "DISCOUNT STORES",
    "APPAREL & ACCESSORIES",
    "RESTAURANTS",
    "RETAIL GOODS",
    "FOOD & GROCERY",
]
is_f2f_mcg_of_interest = drivers_for_f2f_spend['mcg'].isin(f2f_mcg_of_interest)
filtered_df_f2f = drivers_for_f2f_spend.loc[is_f2f_mcg_of_interest]
filtered_df_f2f

In [None]:
# Calculating the average growth for each MCG in each month of the year, using data from 202201 onwards

# Work on copies so the filtered driver tables are not modified
temp_df = filtered_df.copy()
temp_df_f2f = filtered_df_f2f.copy()

# Year = first four characters of time_period_value (as a number, for filtering);
# month = last two characters, kept as text and used only as a grouping label.
# Both columns are added before filtering so nothing is assigned onto a filtered slice.
temp_df['year'] = temp_df['time_period_value'].str[:4].astype(int)
temp_df['month'] = temp_df['time_period_value'].str[-2:]
temp_df_f2f['year'] = temp_df_f2f['time_period_value'].str[:4].astype(int)
temp_df_f2f['month'] = temp_df_f2f['time_period_value'].str[-2:]

# Filter for 202201 and later
temp_df = temp_df[temp_df['year'] > 2021]
temp_df_f2f = temp_df_f2f[temp_df_f2f['year'] > 2021]

# Average percentage change and contribution for each MCG and month of the year
average_df_2022 = temp_df.groupby(['mcg', 'month'])[['online_percent_change', 'contribution_to_online_change']].mean().reset_index()
average_df_f2f_2022 = temp_df_f2f.groupby(['mcg', 'month'])[['f2f_percent_change', 'contribution_to_f2f_change']].mean().reset_index()

average_df_2022

In [None]:
# Average online growth by month of the year for the selected MCGs, 2022 onwards (one line per MCG)
fig4 = px.line(data_frame=average_df_2022, x="month", y=["online_percent_change"], color="mcg")
fig4

In [None]:
# Average f2f growth by month of the year for the selected MCGs, 2022 onwards (one line per MCG)
fig5 = px.line(data_frame=average_df_f2f_2022, x="month", y=["f2f_percent_change"], color="mcg")
fig5

In [None]:
# Calculating the average growth for each MCG in each month of the year from 202106 onwards (post pandemic period)
# Specifically being done so we avoid the start of the 2021 year but capture the MoM changes in September and December of 2021

# Work on copies so the filtered driver tables are not modified
temp_df = filtered_df.copy()
temp_df_f2f = filtered_df_f2f.copy()

# Year = first four characters of time_period_value (as a number, for filtering);
# month = last two characters, kept as text and used as the grouping label.
# Both columns are added before filtering so nothing is assigned onto a filtered slice.
temp_df['year'] = temp_df['time_period_value'].str[:4].astype(int)
temp_df['month'] = temp_df['time_period_value'].str[-2:]
temp_df_f2f['year'] = temp_df_f2f['time_period_value'].str[:4].astype(int)
temp_df_f2f['month'] = temp_df_f2f['time_period_value'].str[-2:]

# Numeric month used only for the filter below (any 'M' prefix is stripped first)
month_number = temp_df['month'].str.replace('M', '').astype(int)
month_number_f2f = temp_df_f2f['month'].str.replace('M', '').astype(int)

# Filter for 202106 and later: any year after 2021, or June onwards within 2021
temp_df = temp_df[(temp_df['year'] > 2021) | ((temp_df['year'] == 2021) & (month_number >= 6))]
temp_df_f2f = temp_df_f2f[(temp_df_f2f['year'] > 2021) | ((temp_df_f2f['year'] == 2021) & (month_number_f2f >= 6))]

# Average percentage change and contribution for each MCG and month of the year
average_df_202106 = temp_df.groupby(['mcg', 'month'])[['online_percent_change', 'contribution_to_online_change']].mean().reset_index()
average_df_f2f_202106 = temp_df_f2f.groupby(['mcg', 'month'])[['f2f_percent_change', 'contribution_to_f2f_change']].mean().reset_index()

average_df_202106

In [None]:
# Average online growth by month of the year for the selected MCGs, 202106 onwards (one line per MCG)
fig6 = px.line(data_frame=average_df_202106, x="month", y=["online_percent_change"], color="mcg")
fig6

In [None]:
# Average f2f growth by month of the year for the selected MCGs, 202106 onwards (one line per MCG)
fig7 = px.line(data_frame=average_df_f2f_202106, x="month", y=["f2f_percent_change"], color="mcg")
fig7