In [None]:
# Root directory appended to sys.path below (presumably so that the
# fintrans_toolbox package can be imported — confirm for your environment).
project_path = "/home/jupyter"
import os
import sys

sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

# Project helpers; bq.read_bq_table_sql is what the query cells below call.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client handed to bq.read_bq_table_sql by every query cell.
client = bigquery.Client()


In [None]:
# Quarterly cardholder totals for UK cardholders (mcg = 'All').
#
# Runs the query below, sums `cardholders` per quarter and writes the result to
# UK_spending_by_mcg_All.csv with columns
# (time_period_value, Spend_mcg_All_cardholders).
#
# NOTE(review): unlike the sibling spend queries this one has no `mcc = 'All'`
# filter and no destination filter, so cardholders are summed across every
# returned row of a quarter — confirm that is the intended denominator.

mcg_all_cardholders_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_mcg_All = bq.read_bq_table_sql(client, mcg_all_cardholders_sql)

# Validate the columns that are actually aggregated (cardholders, not spend).
# Stop here with a clear error instead of falling through to the save step with
# nothing to save.
required_columns = {'time_period_value', 'cardholders'}
if df_by_mcg_All is None or not required_columns.issubset(df_by_mcg_All.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum cardholders per quarter and give the column its downstream name.
UK_spending_by_mcg_All = (
    df_by_mcg_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'Spend_mcg_All_cardholders'})
)
print(UK_spending_by_mcg_All)

# Save the result to a CSV file
csv_filename = "UK_spending_by_mcg_All.csv"
UK_spending_by_mcg_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")



In [None]:
# Quarterly domestic spend by UK cardholders (all channels).
#
# Runs the query below, sums `spend` per quarter and writes the result to
# UK_spending_by_Dom_All.csv with columns (time_period_value, Dom_spend_All).

dom_all_sql = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_All = bq.read_bq_table_sql(client, dom_all_sql)

# Stop with a clear error if the query result cannot be aggregated, rather
# than reaching the save step with nothing to save.
required_columns = {'time_period_value', 'spend'}
if df_by_Dom_All is None or not required_columns.issubset(df_by_Dom_All.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum spend per quarter and give the column its downstream name.
UK_spending_by_Dom_All = (
    df_by_Dom_All.groupby('time_period_value')['spend']
    .sum()
    .reset_index()
    .rename(columns={'spend': 'Dom_spend_All'})
)
print(UK_spending_by_Dom_All)

# Save the result to a CSV file
csv_filename = "UK_spending_by_Dom_All.csv"
UK_spending_by_Dom_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Cardholder-adjusted quarterly domestic spend (mcg = 'All').
#
# Formula, per quarter q:
#   Adjust_Quarterly_Dom_Spend(q) =
#       Spend_mcg_All_cardholders(2019Q1) / Spend_mcg_All_cardholders(q) * Dom_spend_All(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_all = pd.read_csv("UK_spending_by_Dom_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Spend_mcg_All_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_mcg_All.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["Dom_spend_All"]
df_cardholders["Adjust_Quarterly_Dom_Spend"] = (
    cardholders_2019Q1 / df_cardholders["Spend_mcg_All_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Dom_Spend_mcg.csv", index=False)
print(df_cardholders)



In [None]:
# Quarterly domestic online spend by UK cardholders.
#
# Runs the query below (merchant_channel = 'Online'), sums `spend` per quarter
# and writes the result to UK_spending_by_Dom_Online_All.csv with columns
# (time_period_value, Dom_spend_Online_All).

dom_online_all_sql = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_Online_All = bq.read_bq_table_sql(client, dom_online_all_sql)

# Stop with a clear error if the query result cannot be aggregated, rather
# than reaching the save step with nothing to save.
required_columns = {'time_period_value', 'spend'}
if df_by_Dom_Online_All is None or not required_columns.issubset(df_by_Dom_Online_All.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum spend per quarter and give the column its downstream name.
UK_spending_by_Dom_Online_All = (
    df_by_Dom_Online_All.groupby('time_period_value')['spend']
    .sum()
    .reset_index()
    .rename(columns={'spend': 'Dom_spend_Online_All'})
)
print(UK_spending_by_Dom_Online_All)

# Save the result to a CSV file
csv_filename = "UK_spending_by_Dom_Online_All.csv"
UK_spending_by_Dom_Online_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Cardholder-adjusted quarterly domestic online spend (mcg = 'All').
#
# Formula, per quarter q:
#   Adjust_Quarterly_Dom_Spend_Online(q) =
#       Spend_mcg_All_cardholders(2019Q1) / Spend_mcg_All_cardholders(q) * Dom_spend_Online_All(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_all = pd.read_csv("UK_spending_by_Dom_Online_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Spend_mcg_All_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_mcg_All.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["Dom_spend_Online_All"]
df_cardholders["Adjust_Quarterly_Dom_Spend_Online"] = (
    cardholders_2019Q1 / df_cardholders["Spend_mcg_All_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Dom_Spend_Online_mcg.csv", index=False)
print(df_cardholders)




In [None]:
# Online share of cardholder-adjusted domestic spend, per quarter.
#
# Both adjusted series are built with the same per-quarter factor,
# cardholders(2019Q1) / cardholders(quarter), so the factor cancels in the
# ratio below: each quarter's adjusted online share equals that quarter's
# unadjusted online share. The share can still differ from quarter to quarter.

import pandas as pd

# Adjusted series written by the two adjustment cells above.
online_spending = pd.read_csv("Adjusted_Dom_Spend_Online_mcg.csv")
total_spending = pd.read_csv("Adjusted_Dom_Spend_mcg.csv")

# Inner join: keep only the quarters present in both files.
merged_df = online_spending.merge(total_spending, on="time_period_value", how="inner")

# Online spend as a percentage of total spend.
adjusted_online = merged_df["Adjust_Quarterly_Dom_Spend_Online"]
adjusted_total = merged_df["Adjust_Quarterly_Dom_Spend"]
merged_df["online_spending_ratio"] = adjusted_online.div(adjusted_total).mul(100)

# Persist just the quarter label and the ratio.
ratio_columns = ["time_period_value", "online_spending_ratio"]
merged_df[ratio_columns].to_csv("Q_Online_Spending_Ratio_Dom_Ad_mcg.csv", index=False)

print("Online spending ratio by quarter has been saved to 'Q_Online_Spending_Ratio_Dom_Ad_mcg.csv'.")

# Read the saved file back and collect the distinct ratio values it contains.
df = pd.read_csv("Q_Online_Spending_Ratio_Dom_Ad_mcg.csv")
unique_ratios = df["online_spending_ratio"].unique()

# Display the distinct values as the cell output.
unique_ratios


In [None]:
 # Filter Q1 2019 data ----------------- Dylan's code
    #  Change yearly_mcc_df to the DataFrame you want to investigate; this cell adds a column of spend indexed to the 2019Q1 number of cardholders

# Rows for the 2019Q1 base quarter.
# NOTE(review): yearly_mcc_df is not defined in any cell visible here; it must
# exist in the kernel before this cell runs.
q1_2019 = yearly_mcc_df[yearly_mcc_df["time_period_value"] == "2019Q1"]
 
# 2019Q1 spend keyed by (destination_country, mcc).
# NOTE(review): nothing in the visible cells reads this dictionary.
q1_2019_lookup = q1_2019.set_index(["destination_country", "mcc"])["spend"].to_dict()
 
# 2019Q1 cardholders keyed by (destination_country, mcc); this is the base
# value that calculate_adjusted_spend looks up for each row.
q1_cardholders_lookup = q1_2019.set_index(["destination_country", "mcc"])["cardholders"].to_dict()
 
# Function to calculate adjusted spend
def calculate_adjusted_spend(row, lookup=None):
    """Scale a row's spend to the base-quarter cardholder count.

    Computes ``(base_cardholders / row["cardholders"]) * row["spend"]``, where
    ``base_cardholders`` is looked up by ``(destination_country, mcc)``.

    Args:
        row: Mapping-like record (for example a DataFrame row) with the keys
            "destination_country", "mcc", "cardholders" and "spend".
        lookup: Optional dict mapping ``(destination_country, mcc)`` to the
            base-quarter cardholder count. Defaults to the module-level
            ``q1_cardholders_lookup`` so existing ``df.apply(...)`` calls
            keep working unchanged.

    Returns:
        The adjusted spend, or None when the key has no base value or the
        row's cardholder count is None or zero.
    """
    if lookup is None:
        lookup = q1_cardholders_lookup

    base_cardholders = lookup.get((row["destination_country"], row["mcc"]))
    current_cardholders = row["cardholders"]

    # No base value, or nothing to divide by: the adjustment is undefined.
    if base_cardholders is None or current_cardholders is None or current_cardholders == 0:
        return None

    return (base_cardholders / current_cardholders) * row["spend"]
# Add the adjusted_spend column by calling calculate_adjusted_spend once per
# row (axis=1).
yearly_mcc_df["adjusted_spend"] = yearly_mcc_df.apply(calculate_adjusted_spend, axis=1)

In [None]:
# Quarterly cardholder totals for UK cardholders spending abroad
# (marked "NO NEED" by the author).
#
# Runs the query below, sums `cardholders` per quarter and writes the result to
# UK_spending_by_Intl_All1.csv with columns
# (time_period_value, Intl_spend_All_cardholders).
#
# NOTE(review): this query filters mcc = 'All' but not mcg, and cardholders are
# summed across all destination countries of a quarter — confirm that is the
# intended denominator.

intl_all_cardholders_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_All = bq.read_bq_table_sql(client, intl_all_cardholders_sql)

# Validate the columns that are actually aggregated (cardholders, not spend).
# Stop here with a clear error instead of falling through to the save step with
# nothing to save.
required_columns = {'time_period_value', 'cardholders'}
if df_by_Intl_All is None or not required_columns.issubset(df_by_Intl_All.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum cardholders per quarter and give the column its downstream name.
UK_spending_by_Intl_All1 = (
    df_by_Intl_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'Intl_spend_All_cardholders'})
)
print(UK_spending_by_Intl_All1)

# Save the result to a CSV file
csv_filename = "UK_spending_by_Intl_All1.csv"
UK_spending_by_Intl_All1.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")


In [None]:
# Quarterly total spend abroad by UK cardholders (all channels).
#
# Runs the query below, renames `spend` to `abroad_spend_all`, sums it per
# quarter and writes the result to Q_spending_by_Intl_All.csv with columns
# (time_period_value, abroad_spend_all).

UK_spending_by_Intl_All = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All'
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_All = bq.read_bq_table_sql(client, UK_spending_by_Intl_All)

# Validate before touching the frame, so a missing result is reported by the
# message below rather than by an AttributeError on the rename.
if df_by_Intl_All is not None and {'time_period_value', 'spend'}.issubset(df_by_Intl_All.columns):
    df_by_Intl_All = df_by_Intl_All.rename(columns={'spend': 'abroad_spend_all'})

    # Group by quarter and sum the abroad spend (done once, then printed and saved).
    Q_spending_by_Intl_All = df_by_Intl_All.groupby('time_period_value')['abroad_spend_all'].sum().reset_index()
    print(Q_spending_by_Intl_All)

    # Save to CSV
    Q_spending_by_Intl_All.to_csv("Q_spending_by_Intl_All.csv", index=False)
    print("CSV file 'Q_spending_by_Intl_All.csv' has been created successfully.")
else:
    print("DataFrame is empty or missing required columns.")

In [None]:
# Average quarter-on-quarter percentage change in total spend abroad,
# over the window 2022Q1 to 2025Q2.

import pandas as pd

# Quarterly abroad totals written by the summary cell above.
df = pd.read_csv("Q_spending_by_Intl_All.csv")

# Quarter labels are compared as text.
df['time_period_value'] = df['time_period_value'].astype(str)

# 2022Q1 .. 2025Q2: four quarters for each of 2022-2025, minus the last two.
quarters = [f"{year}Q{q}" for year in range(2022, 2026) for q in range(1, 5)][:14]

in_window = df['time_period_value'].isin(quarters)
df_filtered = df.loc[in_window].copy()

# Chronological order ("YYYYQn" labels sort correctly as strings).
df_filtered = df_filtered.sort_values(by='time_period_value')

# Percentage change relative to the previous row; the first row has no
# predecessor and is NaN.
df_filtered['QoQ_%_Change'] = df_filtered['abroad_spend_all'].pct_change().mul(100)

# Mean over the defined changes only.
qoq_changes = df_filtered['QoQ_%_Change'].dropna()
average_qoq_change = qoq_changes.mean()

print(f"Average quarter-on-quarter percentage change in abroad spending from 2022Q1 to 2025Q2: {average_qoq_change:.2f}%")



In [None]:
# Cardholder-adjusted quarterly total spend abroad (mcg = 'All' cardholders).
#
# Formula, per quarter q:
#   Adjust_Quarterly_Intl_Spend(q) =
#       Spend_mcg_All_cardholders(2019Q1) / Spend_mcg_All_cardholders(q) * abroad_spend_all(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_all = pd.read_csv("Q_spending_by_Intl_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Spend_mcg_All_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_mcg_All.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["abroad_spend_all"]
df_cardholders["Adjust_Quarterly_Intl_Spend"] = (
    cardholders_2019Q1 / df_cardholders["Spend_mcg_All_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Intl_Spend_mcg.csv", index=False)
print(df_cardholders)



In [None]:
# Cardholder-adjusted quarterly total spend abroad, using the abroad-only
# cardholder totals (marked "NO NEED" by the author).
#
# Formula, per quarter q:
#   Adjust_Quarterly_Intl_Spend(q) =
#       Intl_spend_All_cardholders(2019Q1) / Intl_spend_All_cardholders(q) * abroad_spend_all(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_Intl_All1.csv")
df_all = pd.read_csv("Q_spending_by_Intl_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Intl_spend_All_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_Intl_All1.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["abroad_spend_all"]
df_cardholders["Adjust_Quarterly_Intl_Spend"] = (
    cardholders_2019Q1 / df_cardholders["Intl_spend_All_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Intl_Spend.csv", index=False)
print(df_cardholders)



In [None]:
# Quarterly cardholder totals for UK cardholders spending abroad online
# (marked "NO NEED" by the author).
#
# Runs the query below, sums `cardholders` per quarter and writes the result to
# UK_spending_by_Intl_Online1.csv with columns
# (time_period_value, Intl_spend_Online_cardholders).
#
# NOTE(review): this query filters mcc = 'All' but not mcg, and cardholders are
# summed across all destination countries of a quarter — confirm that is the
# intended denominator.

intl_online_cardholders_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Intl_Online = bq.read_bq_table_sql(client, intl_online_cardholders_sql)

# Validate the columns that are actually aggregated (cardholders, not spend).
# Stop here with a clear error instead of falling through to the save step with
# nothing to save.
required_columns = {'time_period_value', 'cardholders'}
if df_by_Intl_Online is None or not required_columns.issubset(df_by_Intl_Online.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum cardholders per quarter and give the column its downstream name.
UK_spending_by_Intl_Online1 = (
    df_by_Intl_Online.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'Intl_spend_Online_cardholders'})
)
print(UK_spending_by_Intl_Online1)

# Save the result to a CSV file
csv_filename = "UK_spending_by_Intl_Online1.csv"
UK_spending_by_Intl_Online1.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Quarterly online spend abroad by UK cardholders.
#
# Runs the query below (merchant_channel = 'Online'), renames `spend` to
# `online_Intl_All`, sums it per quarter and writes the result to
# Q_spending_by_Online_Intl_All.csv with columns
# (time_period_value, online_Intl_All).

online_intl_all_sql = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All'
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_online_Intl_All = bq.read_bq_table_sql(client, online_intl_all_sql)

# Validate before touching the frame, so a missing result is reported by the
# message below rather than by an AttributeError on the rename.
if df_by_online_Intl_All is not None and {'time_period_value', 'spend'}.issubset(df_by_online_Intl_All.columns):
    df_by_online_Intl_All = df_by_online_Intl_All.rename(columns={'spend': 'online_Intl_All'})

    # Group by quarter and sum the online abroad spend (done once, then
    # printed and saved).
    Q_spending_by_online_Intl_All = df_by_online_Intl_All.groupby('time_period_value')['online_Intl_All'].sum().reset_index()
    # Keep the name the original cell left bound to the quarterly totals.
    UK_spending_by_online_Intl_All = Q_spending_by_online_Intl_All
    print(Q_spending_by_online_Intl_All)

    # Save to CSV
    Q_spending_by_online_Intl_All.to_csv("Q_spending_by_Online_Intl_All.csv", index=False)
    print("CSV file 'Q_spending_by_Online_Intl_All.csv' has been created successfully.")
else:
    print("DataFrame is empty or missing required columns.")

In [None]:
# Cardholder-adjusted quarterly online spend abroad (mcg = 'All' cardholders).
#
# Formula, per quarter q (this is what the code computes):
#   Adjust_Quarterly_Intl_Spend_Online(q) =
#       Spend_mcg_All_cardholders(2019Q1) / Spend_mcg_All_cardholders(q) * online_Intl_All(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_all = pd.read_csv("Q_spending_by_Online_Intl_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Spend_mcg_All_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_mcg_All.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["online_Intl_All"]
df_cardholders["Adjust_Quarterly_Intl_Spend_Online"] = (
    cardholders_2019Q1 / df_cardholders["Spend_mcg_All_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Intl_Spend_Online_mcg.csv", index=False)
print(df_cardholders)




In [None]:
# Average Q2 -> Q3 percentage change in adjusted online spend abroad,
# over the years 2022-2024.
# NOTE(review): the printed label says "MoM", but the values compared are
# quarters (Q2 vs Q3 of the same year).

import pandas as pd

# Load the CSV file
file_path = "Adjusted_Intl_Spend_Online_mcg.csv"
df = pd.read_csv(file_path)

# Extract year and quarter from 'time_period_value'
df[['year', 'quarter']] = df['time_period_value'].str.extract(r'(\d{4})Q(\d)').astype(int)

# Filter for Q2 and Q3 from 2022 to 2024
filtered_df = df[df['year'].isin([2022, 2023, 2024]) & df['quarter'].isin([2, 3])]

# One percentage change per year that has both a Q2 and a Q3 row.
q2_q3_changes = []
for year in [2022, 2023, 2024]:
    q2 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 2)]['Adjust_Quarterly_Intl_Spend_Online']
    q3 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 3)]['Adjust_Quarterly_Intl_Spend_Online']
    if not q2.empty and not q3.empty:
        change = ((q3.values[0] - q2.values[0]) / q2.values[0]) * 100
        q2_q3_changes.append(change)

# Average over the years actually collected. Dividing by a fixed 3 would
# understate the mean whenever a year is skipped above. With no usable year
# the average is undefined (NaN).
if q2_q3_changes:
    average_change = sum(q2_q3_changes) / len(q2_q3_changes)
else:
    average_change = float("nan")

print(f"Average MoM change in adjusted international online spending from Q2 to Q3 (2022–2024): {average_change:.2f}%")




In [None]:
# Average Q2 -> Q3 percentage change in adjusted total spend abroad,
# over the years 2022-2024.
# NOTE(review): the printed label says "MoM", but the values compared are
# quarters (Q2 vs Q3 of the same year).
# NOTE(review): this reads Adjusted_Intl_Spend.csv (abroad-only cardholder
# base), while the matching online cell reads the "_mcg" file — confirm the
# two are meant to use different bases.

import pandas as pd

# Load the CSV file
file_path = "Adjusted_Intl_Spend.csv"
df = pd.read_csv(file_path)

# Extract year and quarter from 'time_period_value'
df[['year', 'quarter']] = df['time_period_value'].str.extract(r'(\d{4})Q(\d)').astype(int)

# Filter for Q2 and Q3 from 2022 to 2024
filtered_df = df[df['year'].isin([2022, 2023, 2024]) & df['quarter'].isin([2, 3])]

# One percentage change per year that has both a Q2 and a Q3 row.
q2_q3_changes = []
for year in [2022, 2023, 2024]:
    q2 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 2)]['Adjust_Quarterly_Intl_Spend']
    q3 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 3)]['Adjust_Quarterly_Intl_Spend']
    if not q2.empty and not q3.empty:
        change = ((q3.values[0] - q2.values[0]) / q2.values[0]) * 100
        q2_q3_changes.append(change)

# Average over the years actually collected. Dividing by a fixed 3 would
# understate the mean whenever a year is skipped above. With no usable year
# the average is undefined (NaN).
if q2_q3_changes:
    average_change = sum(q2_q3_changes) / len(q2_q3_changes)
else:
    average_change = float("nan")

print(f"Average MoM change in adjusted international total spending from Q2 to Q3 (2022–2024): {average_change:.2f}%")




In [None]:
# Cardholder-adjusted quarterly online spend abroad, using the abroad-online
# cardholder totals (marked "NO NEED" by the author).
#
# Formula, per quarter q (this is what the code computes):
#   Adjust_Quarterly_Intl_Spend_Online(q) =
#       Intl_spend_Online_cardholders(2019Q1) / Intl_spend_Online_cardholders(q) * online_Intl_All(q)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_spending_by_Intl_Online1.csv")
df_all = pd.read_csv("Q_spending_by_Online_Intl_All.csv")

# Base-quarter (2019Q1) cardholder count: the numerator of the scaling factor.
base_rows = df_cardholders.loc[
    df_cardholders["time_period_value"] == "2019Q1", "Intl_spend_Online_cardholders"
]
if base_rows.empty:
    raise ValueError("UK_spending_by_Intl_Online1.csv has no row for base quarter 2019Q1.")
cardholders_2019Q1 = base_rows.values[0]

# Match spend to cardholders by quarter label, not by row position, so the two
# files do not need identical row order or identical quarter coverage.
# Quarters absent from the spend file yield NaN.
spend_by_quarter = df_all.set_index("time_period_value")["online_Intl_All"]
df_cardholders["Adjust_Quarterly_Intl_Spend_Online"] = (
    cardholders_2019Q1 / df_cardholders["Intl_spend_Online_cardholders"]
) * df_cardholders["time_period_value"].map(spend_by_quarter)

# Save or view the result
df_cardholders.to_csv("Adjusted_Intl_Spend_Online.csv", index=False)
print(df_cardholders)




In [None]:
# Online share of (unadjusted) spend abroad, per quarter.

import pandas as pd

# Quarterly online and total abroad spend written by the summary cells above.
online_spending = pd.read_csv("Q_spending_by_Online_Intl_All.csv")
total_spending = pd.read_csv("Q_spending_by_Intl_All.csv")

# Inner join: keep only the quarters present in both files.
merged_df = online_spending.merge(total_spending, on="time_period_value", how="inner")

# Online spend as a percentage of total spend.
online_abroad = merged_df["online_Intl_All"]
total_abroad = merged_df["abroad_spend_all"]
merged_df["online_spending_ratio"] = online_abroad.div(total_abroad).mul(100)

# Persist just the quarter label and the ratio.
ratio_columns = ["time_period_value", "online_spending_ratio"]
merged_df[ratio_columns].to_csv("Q_Online_Spending_Ratio_Intl.csv", index=False)

print("Online spending ratio by quarter has been saved to 'Q_Online_Spending_Ratio_Intl.csv'.")


In [None]:
# Quarterly comparison of online spending ratios: abroad vs domestic.
#
# The figure is saved under its own file name. The adjusted-value comparison
# cell further down saves to "Online_Spending_Ratio_Comparison.png", so
# writing this figure to that same name would let the later cell overwrite it.

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV files
intl_ratio_file = "Q_Online_Spending_Ratio_Intl.csv"
dom_ratio_file = "Q_Online_Spending_Ratio_Dom_Ad_mcg.csv"

# Read the data, ordered by quarter label.
intl_df = pd.read_csv(intl_ratio_file).sort_values("time_period_value")
dom_df = pd.read_csv(dom_ratio_file).sort_values("time_period_value")

# Both files name their value column "online_spending_ratio"; explicit suffixes
# give the merged columns stable, self-describing names instead of relying on
# column position.
merged_df = pd.merge(
    intl_df, dom_df, on="time_period_value", how="inner", suffixes=("_intl", "_dom")
)

# Plot the two ratio series against the quarter label.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(merged_df["time_period_value"], merged_df["online_spending_ratio_intl"], label="International Online Spending Ratio", marker='o')
ax.plot(merged_df["time_period_value"], merged_df["online_spending_ratio_dom"], label="Domestic Online Spending Ratio", marker='o')
plt.xticks(rotation=45)
ax.set_xlabel("Quarter")
ax.set_ylabel("Online Spending Ratio (%)")
ax.set_title("Quarterly Online Spending Ratio: International vs Domestic")
ax.legend()
fig.tight_layout()
ax.grid(True)
fig.savefig("Q_Online_Spending_Ratio_Comparison.png")
plt.show()



In [None]:
# Online share of cardholder-adjusted spend abroad, per quarter (mcg = 'All').

import pandas as pd

# Adjusted online and total abroad spend written by the adjustment cells above.
online_spending = pd.read_csv("Adjusted_Intl_Spend_Online_mcg.csv")
total_spending = pd.read_csv("Adjusted_Intl_Spend_mcg.csv")

# Inner join: keep only the quarters present in both files.
merged_df = online_spending.merge(total_spending, on="time_period_value", how="inner")

# Adjusted online spend as a percentage of adjusted total spend.
adjusted_online = merged_df["Adjust_Quarterly_Intl_Spend_Online"]
adjusted_total = merged_df["Adjust_Quarterly_Intl_Spend"]
merged_df["Adjusted_online_Intl_ratio"] = adjusted_online.div(adjusted_total).mul(100)

# Persist just the quarter label and the ratio.
ratio_columns = ["time_period_value", "Adjusted_online_Intl_ratio"]
merged_df[ratio_columns].to_csv("Adjusted_Online_Intl_Ratio_mcg.csv", index=False)

print("Online spending ratio by quarter has been saved to 'Adjusted_Online_Intl_Ratio_mcg.csv'.")


In [None]:
# Online share of cardholder-adjusted spend abroad, per quarter, built from the
# files that use the abroad-specific cardholder bases.

import pandas as pd

# Adjusted online and total abroad spend written by the adjustment cells above.
online_spending = pd.read_csv("Adjusted_Intl_Spend_Online.csv")
total_spending = pd.read_csv("Adjusted_Intl_Spend.csv")

# Inner join: keep only the quarters present in both files.
merged_df = online_spending.merge(total_spending, on="time_period_value", how="inner")

# Adjusted online spend as a percentage of adjusted total spend.
adjusted_online = merged_df["Adjust_Quarterly_Intl_Spend_Online"]
adjusted_total = merged_df["Adjust_Quarterly_Intl_Spend"]
merged_df["Adjusted_online_Intl_ratio"] = adjusted_online.div(adjusted_total).mul(100)

# Persist just the quarter label and the ratio.
ratio_columns = ["time_period_value", "Adjusted_online_Intl_ratio"]
merged_df[ratio_columns].to_csv("Adjusted_Online_Intl_Ratio.csv", index=False)

print("Online spending ratio by quarter has been saved to 'Adjusted_Online_Intl_Ratio.csv'.")


In [None]:
# Online share of cardholder-adjusted domestic spend, per quarter (mcg = 'All').

import pandas as pd

# Adjusted online and total domestic spend written by the adjustment cells above.
online_spending = pd.read_csv("Adjusted_Dom_Spend_Online_mcg.csv")
total_spending = pd.read_csv("Adjusted_Dom_Spend_mcg.csv")

# Inner join: keep only the quarters present in both files.
merged_df = online_spending.merge(total_spending, on="time_period_value", how="inner")

# Adjusted online spend as a percentage of adjusted total spend.
adjusted_online = merged_df["Adjust_Quarterly_Dom_Spend_Online"]
adjusted_total = merged_df["Adjust_Quarterly_Dom_Spend"]
merged_df["Adjusted_online_Dom_ratio"] = adjusted_online.div(adjusted_total).mul(100)

# Persist just the quarter label and the ratio.
ratio_columns = ["time_period_value", "Adjusted_online_Dom_ratio"]
merged_df[ratio_columns].to_csv("Adjusted_Online_Dom_Ratio_mcg.csv", index=False)

print("Online spending ratio by quarter has been saved to 'Adjusted_Online_Dom_Ratio_mcg.csv'.")

In [None]:
# Quarterly comparison of adjusted online spending ratios: abroad vs domestic
# (mcg = 'All').

import pandas as pd
import matplotlib.pyplot as plt

# Input files produced by the adjusted-ratio cells above.
intl_ratio_file = "Adjusted_Online_Intl_Ratio_mcg.csv"
dom_ratio_file = "Adjusted_Online_Dom_Ratio_mcg.csv"

# Read each file and order it by quarter label.
intl_df = pd.read_csv(intl_ratio_file).sort_values("time_period_value")
dom_df = pd.read_csv(dom_ratio_file).sort_values("time_period_value")

# Inner join: after the merge, column 1 is the international ratio and
# column 2 the domestic ratio.
merged_df = intl_df.merge(dom_df, on="time_period_value", how="inner")

# Draw one line per ratio column, in merge order.
plt.figure(figsize=(12, 6))
series_labels = (
    "Adjusted International Online Spending Ratio",
    "Adjusted Domestic Online Spending Ratio",
)
for column_position, series_label in enumerate(series_labels, start=1):
    plt.plot(
        merged_df["time_period_value"],
        merged_df.iloc[:, column_position],
        label=series_label,
        marker='o',
    )

plt.xticks(rotation=45)
plt.xlabel("Quarter")
plt.ylabel("Online Spending Ratio (%)")
plt.title("Quarterly Online Spending Ratio: International vs Domestic (Adjusted Value)")
plt.legend()
plt.tight_layout()
plt.grid(True)
plt.savefig("Online_Spending_Ratio_Comparison.png")
plt.show()




In [None]:
# Mean adjusted online ratio, abroad and domestic, over 2019Q1 to 2025Q2.

import pandas as pd

# Ratio files produced by the adjusted-ratio cells above.
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")

# Quarter labels 2019Q1 .. 2025Q2 (26 labels): every quarter of 2019-2024 plus
# the first two quarters of 2025.
quarters = [
    f"{year}Q{q}"
    for year in range(2019, 2026)
    for q in range(1, 5)
    if (year, q) <= (2025, 2)
]

# Keep only the rows whose quarter falls inside the window.
intl_in_window = intl_df['time_period_value'].isin(quarters)
dom_in_window = dom_df['time_period_value'].isin(quarters)
intl_filtered = intl_df[intl_in_window]
dom_filtered = dom_df[dom_in_window]

# Average ratio for each series.
intl_avg = intl_filtered['Adjusted_online_Intl_ratio'].mean()
dom_avg = dom_filtered['Adjusted_online_Dom_ratio'].mean()

print(f"Average Adjusted Online International Ratio (2019Q1 to 2025Q2): {intl_avg:.2f}")
print(f"Average Adjusted Online Domestic Ratio (2019Q1 to 2025Q2): {dom_avg:.2f}")



In [None]:
# Average gap (international ratio minus domestic ratio) in the adjusted online
# spending ratio, reported for two windows:
#   - 2019Q1 to 2021Q4
#   - 2022Q1 to 2025Q2

import pandas as pd

# Read both ratio tables
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")

# Quarter labels are matched as text, so force the key column to str in both tables
for ratio_table in (intl_df, dom_df):
    ratio_table['time_period_value'] = ratio_table['time_period_value'].astype(str)

# Default (inner) join: only quarters present in both tables survive
merged_df = intl_df.merge(dom_df, on='time_period_value', suffixes=('_intl', '_dom'))

# Quarter labels belonging to each window
period1_quarters = [f"{year}Q{q}" for year in range(2019, 2022) for q in range(1, 5)]
period2_quarters = [f"{year}Q{q}" for year in range(2022, 2025) for q in range(1, 5)]
period2_quarters += ['2025Q1', '2025Q2']

period1 = merged_df[merged_df['time_period_value'].isin(period1_quarters)]
period2 = merged_df[merged_df['time_period_value'].isin(period2_quarters)]

# Per-quarter gap, then the mean of that gap inside each window
ratio_gap = merged_df['Adjusted_online_Intl_ratio'] - merged_df['Adjusted_online_Dom_ratio']
avg_diff_period1 = ratio_gap.loc[period1.index].mean()
avg_diff_period2 = ratio_gap.loc[period2.index].mean()

print(f"Average difference in ratios from 2019Q1 to 2021Q4: {avg_diff_period1:.2f}%")
print(f"Average difference in ratios from 2022Q1 to 2025Q2: {avg_diff_period2:.2f}%")



In [None]:
# Seasonal pattern in the international online spending ratio since 2022. Two figures:
#   - the overall average ratio across 2022Q1 to 2025Q2
#   - the average ratio across the Q3 quarters only (2022Q3, 2023Q3, 2024Q3)

import pandas as pd

# Read the international ratio table; quarter labels are compared as text below
df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
df['time_period_value'] = df['time_period_value'].astype(str)

# Rows from 2022Q1 through 2025Q2, both ends included
quarter_label = df['time_period_value']
df_recent = df[(quarter_label >= '2022Q1') & (quarter_label <= '2025Q2')]

# Average over the whole window
overall_avg = df_recent['Adjusted_online_Intl_ratio'].mean()

# Average over the Q3 quarters inside the window
q3_quarters = [f"{year}Q3" for year in (2022, 2023, 2024)]
df_q3 = df_recent[df_recent['time_period_value'].isin(q3_quarters)]
q3_avg = df_q3['Adjusted_online_Intl_ratio'].mean()

print(f"Average online ratio from 2022Q1 to 2025Q2: {overall_avg:.1f}%")
print(f"Average online ratio in Q3 quarters (2022Q3, 2023Q3, 2024Q3): {q3_avg:.1f}%")



In [None]:
# Average relative change in the international online spending ratio between Q3 and Q4
# of the same year, taken over 2022, 2023 and 2024

import pandas as pd

# Read the international ratio table; quarter labels are handled as text
df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
df['time_period_value'] = df['time_period_value'].astype(str)

# Q3 / Q4 labels for the three years of interest
comparison_years = ('2022', '2023', '2024')
q3_periods = [f"{year}Q3" for year in comparison_years]
q4_periods = [f"{year}Q4" for year in comparison_years]

# Split out the Q3 rows and the Q4 rows, tagging each with its year
# (the first four characters of the 'YYYYQn' label)
is_q3 = df['time_period_value'].isin(q3_periods)
is_q4 = df['time_period_value'].isin(q4_periods)
df_q3 = df.loc[is_q3].assign(year=lambda part: part['time_period_value'].str[:4])
df_q4 = df.loc[is_q4].assign(year=lambda part: part['time_period_value'].str[:4])

# Pair each year's Q3 row with its Q4 row
merged = df_q3.merge(df_q4, on='year', suffixes=('_q3', '_q4'))

# Change from Q3 to Q4, expressed as a percentage of the Q3 value
q3_ratio = merged['Adjusted_online_Intl_ratio_q3']
q4_ratio = merged['Adjusted_online_Intl_ratio_q4']
merged['qoq_change'] = ((q4_ratio - q3_ratio) / q3_ratio) * 100

# Mean of the per-year changes
average_change = merged['qoq_change'].mean()

print(f"Average quarter-on-quarter percentage change in international online spending ratio from Q3 to Q4 (2022–2024): {average_change:.2f}%")



In [None]:
# To calculate the percentage increase in the adjusted domestic online ratio from 2019Q1 to 2025Q1

import pandas as pd

# Load the CSV file; quarter labels are matched as text, so normalise the key column
df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")
df['time_period_value'] = df['time_period_value'].astype(str)


def _dom_ratio_for_quarter(frame, quarter):
    """Return the Adjusted_online_Dom_ratio value of the first row matching `quarter`.

    Raises:
        ValueError: if `frame` has no row for `quarter`. Raised explicitly so a
            missing quarter is reported by name instead of as a bare IndexError.
    """
    matches = frame.loc[frame['time_period_value'] == quarter, 'Adjusted_online_Dom_ratio']
    if matches.empty:
        raise ValueError(
            f"Quarter {quarter!r} not found in Adjusted_Online_Dom_Ratio_mcg.csv"
        )
    return matches.iloc[0]


# Extract the values for 2019Q1 and 2025Q1
value_2019Q1 = _dom_ratio_for_quarter(df, '2019Q1')
value_2025Q1 = _dom_ratio_for_quarter(df, '2025Q1')

# Relative change, expressed as a percentage of the 2019Q1 value
percentage_increase = ((value_2025Q1 - value_2019Q1) / value_2019Q1) * 100

# Print the result
print(f"The percentage increase in Adjusted_online_Dom_ratio from 2019Q1 to 2025Q1 is {percentage_increase:.2f}%.")



In [None]:
# To calculate the percentage change in yearly average ratios from 2019 to 2024 for both domestic and international online spending

import pandas as pd

# Load the CSV files
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")

# The year is the first four characters of the 'YYYYQn' quarter label. Slicing the
# string avoids the previous regex + datetime round trip, whose non-raw 'Q\d'
# pattern is an invalid escape sequence in a normal string literal.
dom_df['year'] = dom_df['time_period_value'].astype(str).str[:4].astype(int)
intl_df['year'] = intl_df['time_period_value'].astype(str).str[:4].astype(int)

# Group by year and calculate the average ratio (result is indexed by integer year)
dom_avg = dom_df.groupby('year')['Adjusted_online_Dom_ratio'].mean()
intl_avg = intl_df.groupby('year')['Adjusted_online_Intl_ratio'].mean()

# Percentage change of the 2024 yearly average relative to the 2019 yearly average
dom_change = ((dom_avg[2024] - dom_avg[2019]) / dom_avg[2019]) * 100
intl_change = ((intl_avg[2024] - intl_avg[2019]) / intl_avg[2019]) * 100

# Last expression of the cell: displayed as (domestic change, international change)
(dom_change, intl_change)

