In [None]:
# Add the project root to sys.path so the local `fintrans_toolbox` package can be imported.
project_path = "/home/jupyter"
import os
import sys
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# BigQuery client passed to bq.read_bq_table_sql by the query cells in this notebook.
client = bigquery.Client()

In [None]:
# UK cardholders, all merchant channels, monthly (mcg = 'All'):
# total number of cardholders per month, summed over destination countries.

UK_spending_by_mcg_All = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_mcg_All = bq.read_bq_table_sql(client, UK_spending_by_mcg_All)

# Only the period key and the cardholder count are needed by the aggregation below.
required_columns = {'time_period_value', 'cardholders'}

if df_by_mcg_All is not None and required_columns.issubset(df_by_mcg_All.columns):
    # Group by month and sum the cardholder counts, then give the total a descriptive name.
    UK_spending_by_mcg_All = (
        df_by_mcg_All.groupby('time_period_value')['cardholders']
        .sum()
        .reset_index()
        .rename(columns={'cardholders': 'Spend_mcg_All_cardholders'})
    )
    print(UK_spending_by_mcg_All)

    # Save inside the success branch: on failure the name still holds the SQL
    # text, and calling .to_csv() on a string would raise AttributeError.
    UK_spending_by_mcg_All.to_csv("UK_Spend_Mth_All_cardholders.csv", index=False)
else:
    print("DataFrame is empty or missing required columns.")


In [None]:
# UK cardholders, Online channel, monthly (mcg = 'All'):
# total number of cardholders per month, summed over destination countries.

UK_spending_by_mcg_Online = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_mcg_All = bq.read_bq_table_sql(client, UK_spending_by_mcg_Online)

# Only the period key and the cardholder count are needed by the aggregation below.
required_columns = {'time_period_value', 'cardholders'}

if df_by_mcg_All is not None and required_columns.issubset(df_by_mcg_All.columns):
    # Group by month and sum the cardholder counts, then give the total a descriptive name.
    UK_spending_by_mcg_Online = (
        df_by_mcg_All.groupby('time_period_value')['cardholders']
        .sum()
        .reset_index()
        .rename(columns={'cardholders': 'Spend_mcg_Online_cardholders'})
    )
    print(UK_spending_by_mcg_Online)

    # Save inside the success branch: on failure the name still holds the SQL
    # text, and calling .to_csv() on a string would raise AttributeError.
    UK_spending_by_mcg_Online.to_csv("UK_Spend_Mth_Online_cardholders.csv", index=False)
else:
    print("DataFrame is empty or missing required columns.")


In [None]:
# UK cardholders, Face to Face channel, monthly (mcg = 'All'):
# total number of cardholders per month, summed over destination countries.

UK_spending_by_mcg_F2F = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All' 
and merchant_channel = 'Face to Face' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_mcg_All = bq.read_bq_table_sql(client, UK_spending_by_mcg_F2F)

# Only the period key and the cardholder count are needed by the aggregation below.
required_columns = {'time_period_value', 'cardholders'}

if df_by_mcg_All is not None and required_columns.issubset(df_by_mcg_All.columns):
    # Group by month and sum the cardholder counts, then give the total a descriptive name.
    UK_spending_by_mcg_F2F = (
        df_by_mcg_All.groupby('time_period_value')['cardholders']
        .sum()
        .reset_index()
        .rename(columns={'cardholders': 'Spend_mcg_F2F_cardholders'})
    )
    print(UK_spending_by_mcg_F2F)

    # Save inside the success branch: on failure the name still holds the SQL
    # text, and calling .to_csv() on a string would raise AttributeError.
    UK_spending_by_mcg_F2F.to_csv("UK_Spend_Mth_F2F_cardholders.csv", index=False)
else:
    print("DataFrame is empty or missing required columns.")


In [None]:
# Total UK cardholder spend per month, all merchant channels (mcg = 'All').

UK_spending_UK_All = '''SELECT time_period_value, SUM(spend) AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All'
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY 
time_period_value 
ORDER BY time_period_value'''

# Run the query and give the aggregate a channel-specific column name in one step.
df_by_UK_All = bq.read_bq_table_sql(client, UK_spending_UK_All).rename(
    columns={'total_spend': 'UK_spend_All'}
)

print(df_by_UK_All)

# Persist the monthly totals so later cells can reload them from disk.
csv_filename = "UK_All_Spending_Mth.csv"
df_by_UK_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Total UK cardholder Online spend per month (mcg = 'All').

UK_spending_Online_All = '''SELECT time_period_value, SUM(spend) AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All'
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY 
time_period_value 
ORDER BY time_period_value'''

# Run the query and give the aggregate a channel-specific column name in one step.
df_by_Online_All = bq.read_bq_table_sql(client, UK_spending_Online_All).rename(
    columns={'total_spend': 'Online_spend_All'}
)

print(df_by_Online_All)

# Persist the monthly totals so later cells can reload them from disk.
csv_filename = "UK_Online_Spending_Mth.csv"
df_by_Online_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Online share of total UK cardholder spend, per month (percent).

import pandas as pd

# Reload the two monthly spend series written by the query cells.
online_spending = pd.read_csv("UK_Online_Spending_Mth.csv")
all_spending = pd.read_csv("UK_All_Spending_Mth.csv")

# Keep only the months present in both series, matched on the period key.
merged_df = online_spending.merge(all_spending, on='time_period_value')

# Ratio = online spend / total spend, expressed as a percentage.
merged_df['online_spending_ratio'] = (
    merged_df['Online_spend_All'].div(merged_df['UK_spend_All']).mul(100)
)

# Persist the ratio table for the charting and analysis cells.
merged_df.to_csv("UK_Online_Spending_Ratio_Mth.csv", index=False)

# Show the first 25 rows as the cell output.
merged_df.head(25)


In [None]:
# Monthly Online Spending Ratio: simple line chart

import pandas as pd
import matplotlib.pyplot as plt

# Load the UK online spending ratio data
file_path = "UK_Online_Spending_Ratio_Mth.csv"
df = pd.read_csv(file_path)

# Pick the column that holds the time period, falling back to the first column.
if 'time_period_value' in df.columns:
    time_col = 'time_period_value'
elif 'Month' in df.columns:
    time_col = 'Month'
else:
    time_col = df.columns[0]  # fallback to first column

# Plot the periods as text so matplotlib spaces them evenly as categories.
# Left as integers, YYYYMM values sit on a numeric axis and every
# December -> January step (e.g. 201912 -> 202001) becomes a wide gap.
time_labels = df[time_col].astype(str)

# Plot the line chart
plt.figure(figsize=(10, 6))
plt.plot(time_labels, df['online_spending_ratio'], marker='o', linestyle='-')
plt.title('UK Monthly Online Spending Ratio')
plt.xlabel('Month')
plt.ylabel('Online Spending Ratio (%)')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_Spending_Ratio_Chart_Mth.png")
print("Line chart saved as UK_Online_Spending_Ratio_Chart_Mth.png")


In [None]:
# Monthly Online Spending Ratio: line chart on a date axis with one tick per month

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Reload the ratio table and parse the YYYYMM period into real dates.
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")
df['time_period_value'] = pd.to_datetime(df['time_period_value'], format='%Y%m')

# Draw the series on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    df['time_period_value'],
    df['online_spending_ratio'],
    marker='o',
    linestyle='-',
    color='skyblue',
)

# One labelled tick for every month, formatted as YYYY-MM.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.xticks(rotation=45)

# Titles, axis labels and grid.
ax.set_title("UK Online Spending Ratio Over Time")
ax.set_xlabel("Time Period")
ax.set_ylabel("Online Spending Ratio (%)")
ax.grid(True)
plt.tight_layout()

# Write the chart to disk, then display it.
plt.savefig("UK_Online_Spending_Ratio_LineChart.png")
plt.show()



In [None]:
# Monthly Online Spending Ratio ---------- Final

import pandas as pd
import matplotlib.pyplot as plt

# Read the CSV file
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")

# Ensure 'time_period_value' is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Keep data from 201901 onwards. The explicit .copy() makes the filtered frame
# independent, so adding 'short_label' below does not trigger
# SettingWithCopyWarning.
df = df[df['time_period_value'] >= '201901'].copy()

# Create shorter labels for the X-axis (e.g., '201901' -> 'Jan 19')
month_map = {
    '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
    '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
}
df['short_label'] = df['time_period_value'].apply(lambda x: f"{month_map.get(x[4:], '')} {x[:4][-2:]}")

# Plot the line chart
plt.figure(figsize=(12, 6))
plt.plot(df['short_label'], df['online_spending_ratio'], marker='o', linestyle='-', color='green')
plt.title("UK Online Spending Ratio Over Time (Monthly 2019 Jan - 2025 June)")
plt.xlabel("Time Period")
plt.ylabel("Online Spending Ratio (%)")
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_Spending_Ratio_LineChart_Mth.png")
plt.show()



In [None]:
# Stacked Chart Monthly Online Spending Ratio vs F2F Ratio ---------- New

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV file
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")

# Ensure 'time_period_value' is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Keep data from 201901 onwards and renumber the rows from 0.
# The x axis is categorical, so each label sits at its row position (0, 1, 2, ...).
# Without the reset, the index would keep its pre-filter labels and the year
# markers below would be shifted right by the number of rows dropped.
df = df[df['time_period_value'] >= '201901'].reset_index(drop=True)

# Create shorter labels for the X-axis (e.g., '201901' -> 'Jan 19')
month_map = {
    '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
    '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
}
df['short_label'] = df['time_period_value'].apply(lambda x: f"{month_map.get(x[4:], '')} {x[:4][-2:]}")

# Calculate Face-to-Face spending ratio as the complement of the online ratio
df['f2f_spending_ratio'] = 100 - df['online_spending_ratio']

# Prepare data for stacked area chart (one row per layer)
stacked_data = df[['online_spending_ratio', 'f2f_spending_ratio']].T

# Plot the stacked area chart
plt.figure(figsize=(14, 7))
plt.stackplot(df['short_label'], stacked_data, labels=['Online Spending', 'Face-to-Face Spending'], colors=['green', 'lightgray'])

# Add horizontal lines at key online spending ratio thresholds, labelled at the right edge
for threshold in [40, 50, 60]:
    plt.axhline(y=threshold, color='red', linestyle='--', linewidth=1)
    plt.text(len(df['short_label']) - 1, threshold + 1, f'{threshold}%', color='red', ha='right')

# Add vertical lines at the first plotted month of each year.
# drop_duplicates keeps the first row of each year; its index is the x position.
year_starts = df['time_period_value'].str[:4].drop_duplicates()
for first_month_index, year in year_starts.items():
    plt.axvline(x=first_month_index, color='blue', linestyle=':', linewidth=1)
    plt.text(first_month_index, 102, year, color='blue', rotation=90, va='top')

# Final chart formatting
plt.title("100% Stacked Area Chart: Online vs Face-to-Face Spending in UK (2019 Jan - 2025 Jun)")
plt.xlabel("Time Period")
plt.ylabel("Spending Ratio (%)")
plt.xticks(rotation=90)
plt.legend(title="Channels", bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_vs_F2F_Spending_StackedAreaChart.png")
plt.show()



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV file
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")

# Ensure 'time_period_value' is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Keep data from 201901 onwards and renumber the rows from 0.
# The x axis is categorical, so each label sits at its row position (0, 1, 2, ...).
# Without the reset, the index would keep its pre-filter labels and the blue
# year markers below would be shifted right by the number of rows dropped.
df = df[df['time_period_value'] >= '201901'].reset_index(drop=True)

# Create shorter labels for the X-axis (e.g., '201901' -> 'Jan 19')
month_map = {
    '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
    '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
}
df['short_label'] = df['time_period_value'].apply(lambda x: f"{month_map.get(x[4:], '')} {x[:4][-2:]}")

# Calculate Face-to-Face spending ratio as the complement of the online ratio
df['f2f_spending_ratio'] = 100 - df['online_spending_ratio']

# Prepare data for stacked area chart (one row per layer)
stacked_data = df[['online_spending_ratio', 'f2f_spending_ratio']].T

# Plot the stacked area chart
plt.figure(figsize=(14, 7))
plt.stackplot(df['short_label'], stacked_data, labels=['Online', 'Face-to-Face'], colors=['green', 'lightgray'])

# Add vertical grey dotted lines for each month
for label in df['short_label']:
    plt.axvline(x=label, color='gray', linestyle=':', linewidth=0.5)

# Add a vertical black dashed line at the first plotted month of each year
years = sorted(set([label[-2:] for label in df['short_label']]))
for year in years:
    year_labels = [label for label in df['short_label'] if label.endswith(year)]
    if year_labels:
        plt.axvline(x=year_labels[0], color='black', linestyle='--', linewidth=1)

# Add a vertical blue dotted line and a year caption at each yearly boundary.
# drop_duplicates keeps the first row of each year; its index is the x position.
year_starts = df['time_period_value'].str[:4].drop_duplicates()
for first_month_index, year in year_starts.items():
    plt.axvline(x=first_month_index, color='blue', linestyle=':', linewidth=1)
    plt.text(first_month_index, 102, year, color='blue', rotation=90, va='top')

# Add horizontal lines for ratio thresholds
for ratio in [40, 50, 60]:
    plt.axhline(y=ratio, color='red', linestyle='--', linewidth=1)
    plt.text(df['short_label'].iloc[-1], ratio + 1, f'{ratio}%', color='red', va='bottom')

# Final chart formatting
plt.title("100% Stacked Area Chart: Online vs Face-to-Face Spending in UK (Monthly)")
plt.xlabel("Time Period")
plt.ylabel("Spending Ratio (%)")
plt.xticks(rotation=90)
plt.legend(title="Channels", bbox_to_anchor=(1, 1), loc='upper left')
plt.grid(False)
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_vs_F2F_Spending_StackedAreaChart.png")
plt.show()



In [None]:
# Average percentage change in the online spending ratio:
#   - November -> December, for 2021 to 2024
#   - December -> January, for January 2022 to January 2025

import pandas as pd

# Load the ratio table with the period key held as text, so it can be matched
# against 'YYYYMM' strings.
df = pd.read_csv('UK_Online_Spending_Ratio_Mth.csv').astype({'time_period_value': str})

def percent_change(old, new):
    """Return the change from `old` to `new` as a percentage of `old`."""
    delta = new - old
    return (delta / old) * 100

# Average change from November to December of the same year, 2021 to 2024.
nov_dec_drops = []
for year in range(2021, 2025):
    nov = df.loc[df['time_period_value'] == f'{year}11', 'online_spending_ratio']
    dec = df.loc[df['time_period_value'] == f'{year}12', 'online_spending_ratio']
    # Skip years where either month is missing from the data.
    if not nov.empty and not dec.empty:
        nov_dec_drops.append(percent_change(nov.values[0], dec.values[0]))

avg_nov_dec_drop = sum(nov_dec_drops) / len(nov_dec_drops) if nov_dec_drops else None

# Average change from December to the following January, for January 2022 to
# January 2025. Each January is paired with the December of the PREVIOUS year
# (Dec 2021 -> Jan 2022, ..., Dec 2024 -> Jan 2025).
dec_jan_increases = []
for jan_year in range(2022, 2026):
    dec = df.loc[df['time_period_value'] == f'{jan_year - 1}12', 'online_spending_ratio']
    jan = df.loc[df['time_period_value'] == f'{jan_year}01', 'online_spending_ratio']
    # Skip pairs where either month is missing from the data.
    if not dec.empty and not jan.empty:
        dec_jan_increases.append(percent_change(dec.values[0], jan.values[0]))

avg_dec_jan_increase = sum(dec_jan_increases) / len(dec_jan_increases) if dec_jan_increases else None

# Format defensively: an average is None when no month pair was found, and
# applying ':.2f' to None would raise TypeError.
nov_dec_text = f"{avg_nov_dec_drop:.2f}%" if avg_nov_dec_drop is not None else "n/a"
dec_jan_text = f"{avg_dec_jan_increase:.2f}%" if avg_dec_jan_increase is not None else "n/a"

# Print the results
print(f"Average percentage drop in online spending ratio from Nov to Dec (2021-2024): {nov_dec_text}")
print(f"Average percentage increase in online spending ratio from Dec to Jan (2022-2025): {dec_jan_text}")



In [None]:
# Average percentage change in the Face-to-Face (F2F) ratio:
#   - November -> December, for 2021 to 2024
#   - December -> January, for January 2022 to January 2025

import pandas as pd

# Load the ratio table, derive the F2F share as the complement of the online
# share, and hold the period key as text for matching against 'YYYYMM' strings.
df = (
    pd.read_csv('UK_Online_Spending_Ratio_Mth.csv')
    .assign(F2F_ratio=lambda frame: 100 - frame['online_spending_ratio'])
    .astype({'time_period_value': str})
)

def calculate_percentage_change(df, year, from_month, to_month):
    """Return the percentage change in 'F2F_ratio' between two months.

    Parameters
    ----------
    df : DataFrame with a string 'time_period_value' column ('YYYYMM') and an
        'F2F_ratio' column.
    year : calendar year of `from_month`.
    from_month, to_month : month numbers (1-12).

    When `to_month` is earlier in the calendar than `from_month` (for example
    December -> January), the target month is taken from the FOLLOWING year, so
    the comparison always moves forward in time.

    Returns
    -------
    The change as a percentage of the `from_month` value, or None when either
    period is missing from `df`.
    """
    # Roll over to the next year for wrap-around pairs such as Dec -> Jan.
    to_year = year + 1 if to_month < from_month else year
    from_period = f"{year}{from_month:02d}"
    to_period = f"{to_year}{to_month:02d}"
    from_value = df.loc[df['time_period_value'] == from_period, 'F2F_ratio']
    to_value = df.loc[df['time_period_value'] == to_period, 'F2F_ratio']
    if from_value.empty or to_value.empty:
        return None
    start = from_value.values[0]
    return ((to_value.values[0] - start) / start) * 100

# F2F ratio change from Nov to Dec for 2021 to 2024
nov_to_dec_changes = []
for year in range(2021, 2025):
    change = calculate_percentage_change(df, year, 11, 12)
    # None means one of the two months is missing; leave it out of the average.
    if change is not None:
        nov_to_dec_changes.append(change)

# F2F ratio change from Dec to Jan for January 2022 to January 2025
# (the December passed in belongs to the year before each January).
dec_to_jan_changes = []
for year in range(2022, 2026):
    change = calculate_percentage_change(df, year - 1, 12, 1)
    if change is not None:
        dec_to_jan_changes.append(change)

# Compute averages (None when no month pair was found)
avg_nov_to_dec_increase = sum(nov_to_dec_changes) / len(nov_to_dec_changes) if nov_to_dec_changes else None
avg_dec_to_jan_decrease = sum(dec_to_jan_changes) / len(dec_to_jan_changes) if dec_to_jan_changes else None

# Format defensively: applying ':.2f' to None would raise TypeError.
nov_to_dec_text = f"{avg_nov_to_dec_increase:.2f}%" if avg_nov_to_dec_increase is not None else "n/a"
dec_to_jan_text = f"{avg_dec_to_jan_decrease:.2f}%" if avg_dec_to_jan_decrease is not None else "n/a"

# Display results
print(f"Average F2F ratio increase from Nov to Dec (2021-2024): {nov_to_dec_text}")
print(f"Average F2F ratio decrease from Dec to Jan (2022-2025): {dec_to_jan_text}")



In [None]:
# Monthly All Spend, adjusted to the 201901 cardholder base ---------- mcg = 'All'

# Formula: Adjust_Monthly_All_Spend =
#   (Spend_mcg_All_cardholders (201901) / Spend_mcg_All_cardholders (month)) × UK_spend_All (month)

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_Spend_Mth_All_cardholders.csv")
df_all = pd.read_csv("UK_All_Spending_Mth.csv")

# Ensure time_period_value is treated as string for consistency
df_cardholders['time_period_value'] = df_cardholders['time_period_value'].astype(str)
df_all['time_period_value'] = df_all['time_period_value'].astype(str)

# The 201901 cardholder count is the baseline; fail early if it is absent.
if '201901' not in df_cardholders['time_period_value'].values:
    raise ValueError("The time_period_value '201901' is not found in UK_Spend_Mth_All_cardholders.csv")

# Baseline cardholder count for 201901
cardholders_201901 = df_cardholders.loc[df_cardholders["time_period_value"] == "201901", "Spend_mcg_All_cardholders"].values[0]

# Look up each month's spend by its period key, not by row position, so the
# result stays correct if the two files differ in row order or in the months
# they cover (months without a spend value become NaN).
spend_by_period = df_all.set_index('time_period_value')['UK_spend_All']
monthly_spend = df_cardholders['time_period_value'].map(spend_by_period)

# Calculate adjusted monthly all-channel spend
df_cardholders["Adjust_Monthly_All_Spend"] = (
    cardholders_201901 / df_cardholders["Spend_mcg_All_cardholders"]
) * monthly_spend

# Save or view the result
df_cardholders.to_csv("Adjusted_All_Spend_Mth.csv", index=False)
print(df_cardholders)


In [None]:
# Monthly Online Spend, adjusted to the 201901 cardholder base ---------- mcg = 'All'

# Formula: Adjust_Monthly_Online_Spend =
#   (Spend_mcg_All_cardholders (201901) / Spend_mcg_All_cardholders (month)) × Online_spend_All (month)
# NOTE(review): the adjustment uses the all-channel cardholder counts
# (UK_Spend_Mth_All_cardholders.csv); confirm that this, rather than
# UK_Spend_Mth_Online_cardholders.csv, is the intended denominator.

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_Spend_Mth_All_cardholders.csv")
df_all = pd.read_csv("UK_Online_Spending_Mth.csv")

# Ensure time_period_value is treated as string for consistency
df_cardholders['time_period_value'] = df_cardholders['time_period_value'].astype(str)
df_all['time_period_value'] = df_all['time_period_value'].astype(str)

# The 201901 cardholder count is the baseline; fail early if it is absent.
if '201901' not in df_cardholders['time_period_value'].values:
    raise ValueError("The time_period_value '201901' is not found in UK_Spend_Mth_All_cardholders.csv")

# Baseline cardholder count for 201901
cardholders_201901 = df_cardholders.loc[df_cardholders["time_period_value"] == "201901", "Spend_mcg_All_cardholders"].values[0]

# Look up each month's spend by its period key, not by row position, so the
# result stays correct if the two files differ in row order or in the months
# they cover (months without a spend value become NaN).
spend_by_period = df_all.set_index('time_period_value')['Online_spend_All']
monthly_spend = df_cardholders['time_period_value'].map(spend_by_period)

# Calculate adjusted monthly online spend
df_cardholders["Adjust_Monthly_Online_Spend"] = (
    cardholders_201901 / df_cardholders["Spend_mcg_All_cardholders"]
) * monthly_spend

# Save or view the result
df_cardholders.to_csv("Adjusted_Online_Spend_Mth.csv", index=False)
print(df_cardholders)


In [None]:
# Month-on-month % change of the adjusted online spend. --------------- NOT IN USE
# Reports the average and range of the December changes (2021-2024) and the
# January changes (2021-2025).

import pandas as pd

# Load the adjusted series with the period key as text, in chronological order.
df = (
    pd.read_csv("Adjusted_Online_Spend_Mth.csv")
    .astype({'time_period_value': str})
    .sort_values(by='time_period_value')
)

# Month-on-month % change of Adjust_Monthly_Online_Spend
df['MoM_%_Change'] = df['Adjust_Monthly_Online_Spend'].pct_change().mul(100)

# Period keys of interest: December 2021-2024 and January 2021-2025
december_months = [f"{yr}12" for yr in range(2021, 2025)]
january_months = [f"{yr}01" for yr in range(2021, 2026)]

is_december = df['time_period_value'].isin(december_months)
is_january = df['time_period_value'].isin(january_months)
december_changes = df.loc[is_december, 'MoM_%_Change']
january_changes = df.loc[is_january, 'MoM_%_Change']

# Average and range for each month
december_avg, december_min, december_max = (
    december_changes.mean(), december_changes.min(), december_changes.max()
)
january_avg, january_min, january_max = (
    january_changes.mean(), january_changes.min(), january_changes.max()
)

print(f"Average January MoM % Increase: {january_avg:.2f}% (Range: {january_min:.2f}% to {january_max:.2f}%)")
print(f"Average December MoM % Decrease: {december_avg:.2f}% (Range: {december_min:.2f}% to {december_max:.2f}%)")



In [None]:
# Month-on-month % change of the adjusted online spend. --------------- NOT IN USE

import pandas as pd

# Load the CSV file
df = pd.read_csv("Adjusted_Online_Spend_Mth.csv")

# Ensure the time_period_value column is treated as string for period matching
df['time_period_value'] = df['time_period_value'].astype(str)

# Sort the data by time_period_value to ensure chronological order
df = df.sort_values(by='time_period_value')

# Calculate Month-on-Month % Change using Adjust_Monthly_Online_Spend
df['MoM_%_Change'] = df['Adjust_Monthly_Online_Spend'].pct_change() * 100

# Extract December data for 2021 to 2024 and January data for 2021 to 2025.
# range() excludes its end value, so the end is one past the last year wanted.
december_months = [f"{year}12" for year in range(2021, 2024 + 1)]
january_months = [f"{year}01" for year in range(2021, 2025 + 1)]

december_changes = df[df['time_period_value'].isin(december_months)]['MoM_%_Change']
january_changes = df[df['time_period_value'].isin(january_months)]['MoM_%_Change']

# Calculate the average and range for the December and January changes
december_avg = december_changes.mean()
december_min = december_changes.min()
december_max = december_changes.max()

january_avg = january_changes.mean()
january_min = january_changes.min()
january_max = january_changes.max()

print(f"Average January MoM % Increase: {january_avg:.2f}% (Range: {january_min:.2f}% to {january_max:.2f}%)")
print(f"Average December MoM % Decrease: {december_avg:.2f}% (Range: {december_min:.2f}% to {december_max:.2f}%)")



In [None]:
# Month-on-month % change of the adjusted UK All spend.
# Reports the average and range of the December changes (2021-2024) and of the
# following January changes (2022-2025).

import pandas as pd

# Load the CSV file
df = pd.read_csv("Adjusted_All_Spend_Mth.csv")

# Ensure the time_period_value column is treated as string for period matching
df['time_period_value'] = df['time_period_value'].astype(str)

# Sort the data by time_period_value to ensure chronological order
df = df.sort_values(by='time_period_value')

# Calculate Month-on-Month % Change using Adjust_Monthly_All_Spend
df['MoM_%_Change'] = df['Adjust_Monthly_All_Spend'].pct_change() * 100

# December 2021-2024 and the January that follows each (2022-2025).
# range() excludes its end value, so the end is one past the last year wanted;
# the previous range(2021, 2024) silently dropped December 2024.
december_months = [f"{year}12" for year in range(2021, 2024 + 1)]
january_months = [f"{year}01" for year in range(2022, 2025 + 1)]

december_changes = df[df['time_period_value'].isin(december_months)]['MoM_%_Change']
january_changes = df[df['time_period_value'].isin(january_months)]['MoM_%_Change']

# Calculate the average and range for the December and January changes
december_avg = december_changes.mean()
december_min = december_changes.min()
december_max = december_changes.max()

january_avg = january_changes.mean()
january_min = january_changes.min()
january_max = january_changes.max()

print(f"Average January MoM % Decrease: {january_avg:.2f}% (Range: {january_min:.2f}% to {january_max:.2f}%)")
print(f"Average December MoM % Increase: {december_avg:.2f}% (Range: {december_min:.2f}% to {december_max:.2f}%)")



In [None]:
# Total UK cardholder Face to Face spend per month (mcg = 'All').

UK_spending_F2F_All = '''SELECT time_period_value, SUM(spend) AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All'
and merchant_channel = 'Face to Face' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY 
time_period_value 
ORDER BY time_period_value'''

# Run the query and give the aggregate a channel-specific column name in one step.
df_by_F2F_All = bq.read_bq_table_sql(client, UK_spending_F2F_All).rename(
    columns={'total_spend': 'F2F_spend_All'}
)

print(df_by_F2F_All)

# Persist the monthly totals so later cells can reload them from disk.
csv_filename = "UK_F2F_Spending_Mth.csv"
df_by_F2F_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Monthly Face to Face Spend, adjusted to the 201901 cardholder base ---------- mcg = 'All'

# Formula: Adjust_Monthly_F2F_Spend =
#   (Spend_mcg_All_cardholders (201901) / Spend_mcg_All_cardholders (month)) × F2F_spend_All (month)
# NOTE(review): the adjustment uses the all-channel cardholder counts
# (UK_Spend_Mth_All_cardholders.csv); confirm that this, rather than
# UK_Spend_Mth_F2F_cardholders.csv, is the intended denominator.

import pandas as pd

# Load the two datasets
df_cardholders = pd.read_csv("UK_Spend_Mth_All_cardholders.csv")
df_all = pd.read_csv("UK_F2F_Spending_Mth.csv")

# Ensure time_period_value is treated as string for consistency
df_cardholders['time_period_value'] = df_cardholders['time_period_value'].astype(str)
df_all['time_period_value'] = df_all['time_period_value'].astype(str)

# The 201901 cardholder count is the baseline; fail early if it is absent.
if '201901' not in df_cardholders['time_period_value'].values:
    raise ValueError("The time_period_value '201901' is not found in UK_Spend_Mth_All_cardholders.csv")

# Baseline cardholder count for 201901
cardholders_201901 = df_cardholders.loc[df_cardholders["time_period_value"] == "201901", "Spend_mcg_All_cardholders"].values[0]

# Look up each month's spend by its period key, not by row position, so the
# result stays correct if the two files differ in row order or in the months
# they cover (months without a spend value become NaN).
spend_by_period = df_all.set_index('time_period_value')['F2F_spend_All']
monthly_spend = df_cardholders['time_period_value'].map(spend_by_period)

# Calculate adjusted monthly face-to-face spend
df_cardholders["Adjust_Monthly_F2F_Spend"] = (
    cardholders_201901 / df_cardholders["Spend_mcg_All_cardholders"]
) * monthly_spend

# Save or view the result
df_cardholders.to_csv("Adjusted_F2F_Spend_Mth.csv", index=False)
print(df_cardholders)

In [None]:
# Month-on-month % change of the adjusted Face-to-Face spend. --------------- NOT IN USE
# Reports the average and range of the November and December changes, 2021 to 2024.

import pandas as pd

# Load the CSV file
df = pd.read_csv("Adjusted_F2F_Spend_Mth.csv")

# Ensure the time_period_value column is treated as string for period matching
df['time_period_value'] = df['time_period_value'].astype(str)

# Sort the data by time_period_value to ensure chronological order
df = df.sort_values(by='time_period_value')

# Calculate Month-on-Month % Change using Adjust_Monthly_F2F_Spend
df['MoM_%_Change'] = df['Adjust_Monthly_F2F_Spend'].pct_change() * 100

# November and December of 2021 to 2024.
# range() excludes its end value, so the end is one past the last year wanted;
# the previous range(2021, 2024) silently dropped 2024.
nov_months = [f"{year}11" for year in range(2021, 2024 + 1)]
dec_months = [f"{year}12" for year in range(2021, 2024 + 1)]

nov_changes = df[df['time_period_value'].isin(nov_months)]['MoM_%_Change']
dec_changes = df[df['time_period_value'].isin(dec_months)]['MoM_%_Change']

# Calculate the average and range for the November and December changes
nov_avg = nov_changes.mean()
nov_min = nov_changes.min()
nov_max = nov_changes.max()

dec_avg = dec_changes.mean()
dec_min = dec_changes.min()
dec_max = dec_changes.max()

print(f"Average Dec MoM % Increase: {dec_avg:.2f}% (Range: {dec_min:.2f}% to {dec_max:.2f}%)")
print(f"Average Nov MoM % Decrease: {nov_avg:.2f}% (Range: {nov_min:.2f}% to {nov_max:.2f}%)")


In [None]:
# Month-on-month % change of the adjusted Face-to-Face spend.
# Reports the average and range of the December changes (2021-2024) and of the
# following January changes (2022-2025).

import pandas as pd

# Load the CSV file
df = pd.read_csv("Adjusted_F2F_Spend_Mth.csv")

# Ensure the time_period_value column is treated as string for period matching
df['time_period_value'] = df['time_period_value'].astype(str)

# Sort the data by time_period_value to ensure chronological order
df = df.sort_values(by='time_period_value')

# Calculate Month-on-Month % Change using Adjust_Monthly_F2F_Spend
df['MoM_%_Change'] = df['Adjust_Monthly_F2F_Spend'].pct_change() * 100

# December 2021-2024 and the January that follows each (2022-2025).
# range() excludes its end value, so the end is one past the last year wanted.
dec_months = [f"{year}12" for year in range(2021, 2024 + 1)]
jan_months = [f"{year}01" for year in range(2022, 2025 + 1)]

dec_changes = df[df['time_period_value'].isin(dec_months)]['MoM_%_Change']
jan_changes = df[df['time_period_value'].isin(jan_months)]['MoM_%_Change']

# December statistics must come from dec_changes; they were previously computed
# from jan_changes, so both printed lines reported the January figures.
dec_avg = dec_changes.mean()
dec_min = dec_changes.min()
dec_max = dec_changes.max()

jan_avg = jan_changes.mean()
jan_min = jan_changes.min()
jan_max = jan_changes.max()

print(f"Average Jan MoM % Increase: {jan_avg:.2f}% (Range: {jan_min:.2f}% to {jan_max:.2f}%)")
print(f"Average Dec MoM % Decrease: {dec_avg:.2f}% (Range: {dec_min:.2f}% to {dec_max:.2f}%)")


In [None]:
# Indexed Spend F2F All  ---------------- mcg = 'All'
# Indexed card spending (2019 average = 100):
#   Indexed Spend = (Adjust_Monthly_F2F_Spend / average Adjust_Monthly_F2F_Spend over 201901-201912) × 100

import pandas as pd
import matplotlib.pyplot as plt

# Load the adjusted F2F spend series from the CSV file
df = pd.read_csv("Adjusted_F2F_Spend_Mth.csv")

# Keep rows from 201901 onwards. The explicit .copy() makes the filtered frame
# independent, so adding columns to it does not trigger SettingWithCopyWarning.
df = df[df['time_period_value'] >= 201901].copy()

# Base value: the average adjusted F2F spend over the twelve months of 2019
df_2019 = df[df['time_period_value'].between(201901, 201912)]
base_avg = df_2019['Adjust_Monthly_F2F_Spend'].mean()

# Compute Indexed Spend
df['Indexed Spend F2F All'] = (df['Adjust_Monthly_F2F_Spend'] / base_avg) * 100

def format_period(period):
    """Turn a YYYYMM period (int or str) into a short label, e.g. 201901 -> 'Jan 19'.

    A month part that is not '01'..'12' is passed through unchanged.
    """
    month_names = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    text = str(period)
    yyyy, mm = text[:4], text[4:]
    return f"{month_names.get(mm, mm)} {yyyy[2:]}"

df['Formatted Period'] = df['time_period_value'].apply(format_period)

# Plot the Indexed Spend over time
plt.figure(figsize=(12, 6))
plt.plot(df['Formatted Period'], df['Indexed Spend F2F All'], marker='o', linestyle='-')
plt.title('Indexed All F2F Spend Over Time Monthly (Base: 2019 Average = 100)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend')
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()
plt.savefig("indexed_spend_plot.png")
plt.show()

# Save to CSV
df.to_csv("Indexed_All_F2F_Mth.csv", index=False)



In [None]:
# Indexed Spend Online All  ---------------- mcg = 'All'
# Indexed card spending (average of 2019 = 100):
#   Indexed Spend = (Adjust_Monthly_Online_Spend / mean of Adjust_Monthly_Online_Spend over 201901-201912) × 100

import pandas as pd
import matplotlib.pyplot as plt

# Load the adjusted monthly Online spend
df = pd.read_csv("Adjusted_Online_Spend_Mth.csv")

# Keep rows from 201901 onwards. .copy() makes the result an independent frame so the
# column assignments below do not raise pandas' SettingWithCopyWarning.
df = df[df['time_period_value'] >= 201901].copy()

# Baseline: mean adjusted Online spend over the 2019 months
df_2019 = df[df['time_period_value'].between(201901, 201912)]
if df_2019.empty:
    # Without 2019 rows the mean is NaN and every indexed value would silently become NaN
    raise ValueError("No 2019 rows found in Adjusted_Online_Spend_Mth.csv; cannot compute the 2019 baseline.")
base_avg = df_2019['Adjust_Monthly_Online_Spend'].mean()

# Compute Indexed Spend
df['Indexed Spend Online All'] = (df['Adjust_Monthly_Online_Spend'] / base_avg) * 100


def format_period(period):
    """Turn a YYYYMM period (int or str) into a short axis label, e.g. 201901 -> 'Jan 19'.

    A month code that is not '01'..'12' is passed through unchanged.
    """
    period_str = str(period)
    year, month = period_str[:4], period_str[4:]
    month_map = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    return f"{month_map.get(month, month)} {year[2:]}"


df['Formatted Period'] = df['time_period_value'].apply(format_period)

# Plot the Indexed Spend over time
plt.figure(figsize=(12, 6))
plt.plot(df['Formatted Period'], df['Indexed Spend Online All'], marker='o', linestyle='-')
plt.title('Indexed All Online Spend Over Time Monthly (Base: 2019 Average = 100)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend')
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()
# NOTE(review): the F2F and UK-All indexed-spend cells save to this same file name, so
# after a full run only the last plot survives — consider a distinct file name per series.
plt.savefig("indexed_spend_plot.png")
plt.show()

# Save to CSV
df.to_csv("Indexed_All_Online_Mth.csv", index=False)



In [None]:
# Indexed Spend All  ---------------- mcg = 'All'
# Indexed card spending (average of 2019 = 100):
#   Indexed Spend = (Adjust_Monthly_All_Spend / mean of Adjust_Monthly_All_Spend over 201901-201912) × 100

import pandas as pd
import matplotlib.pyplot as plt

# Load the adjusted monthly total (all channels) spend
df = pd.read_csv("Adjusted_All_Spend_Mth.csv")

# Keep rows from 201901 onwards. .copy() makes the result an independent frame so the
# column assignments below do not raise pandas' SettingWithCopyWarning.
df = df[df['time_period_value'] >= 201901].copy()

# Baseline: mean adjusted total spend over the 2019 months
df_2019 = df[df['time_period_value'].between(201901, 201912)]
if df_2019.empty:
    # Without 2019 rows the mean is NaN and every indexed value would silently become NaN
    raise ValueError("No 2019 rows found in Adjusted_All_Spend_Mth.csv; cannot compute the 2019 baseline.")
base_avg = df_2019['Adjust_Monthly_All_Spend'].mean()

# Compute Indexed Spend
df['Indexed Spend UK All'] = (df['Adjust_Monthly_All_Spend'] / base_avg) * 100


def format_period(period):
    """Turn a YYYYMM period (int or str) into a short axis label, e.g. 201901 -> 'Jan 19'.

    A month code that is not '01'..'12' is passed through unchanged.
    """
    period_str = str(period)
    year, month = period_str[:4], period_str[4:]
    month_map = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    return f"{month_map.get(month, month)} {year[2:]}"


df['Formatted Period'] = df['time_period_value'].apply(format_period)

# Plot the Indexed Spend over time
plt.figure(figsize=(12, 6))
plt.plot(df['Formatted Period'], df['Indexed Spend UK All'], marker='o', linestyle='-')
plt.title('Indexed All UK Spend Over Time Monthly (Base: 2019 Average = 100)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend')
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()
# NOTE(review): the F2F and Online indexed-spend cells save to this same file name, so
# this plot overwrites theirs on a full run — consider a distinct file name per series.
plt.savefig("indexed_spend_plot.png")
plt.show()

# Save to CSV
df.to_csv("Indexed_All_UK_Mth.csv", index=False)



In [None]:
# Indexed line chart comparing UK All, Online and F2F totals (indexed values, adjusted, mcg = 'All')

import pandas as pd
import matplotlib.pyplot as plt

# Read back the three indexed monthly series saved by the indexing cells
uk_df = pd.read_csv("Indexed_All_UK_Mth.csv")
online_df = pd.read_csv("Indexed_All_Online_Mth.csv")
f2f_df = pd.read_csv("Indexed_All_F2F_Mth.csv")

plt.figure(figsize=(12, 6))

# One entry per line on the chart, drawn in this order:
# (frame, value column, legend label, marker, line style)
series_specs = [
    (uk_df, 'Indexed Spend UK All', 'Spend All', 'o', '-'),
    (online_df, 'Indexed Spend Online All', 'Online All', '^', '--'),
    (f2f_df, 'Indexed Spend F2F All', 'F2F All', 's', ':'),
]
for frame, value_col, legend_label, marker_shape, line_style in series_specs:
    plt.plot(
        frame['Formatted Period'],
        frame[value_col],
        label=legend_label,
        marker=marker_shape,
        linestyle=line_style,
    )

# Mark June 2021 with a vertical line, but only when that month is among the UK periods
june_2021_present = "Jun 21" in uk_df['Formatted Period'].values
if june_2021_present:
    plt.axvline(x="Jun 21", color='gray', linestyle='--', label="June 2021")

# Titles, axis labels, legend and layout
plt.title('Indexed Spend Comparison: All vs Online vs F2F')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend Monthly (Base 2019 = 100)')
plt.xticks(rotation=90)
plt.legend()
plt.grid(True)
plt.tight_layout()

# Write the figure to disk, then display it
plt.savefig("indexed_spend_comparison_Mth.png")
plt.show()



In [None]:
# Indexed line chart comparing UK All, Online and F2F totals — all series drawn with
# solid lines and dot markers.
# NOTE(review): this saves to the same file name as the previous comparison cell and
# therefore overwrites that figure.

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV files
uk_df = pd.read_csv("Indexed_All_UK_Mth.csv")
online_df = pd.read_csv("Indexed_All_Online_Mth.csv")
f2f_df = pd.read_csv("Indexed_All_F2F_Mth.csv")

# Create the plot
plt.figure(figsize=(12, 6))

# Plot each series with solid lines and dot markers
plt.plot(uk_df['Formatted Period'], uk_df['Indexed Spend UK All'], label='Spend All', marker='o', linestyle='-')
plt.plot(online_df['Formatted Period'], online_df['Indexed Spend Online All'], label='Online All', marker='o', linestyle='-')
plt.plot(f2f_df['Formatted Period'], f2f_df['Indexed Spend F2F All'], label='F2F All', marker='o', linestyle='-')

# Add vertical line at June 2021 only if that month is present in the data; on a
# string (categorical) axis an unknown label would otherwise be appended as a new
# category and the line drawn at the wrong place.
if "Jun 21" in uk_df['Formatted Period'].values:
    plt.axvline(x='Jun 21', color='gray', linestyle='--', label='June 2021')

# Customize the plot
plt.title('Indexed Spend Comparison Monthly: All vs Online vs F2F (2019 Jan to 2025 June)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend (Base 2019 ave = 100)')
plt.xticks(rotation=90)
plt.legend()
plt.grid(True)
plt.tight_layout()

# Save the plot
plt.savefig("indexed_spend_comparison_Mth.png")
plt.show()



In [None]:
# Growth rate of the indexed Online spend from 201901 to 202506 (Indexed_All_Online_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_Online_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_Online_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend Online All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend Online All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Growth rate of the indexed Online spend from 202106 to 202506 (Indexed_All_Online_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_Online_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_Online_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend Online All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend Online All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Growth rate of the indexed F2F spend from 201901 to 202506 (Indexed_All_F2F_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_F2F_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_F2F_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend F2F All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend F2F All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Growth rate of the indexed F2F spend from 202106 to 202506 (Indexed_All_F2F_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_F2F_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_F2F_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend F2F All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend F2F All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Growth rate of the indexed UK total spend from 201901 to 202506 (Indexed_All_UK_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_UK_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_UK_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend UK All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend UK All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Growth rate of the indexed UK total spend from 202106 to 202506 (Indexed_All_UK_Mth.csv)

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_UK_Mth.csv')

# Periods are compared as 'YYYYMM' strings
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: fail with a clear message instead of an
# opaque IndexError from the lookup below.
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} not found in Indexed_All_UK_Mth.csv")

# If the requested end period is missing, fall back to the latest period in the file
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Indexed values at the start and end periods (first matching row for each)
value_start = df.loc[period_labels == start_period, 'Indexed Spend UK All'].iloc[0]
value_end = df.loc[period_labels == end_period, 'Indexed Spend UK All'].iloc[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")




In [None]:
# Join the three cardholder tables and the three spend tables into a single monthly table

import pandas as pd

# Cardholder tables (all / online / F2F)
df_cardholders = pd.read_csv('UK_Spend_Mth_All_cardholders.csv')
df_cardholder_Online = pd.read_csv('UK_Spend_Mth_Online_cardholders.csv')
df_cardholder_F2F = pd.read_csv('UK_Spend_Mth_F2F_cardholders.csv')

# Spend tables (all / online / F2F)
df_all_spend = pd.read_csv('UK_All_Spending_Mth.csv')
df_online_spend = pd.read_csv('UK_Online_Spending_Mth.csv')
df_f2f_spend = pd.read_csv('UK_F2F_Spending_Mth.csv')

# Outer-join each remaining table onto the running result, keyed on 'time_period_value',
# so a period present in any one table is kept.
tables_to_join = (
    df_cardholder_Online,
    df_cardholder_F2F,
    df_all_spend,
    df_online_spend,
    df_f2f_spend,
)
merged_df = df_cardholders
for right_table in tables_to_join:
    merged_df = merged_df.merge(right_table, how='outer', on='time_period_value')

# Persist the combined table
merged_df.to_csv('UK_Spend_All_in_One.csv', index=False)

print("All six tables have been successfully merged into 'UK_Spend_All_in_One.csv'.")



In [None]:
# Add three per-cardholder monthly spend columns to the merged table:
#   Online per Cardholder = Online_spend_All / Spend_mcg_Online_cardholders
#   F2F per Cardholder    = F2F_spend_All    / Spend_mcg_F2F_cardholders
#   All per Cardholder    = UK_spend_All     / Spend_mcg_All_cardholders
# NOTE(review): each spend series is divided by its own channel's cardholder column —
# confirm this is the intended denominator for the Online and F2F figures.

import pandas as pd

# Cardholder tables (all / online / F2F)
df_cardholders = pd.read_csv('UK_Spend_Mth_All_cardholders.csv')
df_cardholder_Online = pd.read_csv('UK_Spend_Mth_Online_cardholders.csv')
df_cardholder_F2F = pd.read_csv('UK_Spend_Mth_F2F_cardholders.csv')

# Spend tables (all / online / F2F)
df_all_spend = pd.read_csv('UK_All_Spending_Mth.csv')
df_online_spend = pd.read_csv('UK_Online_Spending_Mth.csv')
df_f2f_spend = pd.read_csv('UK_F2F_Spending_Mth.csv')

# Outer-join each remaining table onto the running result, keyed on 'time_period_value'
merged_df = df_cardholders
for right_table in (df_cardholder_Online, df_cardholder_F2F, df_all_spend, df_online_spend, df_f2f_spend):
    merged_df = merged_df.merge(right_table, how='outer', on='time_period_value')

# (new column, spend column, cardholder column) — columns are appended in this order
per_cardholder_specs = [
    ('Online per Cardholder', 'Online_spend_All', 'Spend_mcg_Online_cardholders'),
    ('F2F per Cardholder', 'F2F_spend_All', 'Spend_mcg_F2F_cardholders'),
    ('All per Cardholder', 'UK_spend_All', 'Spend_mcg_All_cardholders'),
]
for new_col, spend_col, cardholder_col in per_cardholder_specs:
    merged_df[new_col] = merged_df[spend_col] / merged_df[cardholder_col]

# Persist the extended table
merged_df.to_csv('UK_Spend_Per_Cardholder.csv', index=False)

print("The updated dataset with per cardholder spending has been saved to 'UK_Spend_Per_Cardholder.csv'.")



In [None]:
# Percentage change from January 2019 (201901) to June 2025 (202506) for : -------------- WAY ONE
# Online per Cardholder
# F2F per Cardholder
# All per Cardholder
#
# NOTE(review): in this cell Online and F2F spend are both divided by the ALL-channel
# cardholder column (Spend_mcg_All_cardholders), whereas the cell that writes
# UK_Spend_Per_Cardholder.csv divides each by its own channel's cardholder column —
# confirm which denominator is intended before quoting these figures.

import pandas as pd

# Load the four CSV files
df_cardholders = pd.read_csv('UK_Spend_Mth_All_cardholders.csv')
df_all_spend = pd.read_csv('UK_All_Spending_Mth.csv')
df_online_spend = pd.read_csv('UK_Online_Spending_Mth.csv')
df_f2f_spend = pd.read_csv('UK_F2F_Spending_Mth.csv')


# Merge all datasets on 'time_period_value' (default inner join: only periods present in every table)
df_merged = df_cardholders.merge(df_all_spend, on="time_period_value") \
                         .merge(df_online_spend, on="time_period_value") \
                         .merge(df_f2f_spend, on="time_period_value")

# Calculate per cardholder spending columns
df_merged["Online per Cardholder"] = df_merged["Online_spend_All"] / df_merged["Spend_mcg_All_cardholders"]
df_merged["F2F per Cardholder"] = df_merged["F2F_spend_All"] / df_merged["Spend_mcg_All_cardholders"]
df_merged["All per Cardholder"] = df_merged["UK_spend_All"] / df_merged["Spend_mcg_All_cardholders"]

# Extract rows for 201901 and 202506 (or latest available)
start_row = df_merged[df_merged["time_period_value"] == 201901]
end_row = df_merged[df_merged["time_period_value"] == 202506]

# The baseline has no fallback: fail clearly instead of with an IndexError further down
if start_row.empty:
    raise ValueError("Baseline period 201901 is missing from the merged data; cannot compute percentage change.")

# If 202506 is not available, use the latest available period
if end_row.empty:
    end_row = df_merged.sort_values("time_period_value").iloc[[-1]]

# Period actually used as the end point (202506 or the fallback)
end_label = end_row['time_period_value'].values[0]


def percentage_change(start, end):
    """Return the change from start to end as a percentage of start."""
    return ((end - start) / start) * 100


change_online = percentage_change(start_row["Online per Cardholder"].values[0], end_row["Online per Cardholder"].values[0])
change_f2f = percentage_change(start_row["F2F per Cardholder"].values[0], end_row["F2F per Cardholder"].values[0])
change_all = percentage_change(start_row["All per Cardholder"].values[0], end_row["All per Cardholder"].values[0])

# Display the results
print(f"Percentage change in Online per Cardholder from 201901 to {end_label}: {change_online:.2f}%")
print(f"Percentage change in F2F per Cardholder from 201901 to {end_label}: {change_f2f:.2f}%")
print(f"Percentage change in All per Cardholder from 201901 to {end_label}: {change_all:.2f}%")



In [None]:
# Percentage change from January 2019 (201901) to June 2025 (202506) for : -------------- WAY ONE ----------- USE THIS ONE
# Online per Cardholder
# F2F per Cardholder
# All per Cardholder
# Each spend series is divided by the cardholder column of the same channel.

import pandas as pd

# Load all relevant CSV files
df_cardholders = pd.read_csv('UK_Spend_Mth_All_cardholders.csv')
df_cardholder_Online = pd.read_csv('UK_Spend_Mth_Online_cardholders.csv')
df_cardholder_F2F = pd.read_csv('UK_Spend_Mth_F2F_cardholders.csv')
df_all_spend = pd.read_csv('UK_All_Spending_Mth.csv')
df_online_spend = pd.read_csv('UK_Online_Spending_Mth.csv')
df_f2f_spend = pd.read_csv('UK_F2F_Spending_Mth.csv')

# Merge all datasets on 'time_period_value' (outer join keeps periods missing from some tables)
df_merged = df_cardholders.merge(df_cardholder_Online, on="time_period_value", how="outer") \
                          .merge(df_cardholder_F2F, on="time_period_value", how="outer") \
                          .merge(df_all_spend, on="time_period_value", how="outer") \
                          .merge(df_online_spend, on="time_period_value", how="outer") \
                          .merge(df_f2f_spend, on="time_period_value", how="outer")

# Display column names to identify correct ones
print("Columns in merged dataframe:", df_merged.columns.tolist())


def _first_matching_column(columns, *keywords):
    """Return the first column name containing every keyword (case-sensitive substring match).

    Raises KeyError listing the keywords and the available columns when nothing matches.
    """
    for col in columns:
        if all(word in col for word in keywords):
            return col
    raise KeyError(f"No column containing all of {keywords} among {list(columns)}")


# Identify the cardholder and spend columns by the keywords in their names.
# The match is case-sensitive: lowercase 'spend' does not match a 'Spend_...' prefix.
cardholder_online_col = _first_matching_column(df_merged.columns, 'Online', 'cardholders')
cardholder_f2f_col = _first_matching_column(df_merged.columns, 'F2F', 'cardholders')
cardholder_all_col = _first_matching_column(df_merged.columns, 'All', 'cardholders')

spend_online_col = _first_matching_column(df_merged.columns, 'Online', 'spend')
spend_f2f_col = _first_matching_column(df_merged.columns, 'F2F', 'spend')
spend_all_col = _first_matching_column(df_merged.columns, 'All', 'spend', 'UK')

# Calculate per cardholder spending columns
df_merged["Online per Cardholder"] = df_merged[spend_online_col] / df_merged[cardholder_online_col]
df_merged["F2F per Cardholder"] = df_merged[spend_f2f_col] / df_merged[cardholder_f2f_col]
df_merged["All per Cardholder"] = df_merged[spend_all_col] / df_merged[cardholder_all_col]

# Extract rows for 201901 and 202506 (or latest available)
start_row = df_merged[df_merged["time_period_value"] == 201901]
end_row = df_merged[df_merged["time_period_value"] == 202506]

# The baseline has no fallback: fail clearly instead of with an IndexError further down
if start_row.empty:
    raise ValueError("Baseline period 201901 is missing from the merged data; cannot compute percentage change.")

# If 202506 is not available, use the latest available period
if end_row.empty:
    end_row = df_merged.sort_values("time_period_value").iloc[[-1]]

# Period actually used as the end point (202506 or the fallback)
end_label = end_row['time_period_value'].values[0]


def percentage_change(start, end):
    """Return the change from start to end as a percentage of start."""
    return ((end - start) / start) * 100


change_online = percentage_change(start_row["Online per Cardholder"].values[0], end_row["Online per Cardholder"].values[0])
change_f2f = percentage_change(start_row["F2F per Cardholder"].values[0], end_row["F2F per Cardholder"].values[0])
change_all = percentage_change(start_row["All per Cardholder"].values[0], end_row["All per Cardholder"].values[0])

# Display the results
print(f"Percentage change in Online per Cardholder from 201901 to {end_label}: {change_online:.2f}%")
print(f"Percentage change in F2F per Cardholder from 201901 to {end_label}: {change_f2f:.2f}%")
print(f"Percentage change in All per Cardholder from 201901 to {end_label}: {change_all:.2f}%")



In [None]:
# Percentage change from January 2019 (201901) to June 2025 (202506), read from the
# saved per-cardholder table : ----------- Way TWO --- Correct
# Online per Cardholder
# F2F per Cardholder
# All per Cardholder

import pandas as pd

# Per-cardholder table written by the per-cardholder cell
df = pd.read_csv('UK_Spend_Per_Cardholder.csv')

# Rows for the two periods being compared
period_col = df['time_period_value']
start_row = df[period_col == 201901]
end_row = df[period_col == 202506]

metric_names = ['Online per Cardholder', 'F2F per Cardholder', 'All per Cardholder']

if start_row.empty or end_row.empty:
    # Nothing to compare when either period is absent
    print("One or both of the specified time periods (201901, 202506) are missing in the data.")
else:
    # First matching row of each period, then (end - start) / start * 100 per metric
    first_values = {name: start_row[name].values[0] for name in metric_names}
    last_values = {name: end_row[name].values[0] for name in metric_names}
    pct_by_metric = {
        name: ((last_values[name] - first_values[name]) / first_values[name]) * 100
        for name in metric_names
    }
    online_change, f2f_change, all_change = (pct_by_metric[name] for name in metric_names)

    # Report one line per metric
    print("Percentage change from 201901 to 202506:")
    for name in metric_names:
        print(f"{name}: {pct_by_metric[name]:.2f}%")

