In [None]:
# Figure 1

import pandas as pd
import matplotlib.pyplot as plt

# Read the three indexed monthly series (total, online, face-to-face)
uk_df = pd.read_csv("Indexed_All_UK_Mth.csv")
online_df = pd.read_csv("Indexed_All_Online_Mth.csv")
f2f_df = pd.read_csv("Indexed_All_F2F_Mth.csv")

# Each entry: (frame, value column to draw, legend label), in drawing order
series_to_plot = [
    (uk_df, 'Indexed Spend UK All', 'Spend All'),
    (online_df, 'Indexed Spend Online All', 'Online All'),
    (f2f_df, 'Indexed Spend F2F All', 'F2F All'),
]

plt.figure(figsize=(12, 6))

# Solid lines with dot markers, x taken from each frame's 'Formatted Period'
for frame, value_col, legend_label in series_to_plot:
    plt.plot(frame['Formatted Period'], frame[value_col],
             label=legend_label, marker='o', linestyle='-')

# Dashed grey reference line at the 'Jun 21' tick
plt.axvline(x='Jun 21', color='gray', linestyle='--', label='June 2021')

# Title, axis labels, rotated ticks, legend and grid
plt.title('Indexed Spend Comparison Monthly: All vs Online vs F2F (2019 Jan to 2025 June)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend (Base 2019 ave = 100)')
plt.xticks(rotation=90)
plt.legend()
plt.grid(True)
plt.tight_layout()

# Write the PNG before showing the figure
plt.savefig("indexed_spend_comparison_Mth.png")
plt.show()



In [None]:
# Growth Rate 202506 to 201901 for Indexed_All_Online_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_Online_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend Online All'
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# Growth Rate 202506 to 201901 for Indexed_All_F2F_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_F2F_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend F2F All'
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# Growth Rate 202506 to 201901 for Indexed_All_UK_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_UK_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend UK All'
start_period = '201901'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# Growth Rate 202506 to 202106 for Indexed_All_Online_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_Online_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend Online All'
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# Growth Rate 202506 to 202106 for Indexed_All_F2F_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_F2F_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend F2F All'
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# Growth Rate 202506 to 202106 for Indexed_All_UK_Mth.csv

import pandas as pd

# Load the CSV file
df = pd.read_csv('Indexed_All_UK_Mth.csv')

# Period labels as text, cast once and reused for every lookup below
period_labels = df['time_period_value'].astype(str)
available_periods = period_labels.tolist()
value_column = 'Indexed Spend UK All'
start_period = '202106'
end_period = '202506'

# The start period has no sensible fallback: stop with a clear message instead
# of the opaque IndexError that `.values[0]` raises on an empty selection
if start_period not in available_periods:
    raise ValueError(f"Start period {start_period} is missing from 'time_period_value'")

# If the requested end period is missing, fall back to the latest period present
if end_period not in available_periods:
    end_period = str(max(int(p) for p in available_periods))

# Extract the values for the start and end periods
value_start = df.loc[period_labels == start_period, value_column].values[0]
value_end = df.loc[period_labels == end_period, value_column].values[0]

# Percentage growth relative to the start value
growth_rate = ((value_end - value_start) / value_start) * 100

print(f"Growth rate from {start_period} to {end_period} is {growth_rate:.2f}%")


In [None]:
# the percentage change from January 2019 (201901) to June 2025 (202506) for : ----------- Way TWO --- Correct 
# Online per Cardholder
# F2F per Cardholder
# All per Cardholder

import pandas as pd

# Read the per-cardholder spend table
df = pd.read_csv('UK_Spend_Per_Cardholder.csv')

# Rows for the two periods being compared (periods are matched as integers here)
period_column = df['time_period_value']
start_row = df[period_column == 201901]
end_row = df[period_column == 202506]


def _pct_change_between_rows(column):
    """Percentage change of `column` from the first start row to the first end row."""
    first_value = start_row[column].values[0]
    last_value = end_row[column].values[0]
    return ((last_value - first_value) / first_value) * 100


# Nothing to compare unless both periods are present
if start_row.empty or end_row.empty:
    print("One or both of the specified time periods (201901, 202506) are missing in the data.")
else:
    online_change = _pct_change_between_rows('Online per Cardholder')
    f2f_change = _pct_change_between_rows('F2F per Cardholder')
    all_change = _pct_change_between_rows('All per Cardholder')

    # Report each metric to two decimals
    print(f"Percentage change from 201901 to 202506:")
    print(f"Online per Cardholder: {online_change:.2f}%")
    print(f"F2F per Cardholder: {f2f_change:.2f}%")
    print(f"All per Cardholder: {all_change:.2f}%")


In [None]:
# Online Monthly Ratio

import pandas as pd

# Read monthly online spend and monthly total spend
online_spending = pd.read_csv("UK_Online_Spending_Mth.csv")
all_spending = pd.read_csv("UK_All_Spending_Mth.csv")

# Join the two tables on their shared period key (default inner join:
# only periods present in both files are kept)
merged_df = online_spending.merge(all_spending, on='time_period_value')

# Online spend as a percentage of total spend
online_share = merged_df['Online_spend_All'] / merged_df['UK_spend_All']
merged_df['online_spending_ratio'] = online_share * 100

# Persist the merged table, ratio included, for the later cells that read it
merged_df.to_csv("UK_Online_Spending_Ratio_Mth.csv", index=False)

# Lift the row-display limit so the whole table prints; the option stays
# changed for the rest of the session
pd.set_option('display.max_rows', None)

print(merged_df)



In [None]:
# Calculate the Month-on-Month % Change using the adjusted UK All spend values. ---------------- ALL
# Identify the average and range of increases in Jan and dips in Dec from 2021 to 2024.

import pandas as pd

# Read the adjusted monthly all-spend series
df = pd.read_csv("Adjusted_All_Spend_Mth.csv")

# Period labels as text, then rows ordered by label
df['time_period_value'] = df['time_period_value'].astype(str)
df = df.sort_values(by='time_period_value')

# Row-over-row % change of Adjust_Monthly_All_Spend
# NOTE(review): this is month-on-month only if no month is missing from the file - confirm
df['MoM_%_Change'] = df['Adjust_Monthly_All_Spend'].pct_change() * 100

# Periods summarised: December 2021-2023 and January 2022-2024
december_months = [f"{year}12" for year in (2021, 2022, 2023)]
january_months = [f"{year}01" for year in (2022, 2023, 2024)]

is_december = df['time_period_value'].isin(december_months)
is_january = df['time_period_value'].isin(january_months)
december_changes = df.loc[is_december, 'MoM_%_Change']
january_changes = df.loc[is_january, 'MoM_%_Change']

# Mean and range of the selected changes for each month
december_avg, december_min, december_max = (
    december_changes.mean(), december_changes.min(), december_changes.max()
)
january_avg, january_min, january_max = (
    january_changes.mean(), january_changes.min(), january_changes.max()
)

# NOTE(review): the cell header talks about January increases and December dips,
# while the labels below say January decrease / December increase - confirm wording
print(f"Average January MoM % Decrease: {january_avg:.2f}% (Range: {january_min:.2f}% to {january_max:.2f}%)")
print(f"Average December MoM % Increase: {december_avg:.2f}% (Range: {december_min:.2f}% to {december_max:.2f}%)")


In [None]:
# Calculate month-on-month percentage changes for: --------------- ONLINE
# November to December
# December to January
# Compare these changes across years (e.g., 2021–2024 for Dec, 2022–2025 for Jan).
# Visualize the transitions to highlight patterns like:
# Online spending dips in December from November
# Online rebounds in January from December
# F2F spending surges in December, then drops in January

# The average percentage drop in online spending ratio from November to December for the years 2021 to 2024.
# The average percentage increase in online spending ratio from December to January for the years 2022 to 2025.

import pandas as pd

# Read the monthly online-ratio table and cast the period labels to text so
# they can be compared against 'YYYYMM' strings further down
df = pd.read_csv('UK_Online_Spending_Ratio_Mth.csv')
df = df.assign(time_period_value=df['time_period_value'].astype(str))

# Percentage change helper
def percent_change(old, new):
    """Return the change from `old` to `new` as a percentage of `old`."""
    difference = new - old
    return (difference / old) * 100

def _online_ratio_for(period):
    """Return the online spending ratio for a 'YYYYMM' period, or None if absent."""
    matches = df.loc[df['time_period_value'] == period, 'online_spending_ratio']
    return None if matches.empty else matches.values[0]


def _format_pct(value):
    """Format an average as 'x.xx%', or 'n/a' when no pair was available."""
    return "n/a" if value is None else f"{value:.2f}%"


# Calculate average drop from November to December for 2021 to 2024
nov_dec_drops = []
for year in ['2021', '2022', '2023', '2024']:
    nov = _online_ratio_for(f'{year}11')
    dec = _online_ratio_for(f'{year}12')
    if nov is not None and dec is not None:
        nov_dec_drops.append(percent_change(nov, dec))

avg_nov_dec_drop = sum(nov_dec_drops) / len(nov_dec_drops) if nov_dec_drops else None

# Calculate average increase from December to January for January 2022 to 2025.
# `year` is the January year, so the December it follows belongs to year - 1
# (pairing December of `year` with January of year + 1 would skip
# Dec 2021 -> Jan 2022 and look for a January 2026 instead).
dec_jan_increases = []
for year in ['2022', '2023', '2024', '2025']:
    dec = _online_ratio_for(f'{int(year) - 1}12')
    jan = _online_ratio_for(f'{year}01')
    if dec is not None and jan is not None:
        dec_jan_increases.append(percent_change(dec, jan))

avg_dec_jan_increase = sum(dec_jan_increases) / len(dec_jan_increases) if dec_jan_increases else None

# Print the results ('n/a' instead of a TypeError when an average is None)
print(f"Average percentage drop in online spending ratio from Nov to Dec (2021-2024): {_format_pct(avg_nov_dec_drop)}")
print(f"Average percentage increase in online spending ratio from Dec to Jan (2022-2025): {_format_pct(avg_dec_jan_increase)}")


In [None]:
# The average increase in Face-to-Face (F2F) ratio from November to December for 2021 to 2024. ----------------- FACE-TO-FACE
# The average decrease in F2F ratio from December to January for 2022 to 2025.

import pandas as pd

# Read the monthly online-ratio table, derive the face-to-face share as the
# complement to 100, and cast period labels to text for 'YYYYMM' comparisons
df = pd.read_csv('UK_Online_Spending_Ratio_Mth.csv')
df = df.assign(
    F2F_ratio=lambda frame: 100 - frame['online_spending_ratio'],
    time_period_value=lambda frame: frame['time_period_value'].astype(str),
)

# Function to calculate percentage change from one month to the next
def calculate_percentage_change(df, year, from_month, to_month):
    """Percentage change in 'F2F_ratio' between two monthly periods.

    Args:
        df: frame with a string 'time_period_value' column holding 'YYYYMM'
            labels and a numeric 'F2F_ratio' column.
        year: calendar year of `from_month`.
        from_month: month number of the starting period.
        to_month: month number of the ending period. When it is smaller than
            `from_month` the span crosses a year end (e.g. 12 -> 1), so the
            ending period is taken from `year + 1`.

    Returns:
        The change from the starting to the ending value as a percentage of
        the starting value, or None when either period is not in `df`.
    """
    # Roll into the next year for spans such as December -> January; using
    # the same year for both would compare December with the *preceding* January.
    to_year = year + 1 if to_month < from_month else year
    from_period = f"{year}{from_month:02d}"
    to_period = f"{to_year}{to_month:02d}"
    from_value = df.loc[df['time_period_value'] == from_period, 'F2F_ratio']
    to_value = df.loc[df['time_period_value'] == to_period, 'F2F_ratio']
    if not from_value.empty and not to_value.empty:
        return ((to_value.values[0] - from_value.values[0]) / from_value.values[0]) * 100
    return None

def _mean_or_none(values):
    """Arithmetic mean of `values`, or None for an empty list."""
    return sum(values) / len(values) if values else None


def _format_pct(value):
    """Format an average as 'x.xx%', or 'n/a' when nothing could be averaged."""
    return "n/a" if value is None else f"{value:.2f}%"


# F2F ratio change from Nov to Dec for 2021 to 2024 (missing pairs are skipped)
nov_to_dec_changes = [
    change
    for change in (calculate_percentage_change(df, year, 11, 12) for year in range(2021, 2025))
    if change is not None
]

# F2F ratio change from Dec to Jan for January 2022 to 2025; the helper is
# given the December year, i.e. year - 1
dec_to_jan_changes = [
    change
    for change in (calculate_percentage_change(df, year - 1, 12, 1) for year in range(2022, 2026))
    if change is not None
]

# Compute averages
avg_nov_to_dec_increase = _mean_or_none(nov_to_dec_changes)
avg_dec_to_jan_decrease = _mean_or_none(dec_to_jan_changes)

# Display results ('n/a' instead of a TypeError when an average is None)
print(f"Average F2F ratio increase from Nov to Dec (2021-2024): {_format_pct(avg_nov_to_dec_increase)}")
print(f"Average F2F ratio decrease from Dec to Jan (2022-2025): {_format_pct(avg_dec_to_jan_decrease)}")


In [None]:
# For calculating the average month-on-month % increase for each MCG 2021 Nov to 2024 Dec, especially for Face-to-Face during Dec peak periods.
# --------------------- Face-to-Face UP Nov to Dec 2021 to 2024

import pandas as pd

# Read the per-period month-on-month changes for the top-5 F2F MCGs
file_path = "Top5_MCGs_MoM_Change_Per_Peak_Period_F2F.csv"
df = pd.read_csv(file_path)

# Keep only the November-to-December periods of 2021 through 2024
target_periods = [f"{year}11 to {year}12" for year in range(2021, 2025)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, with MCG returned as an ordinary column
average_mom_change = (
    filtered_df
    .groupby('MCG')['MoM_%_Change']
    .mean()
    .reset_index()
)

# Last expression of the cell: rendered as the cell output
average_mom_change


In [None]:
# For calculating the average month-on-month % increase for each MCG, especially for Airlines and Travel Services during January peak periods.
# --------------------- Online UP Dec to Jan 2021 to 2025

import pandas as pd

# Read the per-period contributions of the top-5 MCGs
file_path = "Top5_MCG_Contributions_Per_PeakPeriod.csv"
df = pd.read_csv(file_path)

# Keep the December-to-January periods ending in January 2022, 2023 and 2024
# NOTE(review): the cell header mentions 2025 - confirm whether
# '202412 to 202501' should be included as well
target_periods = [f"{year}12 to {year + 1}01" for year in (2021, 2022, 2023)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, with MCG returned as an ordinary column
average_mom_change = (
    filtered_df
    .groupby('MCG')['MoM % Change']
    .mean()
    .reset_index()
)

# Last expression of the cell: rendered as the cell output
average_mom_change

In [None]:
# For calculating the average month-on-month % increase for each MCG, especially for Least contributors during January peak periods.
# --------------------- Online DOWN Nov to Dec 2021 to 2024

import pandas as pd

# Read the per-period month-on-month changes for the bottom-5 online MCGs
file_path = "Least5_MCGs_MoM_Change_Per_Peak_Period_Online.csv"
df = pd.read_csv(file_path)

# Keep only the November-to-December periods of 2021 through 2024
target_periods = [f"{year}11 to {year}12" for year in range(2021, 2025)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, with MCG returned as an ordinary column
average_mom_change = (
    filtered_df
    .groupby('MCG')['MoM_%_Change']
    .mean()
    .reset_index()
)

# Last expression of the cell: rendered as the cell output
average_mom_change

In [None]:
# Monthly Online Spending Ratio ---------- Final

import pandas as pd
import matplotlib.pyplot as plt

# Read the CSV file
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")

# Ensure 'time_period_value' is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Keep periods from 201901 onwards. Take an explicit copy so the label column
# added below is written to an independent frame rather than to a filtered
# slice of the loaded one (avoids pandas' SettingWithCopyWarning).
df = df[df['time_period_value'] >= '201901'].copy()

# Create shorter labels for the X-axis (e.g., '201901' -> 'Jan 19')
month_map = {
    '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
    '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
}
# x[4:] is the month part, x[:4][-2:] the last two digits of the year;
# an unknown month part yields an empty month name
df['short_label'] = df['time_period_value'].apply(lambda x: f"{month_map.get(x[4:], '')} {x[:4][-2:]}")

# Plot the line chart
plt.figure(figsize=(12, 6))
plt.plot(df['short_label'], df['online_spending_ratio'], marker='o', linestyle='-', color='green')
plt.title("UK Online Spending Ratio Over Time (Monthly 2019 Jan - 2025 June)")
plt.xlabel("Time Period")
plt.ylabel("Online Spending Ratio (%)")
plt.xticks(rotation=90)
plt.grid(True)
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_Spending_Ratio_LineChart_Mth.png")
plt.show()



In [None]:
# Figure 2

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV file
df = pd.read_csv("UK_Online_Spending_Ratio_Mth.csv")

# Ensure 'time_period_value' is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Keep periods from 201901 onwards; copy so the columns added below are
# written to an independent frame rather than to a filtered slice
df = df[df['time_period_value'] >= '201901'].copy()

# Create shorter labels for the X-axis (e.g., '201901' -> 'Jan 19')
month_map = {
    '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
    '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
}
df['short_label'] = df['time_period_value'].apply(lambda x: f"{month_map.get(x[4:], '')} {x[:4][-2:]}")

# Face-to-Face share is the complement of the online share
df['f2f_spending_ratio'] = 100 - df['online_spending_ratio']

# One row per channel, one column per month, as stackplot expects
stacked_data = df[['online_spending_ratio', 'f2f_spending_ratio']].T

# Plot the stacked area chart
plt.figure(figsize=(14, 7))
plt.stackplot(df['short_label'], stacked_data, labels=['Online', 'Face-to-Face'], colors=['green', 'lightgray'])

# Thin vertical grey dotted line at every month
for label in df['short_label']:
    plt.axvline(x=label, color='gray', linestyle=':', linewidth=0.5)

# Vertical black dashed line at the first plotted month of each (2-digit) year
years = sorted(set([label[-2:] for label in df['short_label']]))
for year in years:
    year_labels = [label for label in df['short_label'] if label.endswith(year)]
    if year_labels:
        plt.axvline(x=year_labels[0], color='black', linestyle='--', linewidth=1)

# Vertical blue dotted line plus a year caption at each year boundary.
# The marker is anchored on the month's category label: after the filter above
# the DataFrame index keeps its original values, so it only matches the axis
# position when the file happens to start at 201901.
years = sorted(set(x[:4] for x in df['time_period_value']))
for year in years:
    in_year = df['time_period_value'].str.startswith(year)
    first_month_label = df.loc[in_year, 'short_label'].iloc[0]
    plt.axvline(x=first_month_label, color='blue', linestyle=':', linewidth=1)
    plt.text(first_month_label, 102, year, color='blue', rotation=90, va='top')

# Horizontal red dashed lines for the 40/50/60 % thresholds, captioned at the last month
for ratio in [40, 50, 60]:
    plt.axhline(y=ratio, color='red', linestyle='--', linewidth=1)
    plt.text(df['short_label'].iloc[-1], ratio + 1, f'{ratio}%', color='red', va='bottom')

# Final chart formatting
plt.title("100% Stacked Area Chart: Online vs Face-to-Face Spending in UK (Monthly)")
plt.xlabel("Time Period")
plt.ylabel("Spending Ratio (%)")
plt.xticks(rotation=90)
plt.legend(title="Channels", bbox_to_anchor=(1, 1), loc='upper left')
plt.grid(False)
plt.tight_layout()

# Save the chart
plt.savefig("UK_Online_vs_F2F_Spending_StackedAreaChart.png")
plt.show()


In [None]:
# For calculating the average month-on-month % increase for each MCG, especially for Airlines and Travel Services during January peak periods.

import pandas as pd

# Read the per-period contributions of the top-5 MCGs
file_path = "Top5_MCG_Contributions_Per_PeakPeriod.csv"
df = pd.read_csv(file_path)

# Keep the December-to-January periods ending in January 2022, 2023 and 2024
# NOTE(review): confirm whether '202412 to 202501' should be included as well
target_periods = [f"{year}12 to {year + 1}01" for year in (2021, 2022, 2023)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, with MCG returned as an ordinary column
average_mom_change = (
    filtered_df
    .groupby('MCG')['MoM % Change']
    .mean()
    .reset_index()
)

# Last expression of the cell: rendered as the cell output
average_mom_change


In [None]:
# To calculate the average September increase for the ‘Education & Government’ MCG from September 2021 to September 2024

# Extract the relevant rows for each Aug and Sep.
# Calculate the MoM % change for 'EDUCATION & GOVERNMENT'.
# Compute the average across the four years.
# Confirm whether the average increase is around 48.7% or provide the exact figure.

import pandas as pd

# Load the CSV file
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Ensure time_period_value is treated as string
df['time_period_value'] = df['time_period_value'].astype(str)

# Only the EDUCATION & GOVERNMENT rows are needed; select them once
edu_gov = df[df['mcg'] == 'EDUCATION & GOVERNMENT']

# Define the August-to-September periods for 2021 to 2024
peak_periods = [
    ('202108', '202109'),
    ('202208', '202209'),
    ('202308', '202309'),
    ('202408', '202409')
]

# Month-on-month changes, one per year in which both months are present
changes = []

for aug, sep in peak_periods:
    df_aug = edu_gov[edu_gov['time_period_value'] == aug]
    df_sep = edu_gov[edu_gov['time_period_value'] == sep]

    if not df_aug.empty and not df_sep.empty:
        aug_spend = df_aug['adjusted_Online_spend'].values[0]
        sep_spend = df_sep['adjusted_Online_spend'].values[0]
        mom_change = ((sep_spend - aug_spend) / aug_spend) * 100
        changes.append(mom_change)

# Average over the years actually found (None when there were none)
average_change = sum(changes) / len(changes) if changes else None

# Display the result; report missing data explicitly instead of failing with a
# TypeError when formatting None
if average_change is None:
    print("No complete August/September pair found for 'EDUCATION & GOVERNMENT' (2021–2024).")
else:
    print(f"Average MoM % Increase for 'EDUCATION & GOVERNMENT' from August to September (2021–2024): {average_change:.2f}%")


In [None]:
# Figure 4 - Quarterly Comparison for UK Cardholder Domestic vs Abroad Online Spending Ratios

import pandas as pd
import matplotlib.pyplot as plt

# Input files: quarterly online spending ratios, abroad and domestic
intl_ratio_file = "Q_Online_Spending_Ratio_Intl.csv"
dom_ratio_file = "Q_Online_Spending_Ratio_Dom_Ad_mcg.csv"

# Read each file and order its rows by quarter label
intl_df = pd.read_csv(intl_ratio_file).sort_values("time_period_value")
dom_df = pd.read_csv(dom_ratio_file).sort_values("time_period_value")

# Keep only the quarters present in both files
merged_df = intl_df.merge(dom_df, on="time_period_value", how="inner")

# Draw the second and third merged columns against the quarter label
# NOTE(review): the series are picked by position; this assumes each file holds
# just the quarter plus one ratio column - confirm against the CSVs
quarter_axis = merged_df["time_period_value"]
plt.figure(figsize=(12, 6))
for column_position, legend_label in (
    (1, "International Online Spending Ratio"),
    (2, "Domestic Online Spending Ratio"),
):
    plt.plot(quarter_axis, merged_df.iloc[:, column_position], label=legend_label, marker='o')

# Axis decoration
plt.xticks(rotation=45)
plt.xlabel("Quarter")
plt.ylabel("Online Spending Ratio (%)")
plt.title("Quarterly Online Spending Ratio: International vs Domestic")
plt.legend()
plt.tight_layout()
plt.grid(True)

# Write the PNG, then show the figure
plt.savefig("Online_Spending_Ratio_Comparison.png")
plt.show()

In [None]:
# Quarterly Comparison for UK Cardholder Domestic vs Abroad Online Spending Ratios - Adjusted Values --- mcg = 'All' - USED THIS ONE

import pandas as pd
import matplotlib.pyplot as plt

# Load the CSV files
intl_ratio_file = "Adjusted_Online_Intl_Ratio_mcg.csv"
dom_ratio_file = "Adjusted_Online_Dom_Ratio_mcg.csv"

# Read the data
intl_df = pd.read_csv(intl_ratio_file)
dom_df = pd.read_csv(dom_ratio_file)

# Order both tables by quarter label
intl_df = intl_df.sort_values("time_period_value")
dom_df = dom_df.sort_values("time_period_value")

# Merge the two datasets on time_period_value (quarters present in both)
merged_df = pd.merge(intl_df, dom_df, on="time_period_value", how="inner")

# Plot the two ratio series. The columns are selected by name (the same names
# the averaging cells use for these files) rather than by position, so an
# extra column in either CSV cannot silently change what is drawn.
plt.figure(figsize=(12, 6))
plt.plot(merged_df["time_period_value"], merged_df["Adjusted_online_Intl_ratio"], label="Adjusted International Online Spending Ratio", marker='o')
plt.plot(merged_df["time_period_value"], merged_df["Adjusted_online_Dom_ratio"], label="Adjusted Domestic Online Spending Ratio", marker='o')
plt.xticks(rotation=45)
plt.xlabel("Quarter")
plt.ylabel("Online Spending Ratio (%)")
plt.title("Quarterly Online Spending Ratio: International vs Domestic (Adjusted Value)")
plt.legend()
plt.tight_layout()
plt.grid(True)
# NOTE(review): the unadjusted comparison cell saves to this same file name,
# so whichever cell runs last overwrites the other's PNG - confirm this is intended
plt.savefig("Online_Spending_Ratio_Comparison.png")
plt.show()

In [None]:
# Calculate the averages for the files Adjusted_Online_Intl_Ratio_mcg.csv and Adjusted_Online_Dom_Ratio_mcg.csv

import pandas as pd

# Read the adjusted quarterly ratio tables (abroad and domestic)
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")

# Quarter labels 2019Q1 .. 2025Q2, in chronological order
quarters = [
    f"{year}Q{q}"
    for year in range(2019, 2026)
    for q in range(1, 5)
    if (year, q) <= (2025, 2)
]

# Restrict each table to those quarters
intl_in_range = intl_df['time_period_value'].isin(quarters)
dom_in_range = dom_df['time_period_value'].isin(quarters)
intl_filtered = intl_df[intl_in_range]
dom_filtered = dom_df[dom_in_range]

# Mean adjusted online ratio per table
intl_avg = intl_filtered['Adjusted_online_Intl_ratio'].mean()
dom_avg = dom_filtered['Adjusted_online_Dom_ratio'].mean()

print(f"Average Adjusted Online International Ratio (2019Q1 to 2025Q2): {intl_avg:.2f}")
print(f"Average Adjusted Online Domestic Ratio (2019Q1 to 2025Q2): {dom_avg:.2f}")

# Also dump the filtered abroad rows for inspection
print(intl_filtered)

In [None]:
# To calculate the average difference in online spending ratios between international and domestic markets for:

# 2019Q1 to 2021Q4 (average gap: )
# 2022Q1 to 2025Q2 (average gap: )

import pandas as pd

# Read the adjusted quarterly ratio tables (abroad and domestic)
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")

# Quarter labels as text in both tables
for frame in (intl_df, dom_df):
    frame['time_period_value'] = frame['time_period_value'].astype(str)

# Join on the quarter label; clashing column names get _intl / _dom suffixes
merged_df = pd.merge(intl_df, dom_df, on='time_period_value', suffixes=('_intl', '_dom'))

# The two windows being compared: 2019Q1-2021Q4 and 2022Q1-2025Q2
early_quarters = [f"{year}Q{q}" for year in range(2019, 2022) for q in range(1, 5)]
late_quarters = [f"{year}Q{q}" for year in range(2022, 2025) for q in range(1, 5)] + ['2025Q1', '2025Q2']

period1 = merged_df[merged_df['time_period_value'].isin(early_quarters)]
period2 = merged_df[merged_df['time_period_value'].isin(late_quarters)]


def _mean_ratio_gap(frame):
    """Mean of (abroad ratio - domestic ratio) over the rows of `frame`."""
    gap = frame['Adjusted_online_Intl_ratio'] - frame['Adjusted_online_Dom_ratio']
    return gap.mean()


# Average abroad-minus-domestic gap in each window
avg_diff_period1 = _mean_ratio_gap(period1)
avg_diff_period2 = _mean_ratio_gap(period2)

print(f"Average difference in ratios from 2019Q1 to 2021Q4: {avg_diff_period1:.2f}%")
print(f"Average difference in ratios from 2022Q1 to 2025Q2: {avg_diff_period2:.2f}%")


In [None]:
# To calculate the seasonal trend in international online spending since 2022—specifically:

# Overall average ratio from 2022Q1 to 2025Q2
# Q3 average ratio (July to September: 2022Q3, 2023Q3, 2024Q3)

import pandas as pd

# Read the adjusted quarterly abroad online ratio
df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")

# Quarter labels as text so they compare lexicographically
df['time_period_value'] = df['time_period_value'].astype(str)

# Rows from 2022Q1 through 2025Q2, both ends included
quarter_labels = df['time_period_value']
in_window = (quarter_labels >= '2022Q1') & (quarter_labels <= '2025Q2')
df_recent = df[in_window]

# Mean ratio across the whole window
overall_avg = df_recent['Adjusted_online_Intl_ratio'].mean()

# Mean ratio across the third quarters only
q3_quarters = ['2022Q3', '2023Q3', '2024Q3']
is_q3 = df_recent['time_period_value'].isin(q3_quarters)
df_q3 = df_recent[is_q3]
q3_avg = df_q3['Adjusted_online_Intl_ratio'].mean()

print(f"Average online ratio from 2022Q1 to 2025Q2: {overall_avg:.1f}%")
print(f"Average online ratio in Q3 quarters (2022Q3, 2023Q3, 2024Q3): {q3_avg:.1f}%")

In [None]:
# To calculate using adjusted online spending abroad, averaging the MoM change from Q2->Q3 from 2022-2024 --------------- NEED TO CHECK

import pandas as pd

# Load the CSV file
file_path = "Adjusted_Intl_Spend_Online_mcg.csv"
df = pd.read_csv(file_path)

# Extract year and quarter from 'time_period_value' (labels like '2022Q3')
df[['year', 'quarter']] = df['time_period_value'].str.extract(r'(\d{4})Q(\d)').astype(int)

# Filter for Q2 and Q3 from 2022 to 2024
filtered_df = df[df['year'].isin([2022, 2023, 2024]) & df['quarter'].isin([2, 3])]

# Per-year percentage change from Q2 to Q3; years missing either quarter are skipped
q2_q3_changes = []
for year in [2022, 2023, 2024]:
    q2 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 2)]['Adjust_Quarterly_Intl_Spend_Online']
    q3 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 3)]['Adjust_Quarterly_Intl_Spend_Online']
    if not q2.empty and not q3.empty:
        change = ((q3.values[0] - q2.values[0]) / q2.values[0]) * 100
        q2_q3_changes.append(change)

# Average over the years actually collected. Dividing by a fixed 3 would
# understate the average whenever a year was skipped above, and would report
# 0.00% when nothing was found at all.
average_change = sum(q2_q3_changes) / len(q2_q3_changes) if q2_q3_changes else None

# "MoM" in the label below denotes the quarter-on-quarter (Q2 -> Q3) change
if average_change is None:
    print("No complete Q2/Q3 pair found for 2022–2024.")
else:
    print(f"Average MoM change in adjusted international online spending from Q2 to Q3 (2022–2024): {average_change:.2f}%")


In [None]:
# To calculate using adjusted total spending abroad, averaging the MoM change from Q2->Q3 from 2022-2024 --------------- NEED TO CHECK

import pandas as pd

# Load the CSV file
file_path = "Adjusted_Intl_Spend.csv"
df = pd.read_csv(file_path)

# Extract year and quarter from 'time_period_value' (labels like '2022Q3')
df[['year', 'quarter']] = df['time_period_value'].str.extract(r'(\d{4})Q(\d)').astype(int)

# Filter for Q2 and Q3 from 2022 to 2024
filtered_df = df[df['year'].isin([2022, 2023, 2024]) & df['quarter'].isin([2, 3])]

# Per-year percentage change from Q2 to Q3; years missing either quarter are skipped
q2_q3_changes = []
for year in [2022, 2023, 2024]:
    q2 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 2)]['Adjust_Quarterly_Intl_Spend']
    q3 = filtered_df[(filtered_df['year'] == year) & (filtered_df['quarter'] == 3)]['Adjust_Quarterly_Intl_Spend']
    if not q2.empty and not q3.empty:
        change = ((q3.values[0] - q2.values[0]) / q2.values[0]) * 100
        q2_q3_changes.append(change)

# Average over the years actually collected. Dividing by a fixed 3 would
# understate the average whenever a year was skipped above, and would report
# 0.00% when nothing was found at all.
average_change = sum(q2_q3_changes) / len(q2_q3_changes) if q2_q3_changes else None

# "MoM" in the label below denotes the quarter-on-quarter (Q2 -> Q3) change
if average_change is None:
    print("No complete Q2/Q3 pair found for 2022–2024.")
else:
    print(f"Average MoM change in adjusted international total spending from Q2 to Q3 (2022–2024): {average_change:.2f}%")

In [None]:
# Average Q2 -> Q3 percentage change of the adjusted international online ratio for 2022, 2023 and 2024 ----------- NEED TO CHECK
# (uses the adjusted online ratio rather than adjusted online spending abroad)  ----------------- WRONG ONE

import pandas as pd

# Read the ratio series and force the period labels to strings
df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")
df['time_period_value'] = df['time_period_value'].astype(str)

# Q2 and Q3 period labels for 2022, 2023, and 2024
q2_periods = ['2022Q2', '2023Q2', '2024Q2']
q3_periods = ['2022Q3', '2023Q3', '2024Q3']

# One frame per quarter, each tagged with the year taken from the first four characters of the label
df_q2 = df[df['time_period_value'].isin(q2_periods)].assign(year=lambda frame: frame['time_period_value'].str[:4])
df_q3 = df[df['time_period_value'].isin(q3_periods)].assign(year=lambda frame: frame['time_period_value'].str[:4])

# Pair each year's Q2 row with its Q3 row
merged = df_q2.merge(df_q3, on='year', suffixes=('_q2', '_q3'))

# Percentage change from Q2 to Q3 within each year
ratio_q2 = merged['Adjusted_online_Intl_ratio_q2']
ratio_q3 = merged['Adjusted_online_Intl_ratio_q3']
merged['qoq_change'] = ((ratio_q3 - ratio_q2) / ratio_q2) * 100

# Mean of the per-year changes
average_change = merged['qoq_change'].mean()

# Display the result
print(f"Average quarter-on-quarter percentage change in international online spending ratio from Q2 to Q3 (2022–2024): {average_change:.2f}%")


In [None]:
# Average quarter-on-quarter % change in total spend abroad between 2022Q1 and 2025Q2 ------------ NEED TO CHECK ----------------- WRONG ONE

import pandas as pd

# Read the quarterly series and force the period labels to strings
df = pd.read_csv("Q_spending_by_Intl_All.csv")
df['time_period_value'] = df['time_period_value'].astype(str)

# Quarter labels 2022Q1 ... 2025Q2 (the full 2022-2025 grid minus 2025Q3 and 2025Q4)
quarters = [f"{year}Q{quarter}" for year in range(2022, 2026) for quarter in range(1, 5)][:-2]

# Restrict to the window, order by label (these 'YYYYQn' labels sort chronologically),
# then add the quarter-on-quarter percentage change of 'abroad_spend_all'
df_filtered = (
    df[df['time_period_value'].isin(quarters)]
    .copy()
    .sort_values(by='time_period_value')
    .assign(**{'QoQ_%_Change': lambda frame: frame['abroad_spend_all'].pct_change() * 100})
)

# The first row has no predecessor, so its change is NaN and is dropped before averaging
qoq_changes = df_filtered['QoQ_%_Change'].dropna()
average_qoq_change = qoq_changes.mean()

print(f"Average quarter-on-quarter percentage change in abroad spending from 2022Q1 to 2025Q2: {average_qoq_change:.2f}%")


In [None]:
# Change in the yearly-average adjusted online ratio from 2019 to 2024, for both domestic and international online spending

import pandas as pd

# Load the CSV files
dom_df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")
intl_df = pd.read_csv("Adjusted_Online_Intl_Ratio_mcg.csv")

# Derive the calendar year from the quarter label: the first four characters
# of 'time_period_value' (e.g. '2019Q1' -> 2019). This replaces a datetime
# round trip that relied on the non-raw pattern 'Q\d', an invalid escape
# sequence that newer Python versions warn about.
dom_df['year'] = dom_df['time_period_value'].astype(str).str[:4].astype(int)
intl_df['year'] = intl_df['time_period_value'].astype(str).str[:4].astype(int)

# Group by year and calculate average ratio
dom_avg = dom_df.groupby('year')['Adjusted_online_Dom_ratio'].mean()
intl_avg = intl_df.groupby('year')['Adjusted_online_Intl_ratio'].mean()

# Difference between the 2024 and 2019 yearly means, in the ratio's own units.
# This is a plain subtraction, not a relative percentage change.
# NOTE(review): presumably the ratios are stored in percent, making these percentage points - confirm.
dom_change = dom_avg[2024] - dom_avg[2019]
intl_change = intl_avg[2024] - intl_avg[2019]

# Last expression of the cell: shown as the cell output
(dom_change, intl_change)


In [None]:
# Increase in Adjusted_online_Dom_ratio between 2019Q1 and 2025Q1

import pandas as pd

# Read the domestic online ratio series
df = pd.read_csv("Adjusted_Online_Dom_Ratio_mcg.csv")

# Pick the first row matching each of the two quarters and take its ratio value
ratio_column = 'Adjusted_online_Dom_ratio'
is_2019q1 = df['time_period_value'] == '2019Q1'
is_2025q1 = df['time_period_value'] == '2025Q1'
value_2019Q1 = df[is_2019q1][ratio_column].to_numpy()[0]
value_2025Q1 = df[is_2025q1][ratio_column].to_numpy()[0]

# Plain difference between the two quarters (later value minus earlier value).
# NOTE(review): the message below calls this a percentage increase; that only holds as
# percentage points if the ratio is stored in percent - confirm.
percentage_increase = value_2025Q1 - value_2019Q1

# Report the result
print(f"The percentage increase in Adjusted_online_Dom_ratio from 2019Q1 to 2025Q1 is {percentage_increase:.2f}%.")