In [None]:
project_path = "/home/jupyter"
import os
import sys

sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t


# BigQuery client shared by the query cells below. It is constructed with no
# arguments, so project and credentials are presumably resolved from the
# environment's defaults -- confirm for the target runtime.
client = bigquery.Client()


In [None]:
# Total Face-to-Face spend per MCG, per month, for UK cardholders.
#
# The query sums `spend` by (time_period_value, mcg) over rows where
# time_period = 'Month', mcg != 'All', mcc = 'All',
# cardholder_origin_country = 'All', cardholder_origin = 'UNITED KINGDOM'
# and merchant_channel = 'Face to Face'.
# NOTE: the CTE is called `mcg_quarterly_spend`, but the filter selects
# monthly rows -- the name is historical, the data is monthly.

Total_MCGs_F2F_Mth = '''
WITH mcg_quarterly_spend AS (
  SELECT 
   time_period_value,
    SUM(spend) AS total_spend,
    mcg
  FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
  WHERE time_period = 'Month'
  AND mcg != 'All'
  AND mcc = 'All'
  AND cardholder_origin_country = 'All' 
  AND cardholder_origin = 'UNITED KINGDOM' 
  AND merchant_channel = 'Face to Face'
  GROUP BY time_period_value, mcg
)

SELECT *
FROM mcg_quarterly_spend
ORDER BY time_period_value, mcg;
'''

# Run the query and load the result (time_period_value, total_spend, mcg) into a DataFrame
df_Total_MCGs_F2F_Mth = client.query(Total_MCGs_F2F_Mth).to_dataframe()

# Persist to CSV; later cells read this file instead of re-querying BigQuery
df_Total_MCGs_F2F_Mth.to_csv('Total_MCGs_F2F_Mth.csv', index=False)

# NOTE(review): this prints the whole frame; consider `.head()` if the output is long
print(df_Total_MCGs_F2F_Mth)

In [None]:
# Total Online spend per MCG, per month, for UK cardholders.
#
# Same query as the Face-to-Face cell except for merchant_channel = 'Online':
# sums `spend` by (time_period_value, mcg) over monthly rows with mcg != 'All',
# mcc = 'All', cardholder_origin_country = 'All' and
# cardholder_origin = 'UNITED KINGDOM'.
# NOTE: the CTE is called `mcg_quarterly_spend`, but the filter selects
# monthly rows -- the name is historical, the data is monthly.

Total_MCGs_Online_Mth = '''
WITH mcg_quarterly_spend AS (
  SELECT 
    time_period_value,
    SUM(spend) AS total_spend,
    mcg
  FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
  WHERE time_period = 'Month'
  AND mcg != 'All'
  AND mcc = 'All'
  AND cardholder_origin_country = 'All' 
  AND cardholder_origin = 'UNITED KINGDOM' 
  AND merchant_channel = 'Online'
  GROUP BY time_period_value, mcg
)

SELECT *
FROM mcg_quarterly_spend
ORDER BY time_period_value, mcg;
'''
# Run the query and load the result (time_period_value, total_spend, mcg) into a DataFrame
df_Total_MCGs_Online_Mth = client.query(Total_MCGs_Online_Mth).to_dataframe()

# Persist to CSV; later cells read this file instead of re-querying BigQuery
df_Total_MCGs_Online_Mth.to_csv('Total_MCGs_Online_Mth.csv', index=False)

# NOTE(review): this prints the whole frame; consider `.head()` if the output is long
print(df_Total_MCGs_Online_Mth)

In [None]:
# Monthly UK cardholder totals (mcg = 'All', merchant_channel = 'All').
# Output: Total_MCGs_Cardholder_Mth.csv with columns
# time_period_value, total_cardholders.

# The SQL text lives under its own name so that `UK_spending_by_mcg_All`
# only ever refers to the resulting DataFrame.
# The GROUP BY lists every selected column, so it only de-duplicates rows.
# NOTE(review): unlike the per-MCG spend queries, this query does not filter
# on mcc = 'All' -- confirm that mcg = 'All' rows cannot appear once per mcc,
# otherwise the cardholder sum below would over-count.
UK_spending_by_mcg_All_sql = '''SELECT time_period_value, cardholders, mcg, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, mcg, 
time_period_value, spend 
ORDER BY time_period_value, mcg'''
df_by_mcg_All = bq.read_bq_table_sql(client, UK_spending_by_mcg_All_sql)

# Fail fast if the query result cannot be aggregated. Previously this case
# only printed a message and then crashed on `.to_csv`, because the name still
# held the SQL string. The check covers the columns actually used below
# ('cardholders', not 'spend').
required_columns = {'time_period_value', 'cardholders'}
if df_by_mcg_All is None or not required_columns.issubset(df_by_mcg_All.columns):
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum cardholders per month and give the column its downstream name
UK_spending_by_mcg_All = (
    df_by_mcg_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'total_cardholders'})
)
print(UK_spending_by_mcg_All)

# Save the result to a CSV file (read by the adjustment cells)
csv_filename = "Total_MCGs_Cardholder_Mth.csv"
UK_spending_by_mcg_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# Rescale monthly Online MCG spend to the January 2019 (201901) cardholder base
# adjusted spend = (cardholders in 201901 / cardholders in month) * spend in month

import pandas as pd

# Inputs written by the query cells above
cardholders_df = pd.read_csv("Total_MCGs_Cardholder_Mth.csv")
online_df = pd.read_csv("Total_MCGs_Online_Mth.csv")

# Locate the base month; compare as strings so the check works whatever dtype
# read_csv inferred for the period column
is_base_month = cardholders_df['time_period_value'].astype(str) == '201901'
if not is_base_month.any():
    raise ValueError("The time_period_value '201901' is not found in Total_MCGs_Cardholder_Mth.csv")

# Attach each month's cardholder total to every MCG row of that month
merged_df = online_df.merge(cardholders_df, on='time_period_value', how='inner')

# Cardholder count of the base month (first match)
base_cardholders = cardholders_df.loc[is_base_month, 'total_cardholders'].values[0]

# Scale every month's spend to the base-month cardholder count
cardholder_scaling = base_cardholders / merged_df['total_cardholders']
merged_df['adjusted_Online_spend'] = cardholder_scaling * merged_df['total_spend']

# Only period, MCG and adjusted spend are exported for the downstream cells
export_columns = ["time_period_value", "mcg", "adjusted_Online_spend"]
merged_df[export_columns].to_csv("Adjusted_Online_MCGs_Mth.csv", index=False)

print("The adjusted online spend data has been saved to 'Adjusted_Online_MCGs_Mth.csv'.")
merged_df.head(40)

In [None]:
# Month-on-month change in adjusted_Online_spend, per MCG
#   nominal_change = adjusted spend (current month) - adjusted spend (previous month)
#   percent_change = nominal_change / adjusted spend (previous month) * 100

import pandas as pd

# Adjusted monthly online spend written by the adjustment cell
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Order rows by MCG, then by month, so "previous row" within an MCG is the previous month
df_sorted = df.sort_values(by=['mcg', 'time_period_value'])

# One grouped view serves both calculations; the first row of each MCG has no
# predecessor and therefore gets NaN
spend_by_mcg = df_sorted.groupby('mcg')['adjusted_Online_spend']
df_sorted['nominal_change'] = spend_by_mcg.diff()
df_sorted['percent_change'] = spend_by_mcg.pct_change() * 100

# Column order used for the on-screen preview
preview_columns = ['time_period_value', 'mcg', 'adjusted_Online_spend', 'nominal_change', 'percent_change']
result_df = df_sorted[preview_columns]

# Write the full sorted frame to disk
output_file = 'Adjusted_Online_MCGs_Mth_with_Changes.csv'
df_sorted.to_csv(output_file, index=False)

# Show the first rows
print(result_df.head())



In [None]:
# Rescale monthly Face-to-Face MCG spend to the January 2019 (201901) cardholder base
# adjusted spend = (cardholders in 201901 / cardholders in month) * spend in month

import pandas as pd

# Inputs written by the query cells above
cardholders_df = pd.read_csv("Total_MCGs_Cardholder_Mth.csv")
f2f_df = pd.read_csv("Total_MCGs_F2F_Mth.csv")

# Use string period keys in both frames so the lookup and the merge agree on dtype
for frame in (cardholders_df, f2f_df):
    frame['time_period_value'] = frame['time_period_value'].astype(str)

# The base month must be present, otherwise there is nothing to scale against
is_base_month = cardholders_df['time_period_value'] == '201901'
if not is_base_month.any():
    raise ValueError("The time_period_value '201901' is not found in Total_MCGs_Cardholder_Mth.csv")

# Cardholder count of the base month (first match)
base_cardholders = cardholders_df.loc[is_base_month, 'total_cardholders'].values[0]

# Attach each month's cardholder total to every MCG row of that month
merged_df = f2f_df.merge(cardholders_df, on='time_period_value', how='inner')

# Scale every month's spend to the base-month cardholder count
cardholder_scaling = base_cardholders / merged_df['total_cardholders']
merged_df['adjusted_F2F_spend'] = cardholder_scaling * merged_df['total_spend']

# Only period, MCG and adjusted spend are exported for the downstream cells
export_columns = ["time_period_value", "mcg", "adjusted_F2F_spend"]
merged_df[export_columns].to_csv("Adjusted_F2F_MCGs_Mth.csv", index=False)

# Preview the first 40 rows
merged_df.head(40)



In [None]:
# Month-on-month change in adjusted_F2F_spend, per MCG
#   nominal_change = adjusted spend (current month) - adjusted spend (previous month)
#   percent_change = nominal_change / adjusted spend (previous month) * 100

import pandas as pd

# Adjusted monthly Face-to-Face spend written by the adjustment cell
df = pd.read_csv("Adjusted_F2F_MCGs_Mth.csv")

# Order rows by MCG, then by month, so "previous row" within an MCG is the previous month
df_sorted = df.sort_values(by=['mcg', 'time_period_value'])

# One grouped view serves both calculations; the first row of each MCG has no
# predecessor and therefore gets NaN
spend_by_mcg = df_sorted.groupby('mcg')['adjusted_F2F_spend']
df_sorted['nominal_change'] = spend_by_mcg.diff()
df_sorted['percent_change'] = spend_by_mcg.pct_change() * 100

# Column order used for the on-screen preview
preview_columns = ['time_period_value', 'mcg', 'adjusted_F2F_spend', 'nominal_change', 'percent_change']
result_df = df_sorted[preview_columns]

# Write the full sorted frame to disk
output_file = 'Adjusted_F2F_MCGs_Mth_with_Changes.csv'
df_sorted.to_csv(output_file, index=False)

# Show the first rows
print(result_df.head())


In [None]:
# Compare monthly, quarterly and yearly changes in adjusted online spend per MCG
# and list the three MCGs with the largest contribution in each period.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")


def format_period(period):
    """Turn a YYYYMM value such as 201901 into a short label such as "Jan 19".

    A month part that is not '01'..'12' is passed through unchanged.
    """
    period_str = str(period)
    year = period_str[:4]
    month = period_str[4:]
    month_map = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    return f"{month_map.get(month, month)} {year[2:]}"


# Human-readable label (display only) and a real datetime (ordering, quarter/year)
df['Formatted Period'] = df['time_period_value'].apply(format_period)
df['Formatted Period Date'] = pd.to_datetime(df['time_period_value'].astype(str), format='%Y%m')

# Sort by MCG and date so per-MCG pct_change compares consecutive months
df.sort_values(by=['mcg', 'Formatted Period Date'], inplace=True)

# Monthly percent change in adjusted_Online_spend for each MCG
df['monthly_pct_change'] = df.groupby('mcg')['adjusted_Online_spend'].pct_change() * 100

# Percent change of the all-MCG monthly total.
# The grouping key must be the datetime column: groupby sorts its keys, and the
# text labels ("Apr 19", "Apr 20", ..., "Aug 19", ...) sort alphabetically, which
# would make pct_change compare unrelated months.
monthly_total = df.groupby('Formatted Period Date')['adjusted_Online_spend'].sum().pct_change() * 100
monthly_total.name = 'total_monthly_pct_change'

# Attach the total monthly change to every row of the matching month
df = df.merge(monthly_total.reset_index(), on='Formatted Period Date')

# Contribution of each MCG: its own percent change weighted by its share of the
# month's total spend.
# NOTE(review): the weight is the current-month share, so contributions do not
# sum exactly to total_monthly_pct_change -- confirm this is the intended metric.
df['contribution_to_total_change'] = (df['monthly_pct_change'] * df['adjusted_Online_spend']) / df.groupby('Formatted Period')['adjusted_Online_spend'].transform('sum')

# Top 3 MCGs by contribution in each month
top_monthly_drivers = df.groupby('Formatted Period').apply(lambda x: x.nlargest(3, 'contribution_to_total_change'))[['mcg', 'contribution_to_total_change']]

# Quarter and year derived from the datetime column
df['quarter'] = df['Formatted Period Date'].dt.to_period('Q')
df['year'] = df['Formatted Period Date'].dt.year

# Top 3 MCGs by contribution in each quarter (quarter keys are Periods, so they
# sort chronologically)
quarterly_df = df.groupby(['quarter', 'mcg'])['adjusted_Online_spend'].sum().reset_index()
quarterly_df['quarterly_pct_change'] = quarterly_df.groupby('mcg')['adjusted_Online_spend'].pct_change() * 100
quarterly_total = quarterly_df.groupby('quarter')['adjusted_Online_spend'].sum().pct_change() * 100
quarterly_total.name = 'total_quarterly_pct_change'
quarterly_df = quarterly_df.merge(quarterly_total, on='quarter')
quarterly_df['contribution_to_total_change'] = (quarterly_df['quarterly_pct_change'] * quarterly_df['adjusted_Online_spend']) / quarterly_df.groupby('quarter')['adjusted_Online_spend'].transform('sum')
top_quarterly_drivers = quarterly_df.groupby('quarter').apply(lambda x: x.nlargest(3, 'contribution_to_total_change'))[['mcg', 'contribution_to_total_change']]

# Top 3 MCGs by contribution in each year
# NOTE(review): a first or last year/quarter with fewer months than its
# neighbours will show an artificial jump -- check the data range.
yearly_df = df.groupby(['year', 'mcg'])['adjusted_Online_spend'].sum().reset_index()
yearly_df['yearly_pct_change'] = yearly_df.groupby('mcg')['adjusted_Online_spend'].pct_change() * 100
yearly_total = yearly_df.groupby('year')['adjusted_Online_spend'].sum().pct_change() * 100
yearly_total.name = 'total_yearly_pct_change'
yearly_df = yearly_df.merge(yearly_total, on='year')
yearly_df['contribution_to_total_change'] = (yearly_df['yearly_pct_change'] * yearly_df['adjusted_Online_spend']) / yearly_df.groupby('year')['adjusted_Online_spend'].transform('sum')
top_yearly_drivers = yearly_df.groupby('year').apply(lambda x: x.nlargest(3, 'contribution_to_total_change'))[['mcg', 'contribution_to_total_change']]

# Save the results to CSV files
df.to_csv("monthly_mcg_changes.csv", index=False)
top_monthly_drivers.to_csv("top_monthly_drivers.csv")
top_quarterly_drivers.to_csv("top_quarterly_drivers.csv")
top_yearly_drivers.to_csv("top_yearly_drivers.csv")

# Plot every MCG's monthly contribution over time. Rows are ordered by MCG and
# date, so the first plotted series fixes the categorical x-axis in
# chronological order.
plt.figure(figsize=(12, 6))
for mcg in df['mcg'].unique():
    mcg_data = df[df['mcg'] == mcg]
    plt.plot(mcg_data['Formatted Period'], mcg_data['contribution_to_total_change'], label=mcg)

plt.title("MCG Contribution to Total Monthly Change")
plt.xticks(rotation=90)
plt.xlabel("Formatted Period")
plt.ylabel("Contribution to Total Change (%)")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig("mcg_contribution_plot.png")
plt.show()




In [None]:
# All MCGs: monthly contribution to the change in adjusted online spend,
# drawn on a symmetric-log y-axis.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")


def format_period(period):
    """Turn a YYYYMM value such as 201901 into a short label such as "Jan 19".

    A month part that is not '01'..'12' is passed through unchanged.
    """
    period_str = str(period)
    year = period_str[:4]
    month = period_str[4:]
    month_map = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    return f"{month_map.get(month, month)} {year[2:]}"


# Display label plus a real datetime used for ordering
df['Month Period'] = df['time_period_value'].apply(format_period)
df['Month'] = pd.to_datetime(df['time_period_value'].astype(str), format='%Y%m')

# Sort by MCG and date so per-MCG pct_change compares consecutive months
df.sort_values(by=['mcg', 'Month'], inplace=True)

# Monthly percent change per MCG
df['monthly_pct_change'] = df.groupby('mcg')['adjusted_Online_spend'].pct_change() * 100

# Contribution: percent change weighted by the MCG's share of the month's total spend
monthly_total_spend = df.groupby('Month Period')['adjusted_Online_spend'].transform('sum')
df['contribution_to_total_change'] = (df['monthly_pct_change'] * df['adjusted_Online_spend']) / monthly_total_spend

# One column per MCG, one row per month label.
# pivot() sorts its index, and the text labels sort alphabetically
# ("Apr 19", "Apr 20", ..., "Aug 19", ...), so the rows are put back into
# chronological order before plotting.
period_order = pd.Index(df.sort_values('Month')['Month Period'].unique(), name='Month Period')
pivot_df = df.pivot(index='Month Period', columns='mcg', values='contribution_to_total_change')
pivot_df = pivot_df.reindex(period_order)

# One line per MCG
plt.figure(figsize=(14, 8))
for column in pivot_df.columns:
    plt.plot(pivot_df.index, pivot_df[column], label=column)

plt.yscale('symlog')  # symmetric log scale handles both positive and negative values
plt.xlabel('Month')
plt.ylabel('Contribution to Total Change (%)')
plt.title('MCG Contribution to Total Monthly Change (Log Scale)')
plt.xticks(rotation=90)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize='small')
plt.tight_layout()
plt.grid(True)

# Save the plot
plt.savefig("mcg_contribution_logscale.png")
print("Chart saved as mcg_contribution_logscale.png")



In [None]:
# Top 5 MCGs by total absolute contribution to the monthly change in adjusted
# online spend, plotted over time.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Parse YYYYMM into datetimes; cast to str first so parsing does not depend on
# the dtype read_csv inferred (the other cells do the same)
df['time_period_value'] = pd.to_datetime(df['time_period_value'].astype(str), format='%Y%m')

# Sort by MCG and date so per-MCG pct_change compares consecutive months
df.sort_values(by=['mcg', 'time_period_value'], inplace=True)

# Monthly change per MCG, as a fraction (not multiplied by 100 in this cell)
df['monthly_pct_change'] = df.groupby('mcg')['adjusted_Online_spend'].pct_change()

# Contribution: change weighted by the MCG's share of the month's total spend
monthly_total_spend = df.groupby('time_period_value')['adjusted_Online_spend'].transform('sum')
df['contribution_to_total_change'] = (df['monthly_pct_change'] * df['adjusted_Online_spend']) / monthly_total_spend

# Rank MCGs by the sum of absolute contributions across all months; keep the top 5
top_mcg = df.groupby('mcg')['contribution_to_total_change'].apply(lambda x: x.abs().sum()).nlargest(5).index

# Keep only the rows of those MCGs
df_top = df[df['mcg'].isin(top_mcg)]

# One line per top MCG
plt.figure(figsize=(12, 6))
for mcg in top_mcg:
    mcg_data = df_top[df_top['mcg'] == mcg]
    plt.plot(mcg_data['time_period_value'], mcg_data['contribution_to_total_change'], label=mcg)

# Contributions are negative whenever spend falls. A plain 'log' axis cannot
# draw non-positive values and silently drops them, so use the symmetric log
# scale, as the all-MCG chart does.
plt.yscale('symlog')
plt.xlabel('Month')
plt.ylabel('Contribution to Total Monthly Change (symlog scale)')
plt.title('Top 5 MCGs Driving Monthly Change in Adjusted Online Spend')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.xticks(rotation=45)
plt.savefig("top_5_mcg_contribution_log_chart.png")
plt.show()



In [None]:
# MCG-level contributions to the December-to-January change in adjusted online
# spend, for the 2021/22 to 2024/25 peaks. For each pair of months it computes:
#   - the MoM % change of each MCG
#   - a weighted contribution = MoM % change * the MCG's share of January spend
# and draws a stacked bar chart of the weighted contributions.

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Compare periods as strings ('202112', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# December-to-January pairs to analyse
peak_pairs = [
    ('202112', '202201'),
    ('202212', '202301'),
    ('202312', '202401'),
    ('202412', '202501')
]

# One frame of (mcg, weighted_contribution, period) per pair
contributions = []

for dec, jan in peak_pairs:
    # Rows of the two months
    df_dec = df[df['time_period_value'] == dec]
    df_jan = df[df['time_period_value'] == jan]

    # Inner merge on MCG: only MCGs present in both months are kept
    merged = pd.merge(df_dec, df_jan, on='mcg', suffixes=('_dec', '_jan'))

    # MoM % change of each MCG from December to January
    merged['mom_change'] = ((merged['adjusted_Online_spend_jan'] - merged['adjusted_Online_spend_dec']) /
                            merged['adjusted_Online_spend_dec']) * 100

    # Share of each MCG in that January's total online spend
    total_jan_spend = merged['adjusted_Online_spend_jan'].sum()
    merged['jan_share'] = merged['adjusted_Online_spend_jan'] / total_jan_spend

    # Scale each MCG's % change by its weight in overall online spend
    merged['weighted_contribution'] = merged['mom_change'] * merged['jan_share']

    # Label used as the bar's x-axis category
    merged['period'] = f"{dec}-{jan}"

    contributions.append(merged[['mcg', 'weighted_contribution', 'period']])

# Stack all periods, then reshape to period x MCG for the chart
contrib_df = pd.concat(contributions)
pivot_df = contrib_df.pivot(index='period', columns='mcg', values='weighted_contribution').fillna(0)

# Draw on an explicitly created Axes. Calling plt.figure() and then
# DataFrame.plot(figsize=...) without `ax` opens a second figure and leaves the
# first one empty in the notebook output.
fig, ax = plt.subplots(figsize=(14, 7))
pivot_df.plot(kind='bar', stacked=True, colormap='tab20', ax=ax)
plt.title("MCG Contributions to Online Spending Ratio Change (Dec–Jan Peaks)")
plt.xlabel("Period")
plt.ylabel("Weighted Contribution to MoM % Change")
plt.xticks(rotation=45)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig("MCG_Online_Contribution_StackedBar.png")
plt.show()



In [None]:
# Top 5 MCGs per December-to-January period, ranked by month-on-month % change
# in adjusted online spend. Writes a summary table to CSV.

import pandas as pd

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Compare periods as strings ('202112', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# (December, January) pairs to analyse
peak_periods = [
    ('202112', '202201'),
    ('202212', '202301'),
    ('202312', '202401'),
    ('202412', '202501')
]

# One dict per (period, MCG) row of the final summary
top_contributors = []

for dec, jan in peak_periods:
    period_label = f"{dec} to {jan}"

    # Spend of each month, indexed by MCG
    is_dec = df['time_period_value'] == dec
    is_jan = df['time_period_value'] == jan
    dec_data = df[is_dec].set_index('mcg')
    jan_data = df[is_jan].set_index('mcg')

    # January on the left, December on the right, aligned on MCG
    combined = jan_data[['adjusted_Online_spend']].join(
        dec_data[['adjusted_Online_spend']],
        lsuffix='_jan', rsuffix='_dec'
    )

    # MCGs without a December value cannot be compared
    combined.dropna(inplace=True)

    # MoM % change = (Jan - Dec) / Dec * 100
    jan_spend = combined['adjusted_Online_spend_jan']
    dec_spend = combined['adjusted_Online_spend_dec']
    combined['MoM_%_Change'] = ((jan_spend - dec_spend) / dec_spend) * 100

    # Five largest increases, largest first
    top5 = combined.sort_values(by='MoM_%_Change', ascending=False).head(5)
    for mcg, row in top5.iterrows():
        record = {
            'Period': period_label,
            'MCG': mcg,
            'MoM_%_Change': round(row['MoM_%_Change'], 2)
        }
        top_contributors.append(record)

# Assemble and persist the summary
summary_df = pd.DataFrame(top_contributors)
summary_df.to_csv("Top5_MCGs_MoM_Change_Per_Peak_Period_Online.csv", index=False)

# Display the summary
summary_df



In [None]:
# Least 5 MCGs per November-to-December period, ranked by month-on-month %
# change in adjusted online spend (smallest change first). Writes a summary
# table to CSV.

import pandas as pd

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Compare periods as strings ('202111', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# (November, December) pairs to analyse
peak_periods = [
    ('202111', '202112'),
    ('202211', '202212'),
    ('202311', '202312'),
    ('202411', '202412')
]

# One dict per (period, MCG) row of the final summary
least_contributors = []

for nov, dec in peak_periods:
    # Spend of each month, indexed by MCG
    nov_data = df[df['time_period_value'] == nov].set_index('mcg')
    dec_data = df[df['time_period_value'] == dec].set_index('mcg')

    # December on the left, November on the right, aligned on MCG
    combined = dec_data[['adjusted_Online_spend']].join(
        nov_data[['adjusted_Online_spend']],
        lsuffix='_dec', rsuffix='_nov'
    )

    # MCGs without a November value cannot be compared
    combined.dropna(inplace=True)

    # MoM % change = (Dec - Nov) / Nov * 100
    combined['MoM_%_Change'] = ((combined['adjusted_Online_spend_dec'] - combined['adjusted_Online_spend_nov']) /
                                combined['adjusted_Online_spend_nov']) * 100

    # Sort ascending so head(5) returns the five SMALLEST changes. The previous
    # descending sort returned the top five, contradicting the "least 5"
    # purpose of this cell and of its output file.
    least5 = combined.sort_values(by='MoM_%_Change', ascending=True).head(5)
    for mcg, row in least5.iterrows():
        least_contributors.append({
            'Period': f"{nov} to {dec}",
            'MCG': mcg,
            'MoM_%_Change': round(row['MoM_%_Change'], 2)
        })

# Assemble and persist the summary
summary_df = pd.DataFrame(least_contributors)
summary_df.to_csv("Least5_MCGs_MoM_Change_Per_Peak_Period_Online.csv", index=False)

# Display the summary
summary_df



In [None]:
# Top 5 MCGs per November-to-December period, ranked by month-on-month % change
# in adjusted Face-to-Face spend. Writes a summary table to CSV.

import pandas as pd

# Adjusted monthly Face-to-Face spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_F2F_MCGs_Mth.csv")

# Compare periods as strings ('202111', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# (November, December) pairs to analyse
peak_periods = [
    ('202111', '202112'),
    ('202211', '202212'),
    ('202311', '202312'),
    ('202411', '202412')
]

# One dict per (period, MCG) row of the final summary
top_contributors = []

for nov, dec in peak_periods:
    period_label = f"{nov} to {dec}"

    # Spend of each month, indexed by MCG
    is_nov = df['time_period_value'] == nov
    is_dec = df['time_period_value'] == dec
    nov_data = df[is_nov].set_index('mcg')
    dec_data = df[is_dec].set_index('mcg')

    # December on the left, November on the right, aligned on MCG
    combined = dec_data[['adjusted_F2F_spend']].join(
        nov_data[['adjusted_F2F_spend']],
        lsuffix='_dec', rsuffix='_nov'
    )

    # MCGs without a November value cannot be compared
    combined.dropna(inplace=True)

    # MoM % change = (Dec - Nov) / Nov * 100
    dec_spend = combined['adjusted_F2F_spend_dec']
    nov_spend = combined['adjusted_F2F_spend_nov']
    combined['MoM_%_Change'] = ((dec_spend - nov_spend) / nov_spend) * 100

    # Five largest increases, largest first
    top5 = combined.sort_values(by='MoM_%_Change', ascending=False).head(5)
    for mcg, row in top5.iterrows():
        record = {
            'Period': period_label,
            'MCG': mcg,
            'MoM_%_Change': round(row['MoM_%_Change'], 2)
        }
        top_contributors.append(record)

# Assemble and persist the summary
summary_df = pd.DataFrame(top_contributors)
summary_df.to_csv("Top5_MCGs_MoM_Change_Per_Peak_Period_F2F.csv", index=False)

# Display the summary
summary_df



In [None]:
# Stacked bar chart of the month-on-month % change of the top 5 MCGs in each
# Dec-to-Jan peak period (online channel).

import pandas as pd
import matplotlib.pyplot as plt

# Summary table written by the top-5 online cell
file_path = 'Top5_MCGs_MoM_Change_Per_Peak_Period_Online.csv'
df = pd.read_csv(file_path)

# Reshape to period x MCG; an MCG outside a period's top 5 contributes 0
pivot_df = df.pivot(index='Period', columns='MCG', values='MoM_%_Change').fillna(0)

# pandas creates the figure; keep the Axes it returns for labelling
ax = pivot_df.plot(kind='bar', stacked=True, figsize=(12, 6))

# Titles, axis labels and an outside-right legend
ax.set_title('Top 5 MCGs MoM % Change per Peak Period')
ax.set_xlabel('Period')
plt.xticks(rotation=45)
ax.set_ylabel('MoM % Change')
ax.legend(title='MCG', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()

# Write the image, then render it
plt.savefig('stacked_mcg_mom_change_chart.png')
plt.show()




In [None]:
# Top 5 MCGs by weighted contribution to each December-to-January online
# spending change (2021/22 to 2024/25), with their MoM % change, January share
# and weighted contribution.

import pandas as pd

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Compare periods as strings ('202112', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# (December, January) pairs to analyse
peak_periods = [
    ('202112', '202201'),
    ('202212', '202301'),
    ('202312', '202401'),
    ('202412', '202501')
]

# One dict per (period, MCG) row of the final summary
summary_list = []

for dec, jan in peak_periods:
    period_label = f"{dec} to {jan}"

    # Independent copies of the two months' rows
    df_dec = df.loc[df['time_period_value'] == dec].copy()
    df_jan = df.loc[df['time_period_value'] == jan].copy()

    # Inner merge on MCG: only MCGs present in both months are kept
    merged = df_dec.merge(df_jan, on='mcg', suffixes=('_dec', '_jan'))

    # MoM % change = (Jan - Dec) / Dec * 100
    spend_dec = merged['adjusted_Online_spend_dec']
    spend_jan = merged['adjusted_Online_spend_jan']
    merged['mom_change'] = ((spend_jan - spend_dec) / spend_dec) * 100

    # Share of each MCG in that January's total
    total_jan_spend = spend_jan.sum()
    merged['jan_share'] = spend_jan / total_jan_spend

    # % change scaled by the MCG's weight in January spend
    merged['weighted_contribution'] = merged['mom_change'] * merged['jan_share']

    # Five largest weighted contributions, largest first
    top5 = merged.sort_values(by='weighted_contribution', ascending=False).head(5)

    for _, row in top5.iterrows():
        record = {
            'Period': period_label,
            'MCG': row['mcg'],
            'MoM % Change': round(row['mom_change'], 2),
            'Jan Share': round(row['jan_share'] * 100, 2),
            'Weighted Contribution': round(row['weighted_contribution'], 2)
        }
        summary_list.append(record)

# Assemble and persist the summary
summary_df = pd.DataFrame(summary_list)
summary_df.to_csv("Top5_MCG_Contributions_Per_PeakPeriod.csv", index=False)

# Display the first rows of the summary
summary_df.head(15)



In [None]:
# Average month-on-month % change per MCG across the December-to-January peak
# periods (online channel), e.g. for Airlines and Travel Services.
# NOTE: the input holds only each period's top 5 MCGs, so an MCG is averaged
# over the periods in which it made the top 5, not over all periods.

import pandas as pd

# Summary table written by the weighted-contribution cell
file_path = "Top5_MCG_Contributions_Per_PeakPeriod.csv"
df = pd.read_csv(file_path)

# Periods ending in January 2022 to January 2025. '202412 to 202501' was
# missing from this list although the input file is built for all four
# periods; a label absent from the file simply matches no rows.
target_periods = ['202112 to 202201', '202212 to 202301', '202312 to 202401', '202412 to 202501']
filtered_df = df[df['Period'].isin(target_periods)]

# Mean MoM % change per MCG
average_mom_change = filtered_df.groupby('MCG')['MoM % Change'].mean().reset_index()

# Display the result
average_mom_change



In [None]:
# Average month-on-month % change per MCG across the November-to-December
# periods, computed from the "least 5" online summary table.

import pandas as pd

# Summary table written by the least-5 online cell
file_path = "Least5_MCGs_MoM_Change_Per_Peak_Period_Online.csv"
df = pd.read_csv(file_path)

# Period labels for November-to-December 2021 through 2024
target_periods = [f"{year}11 to {year}12" for year in range(2021, 2025)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, returned as a two-column frame
average_mom_change = filtered_df.groupby('MCG', as_index=False)['MoM_%_Change'].mean()

# Display the result
average_mom_change

In [None]:
# Average month-on-month % change per MCG across the November-to-December
# periods 2021-2024, computed from the Face-to-Face top-5 summary table.

import pandas as pd

# Summary table written by the top-5 Face-to-Face cell
file_path = "Top5_MCGs_MoM_Change_Per_Peak_Period_F2F.csv"
df = pd.read_csv(file_path)

# Period labels for November-to-December 2021 through 2024
target_periods = [f"{year}11 to {year}12" for year in range(2021, 2025)]
in_target_period = df['Period'].isin(target_periods)
filtered_df = df.loc[in_target_period]

# Mean MoM % change per MCG, returned as a two-column frame
average_mom_change = filtered_df.groupby('MCG', as_index=False)['MoM_%_Change'].mean()

# Display the result
average_mom_change



In [None]:
# Average August-to-September increase in adjusted online spend for the
# 'EDUCATION & GOVERNMENT' MCG, September 2021 to September 2024.
#
# Steps:
#   1. pick the August and September rows of each year
#   2. compute the MoM % change for 'EDUCATION & GOVERNMENT'
#   3. average over the years for which both months are available

import pandas as pd

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")

# Compare periods as strings ('202108', ...)
df['time_period_value'] = df['time_period_value'].astype(str)

# (August, September) pairs for 2021 to 2024
peak_periods = [
    ('202108', '202109'),
    ('202208', '202209'),
    ('202308', '202309'),
    ('202408', '202409')
]

# MoM % change of EDUCATION & GOVERNMENT for each pair that has data
changes = []

for aug, sep in peak_periods:
    df_aug = df[(df['time_period_value'] == aug) & (df['mcg'] == 'EDUCATION & GOVERNMENT')]
    df_sep = df[(df['time_period_value'] == sep) & (df['mcg'] == 'EDUCATION & GOVERNMENT')]

    # A pair is skipped when either month is missing
    if not df_aug.empty and not df_sep.empty:
        aug_spend = df_aug['adjusted_Online_spend'].values[0]
        sep_spend = df_sep['adjusted_Online_spend'].values[0]
        mom_change = ((sep_spend - aug_spend) / aug_spend) * 100
        changes.append(mom_change)

# Average over the pairs found. With no pairs, average_change stays None, and
# formatting None with ':.2f' raises TypeError, so that case is reported
# separately.
average_change = sum(changes) / len(changes) if changes else None

if average_change is None:
    print("No August/September data found for 'EDUCATION & GOVERNMENT'; average not computed.")
else:
    print(f"Average MoM % Increase for 'EDUCATION & GOVERNMENT' from August to September (2021–2024): {average_change:.2f}%")



In [None]:
# Top 5 online MCGs by absolute net contribution to monthly change; exports
# their monthly rows to CSV.

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted monthly online spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_Online_MCGs_Mth.csv")


def format_period(period):
    """Turn a YYYYMM value such as 201901 into a short label such as "Jan 19".

    A month part that is not '01'..'12' is passed through unchanged.
    """
    text = str(period)
    year, month = text[:4], text[4:]
    month_map = {
        '01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun',
        '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'
    }
    month_label = month_map.get(month, month)
    return f"{month_label} {year[2:]}"


# Display label plus a real datetime used for ordering
df['Month Period'] = df['time_period_value'].apply(format_period)
df['Month'] = pd.to_datetime(df['time_period_value'].astype(str), format='%Y%m')

# Sort by MCG and date so per-MCG pct_change compares consecutive months
df.sort_values(by=['mcg', 'Month'], inplace=True)

# Monthly change per MCG, as a fraction (not multiplied by 100 in this cell)
df['monthly_pct_change'] = df.groupby('mcg')['adjusted_Online_spend'].pct_change()

# Contribution: change weighted by the MCG's share of the month's total spend
period_total_spend = df.groupby('Month Period')['adjusted_Online_spend'].transform('sum')
weighted_change = df['monthly_pct_change'] * df['adjusted_Online_spend']
df['contribution_to_total_change'] = weighted_change / period_total_spend

# Net contribution per MCG over all months, ranked by absolute size; keep five
net_contribution = df.groupby('mcg')['contribution_to_total_change'].sum()
ranked_contribution = net_contribution.abs().sort_values(ascending=False)
top_mcg = ranked_contribution.head(5).index

# Export the monthly rows of the selected MCGs
output_file = 'top_5_mcg_percent_change_Online.csv'
export_columns = ['Month Period', 'mcg', 'adjusted_Online_spend', 'monthly_pct_change', 'contribution_to_total_change']
top_mcg_df = df[df['mcg'].isin(top_mcg)][export_columns]
top_mcg_df.to_csv(output_file, index=False)

print(f"Top 5 MCGs with highest overall percent change saved to {output_file}.")



In [None]:
# Line chart of monthly adjusted online spend for the top 5 MCGs

import pandas as pd
import matplotlib.pyplot as plt

# Rows of the top 5 MCGs written by the export cell
file_path = "top_5_mcg_percent_change_Online.csv"
df = pd.read_csv(file_path)

# One figure with a single Axes
fig, ax = plt.subplots(figsize=(14, 7))

# One marked line per MCG, in order of first appearance in the file
for mcg in df['mcg'].unique():
    mcg_data = df.loc[df['mcg'] == mcg]
    ax.plot(mcg_data['Month Period'], mcg_data['adjusted_Online_spend'], marker='o', linestyle='-', label=mcg)

# Titles, axis labels, grid and an outside-right legend
ax.set_title("Top 5 MCGs Adjusted Online Spend Over Time")
ax.set_xlabel("Month")
ax.set_ylabel("Adjusted Online Spend")
plt.xticks(rotation=90)
ax.grid(True)
ax.legend(title="MCG", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig("top_5_mcg_adjusted_spend_line_chart.png")
plt.show()



In [None]:
# Indexed Face-to-Face spend per MCG, monthly: each MCG's adjusted spend is
# expressed relative to that MCG's own 2019 monthly average (= 100).

import pandas as pd
import html

# Adjusted monthly Face-to-Face spend: one row per (time_period_value, mcg)
df = pd.read_csv("Adjusted_F2F_MCGs_Mth.csv")

# Decode HTML entities (e.g. "&amp;") in the MCG names
df['mcg'] = df['mcg'].map(html.unescape)

# Rows belonging to the twelve months of 2019
months_2019 = [f"2019{month:02d}" for month in range(1, 13)]
in_2019 = df['time_period_value'].astype(str).isin(months_2019)
df_2019 = df[in_2019]

# 2019 mean adjusted spend of each MCG: the index base
base_avg_per_mcg = df_2019.groupby('mcg')['adjusted_F2F_spend'].mean()


def compute_indexed(row):
    """Return the row's spend as an index with the MCG's 2019 average = 100.

    Returns None when the MCG has no 2019 average or that average is zero.
    """
    base_avg = base_avg_per_mcg.get(row['mcg'], None)
    if pd.isnull(base_avg) or base_avg == 0:
        return None
    return (row['adjusted_F2F_spend'] / base_avg) * 100


# Index every row against its own MCG's base
df['Indexed Spend F2F'] = df.apply(compute_indexed, axis=1)

# Persist the frame including the new index column
df.to_csv("Indexed_F2F_MCGs_Mth.csv", index=False)

# Preview the first rows
df.head()

