In [None]:
# Root directory that is added to the import path below.
# NOTE(review): hard-coded absolute path — consider making this configurable.
project_path = "/home/jupyter"
import os
import sys

# Append project_path to sys.path before the fintrans_toolbox / ft_digital_trade
# imports further down (presumably those packages live under it — confirm).
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

# BigQuery client created with default settings.
# NOTE(review): not referenced by any of the cells visible in this section.
client = bigquery.Client()

In [None]:
# Load the CSV that, per the original note, is produced by 'Analysis for section 4.py'.
# The file is read from the current working directory; the bare name on the last
# line displays the loaded frame as the cell output.
online_df= pd.read_csv("online_mcg_totals_quarterly.csv")
online_df

In [None]:
# Clean up the table: drop the 'Unnamed: 0' and 'online_spend' columns, then
# separate the rows where mcg == 'All' from the remaining per-MCG rows.
cleaned_online_df = online_df.drop(columns=['Unnamed: 0', 'online_spend'])

# Dropping columns keeps the row index, so a mask built on online_df still aligns.
cleaned_total_spend_rows = cleaned_online_df.loc[online_df['mcg'].eq('All')].copy()
cleaned_online_df = cleaned_online_df.loc[cleaned_online_df['mcg'].ne('All')]

cleaned_total_spend_rows

In [None]:
# Creating an analysis function to apply to both datasets


def _print_latest_quarter_leaders(df):
    """Print the three MCGs with the largest adjusted online spend in the last quarter."""
    # The latest quarter is taken from the LAST row, so df is assumed to be in
    # chronological order.
    latest_quarter = df['time_period_value'].iloc[-1]
    most_recent = df[df['time_period_value'] == latest_quarter]
    top_3_spend = most_recent.nlargest(3, 'adjusted_online_spend')

    # Reformat and rename columns for display
    table = top_3_spend[[
        'mcg',
        'adjusted_online_spend',
        'percent_change',
        'contribution_to_all_change',
    ]].rename(columns={
        'mcg': 'MCG',
        'adjusted_online_spend': 'Adjusted Spend',
        'percent_change': '% Change QoQ',
        'contribution_to_all_change': 'Contribution to Total Change',
    })

    print(f"\nTop 3 MCGs in {latest_quarter} by Adjusted Online Spend:\n")
    print(table.to_string(index=False))
    print()


def _annual_spend_by_mcg(df, excluded_years):
    """Collapse quarterly rows to one row per (year, mcg) and add year-on-year changes."""
    # The year is the first four characters of the period label.
    with_year = df.assign(year=df['time_period_value'].str[:4].astype(int))

    annual = with_year.groupby(['year', 'mcg']).agg({
        'adjusted_online_spend': 'sum',
        'all_nominal_change': 'sum',
        'nominal_change': 'sum',
        'percent_change': 'mean',  # average of the quarterly % changes
    }).reset_index()

    # Drop the years that must not enter the annual comparison.
    annual = annual[~annual['year'].isin(list(excluded_years))]

    # Sort so that pct_change/diff compare each year with the previous one per MCG.
    annual = annual.sort_values(['mcg', 'year'])
    annual['yoy_spend_change'] = annual.groupby('mcg')['adjusted_online_spend'].pct_change() * 100
    annual['yoy_nominal_change'] = annual.groupby('mcg')['adjusted_online_spend'].diff()
    return annual


def _print_growth_between_years(annual, start_year, end_year):
    """Print the five largest rises and five largest falls in annual spend between two years."""
    start_col = f'spend_{start_year}'
    end_col = f'spend_{end_year}'

    spend_start = annual.loc[
        annual['year'] == start_year, ['mcg', 'adjusted_online_spend']
    ].rename(columns={'adjusted_online_spend': start_col})
    spend_end = annual.loc[
        annual['year'] == end_year, ['mcg', 'adjusted_online_spend']
    ].rename(columns={'adjusted_online_spend': end_col})

    # Inner merge: only MCGs present in both years are compared.
    growth = pd.merge(spend_start, spend_end, on='mcg')
    growth['absolute_change'] = growth[end_col] - growth[start_col]
    growth['percent_change'] = (growth['absolute_change'] / growth[start_col]) * 100

    top_gainers = growth.sort_values(by='absolute_change', ascending=False).head(5)
    top_losers = growth.sort_values(by='absolute_change').head(5)

    print(f"Top 5 MCGs with the highest increase in adjusted online spend from {start_year} to {end_year}:")
    print(top_gainers.to_string(index=False))

    print(f"\nTop 5 MCGs with the highest decrease in adjusted online spend from {start_year} to {end_year}:")
    print(top_losers.to_string(index=False))
    print()


def _print_yoy_extremes(annual, heading, largest):
    """Print up to ten of the largest (or smallest) year-on-year % changes."""
    select = annual.nlargest if largest else annual.nsmallest
    # The first year of each MCG has no previous year, so its change is NaN; skip those.
    extremes = select(10, 'yoy_spend_change').dropna(subset=['yoy_spend_change'])

    print(heading)
    for mcg, year, change in zip(extremes['mcg'], extremes['year'], extremes['yoy_spend_change']):
        print(f"{mcg} ({year}): {change:+.1f}%")
    print()


def analyse_online_spend(df, total_spend, start_year=2019, end_year=2024, excluded_years=(2025,)):
    """Print summary tables of adjusted online spend by MCG and return share changes.

    The function prints, in order:
      1. the top 3 MCGs by adjusted online spend in the latest quarter (last row of df),
      2. the top 5 rises and falls in annual spend between start_year and end_year,
      3. the ten largest and ten smallest year-on-year % changes across all years,
      4. each MCG's change in share of annual spend between start_year and end_year.

    Args:
        df: Quarterly per-MCG rows. Must contain 'time_period_value' (string whose
            first four characters are the year), 'mcg', 'adjusted_online_spend',
            'percent_change', 'contribution_to_all_change', 'all_nominal_change'
            and 'nominal_change'. Rows are assumed to be in chronological order.
        total_spend: Kept for backward compatibility with existing callers. The
            shares are computed against the sum of the MCG rows in df, so this
            frame does not influence any printed or returned value.
        start_year: First year of the start-to-end comparison (default 2019).
        end_year: Last year of the start-to-end comparison (default 2024).
        excluded_years: Years removed from the annual data before any annual
            figure is computed (default: 2025 only).

    Returns:
        DataFrame with one row per MCG and the columns 'mcg', start_year, end_year
        (share of annual spend in percent) and 'change_<start>_to_<end>'
        (end-year share minus start-year share, in percentage points).

    Raises:
        ValueError: If start_year is not strictly before end_year.
        KeyError: If start_year or end_year is absent from the annual data.
    """
    if start_year >= end_year:
        raise ValueError(
            f"start_year ({start_year}) must be earlier than end_year ({end_year})"
        )

    _print_latest_quarter_leaders(df)

    annual_data = _annual_spend_by_mcg(df, excluded_years)

    # Fail with a clear message instead of an opaque KeyError from the pivot below.
    available_years = set(annual_data['year'].tolist())
    missing_years = [year for year in (start_year, end_year) if year not in available_years]
    if missing_years:
        raise KeyError(
            f"No annual data for year(s) {missing_years}; available years: {sorted(available_years)}"
        )

    _print_growth_between_years(annual_data, start_year, end_year)

    # Largest year-on-year movements across the whole time series
    _print_yoy_extremes(annual_data, "Top 10 biggest % increases:", largest=True)
    _print_yoy_extremes(annual_data, "Top 10 biggest % decreases:", largest=False)

    # Each MCG's share of the annual total, where the total is the sum over MCG rows.
    yearly_total = annual_data.groupby('year')['adjusted_online_spend'].transform('sum')
    annual_shares = annual_data.assign(
        mcg_share=(annual_data['adjusted_online_spend'] / yearly_total) * 100
    )

    # Pivot to get the start-year and end-year shares side by side
    comparison_years = annual_shares[annual_shares['year'].isin([start_year, end_year])]
    pivot_df = comparison_years.pivot(index='mcg', columns='year', values='mcg_share').reset_index()

    change_col = f'change_{start_year}_to_{end_year}'
    pivot_df[change_col] = pivot_df[end_year] - pivot_df[start_year]
    print(pivot_df[['mcg', change_col]])

    return pivot_df
    
# Run the analysis on the per-MCG rows (mcg != 'All'), passing the 'All' rows as
# the second argument; as the cell's last expression the returned frame is displayed.
analyse_online_spend(cleaned_online_df, cleaned_total_spend_rows)