In [None]:
# Directory appended to sys.path below so that the project-local imports
# (fintrans_toolbox, ft_digital_trade) resolve. Presumably the checkout root on
# the notebook host — adjust if the repositories live elsewhere.
project_path = "/home/jupyter"
import os
import sys

# Must run before the fintrans_toolbox / ft_digital_trade imports further down.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import plotly.express as px
from datetime import datetime
from google.cloud import bigquery

from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

import ft_digital_trade.src.utils.read_data as read_utils
import ft_digital_trade.src.utils.clean_utils as clean_utils
import ft_digital_trade.src.utils.calculation_utils as calc_utils
import ft_digital_trade.src.utils.plot_utils as plot_utils

# Single BigQuery client shared by every query cell (passed to bq.read_bq_table_sql).
# Created with no arguments — presumably project and credentials come from the
# runtime environment; confirm for your setup.
client = bigquery.Client()

In [None]:
# Quarterly total of online spend by UK cardholders, one row per time_period_value.
# The result (total_spend) is later passed to analysis_calcs, which uses it as the
# denominator for the "% of total" columns.
# Filters: quarterly rows, the 'All' MCG and MCC rows, the Online channel,
# UK cardholders, and every destination_country row except the 'All' row.
# NOTE(review): destination_country != 'All' does not exclude 'UNITED KINGDOM', so
# this total appears to include domestic online spend as well as spend abroad —
# confirm that this is the intended denominator.
total_online = '''SELECT time_period_value, sum(spend) as total_online_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All'
  AND mcc = 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All'
  AND destination_country != 'All'
GROUP BY time_period_value
ORDER BY time_period_value ASC'''
total_spend = bq.read_bq_table_sql(client, total_online)
# Uncomment to inspect the query result:
#total_spend

In [None]:
# Quarterly online spend by UK cardholders where destination_country is
# 'REST OF  EUROPE', summed per quarter and per MCC (column total_spend_online).
# Rows whose mcg or mcc is 'All' are excluded, so only individual MCC rows remain
# (presumably the 'All' rows are pre-aggregated totals — confirm against the table).
# NOTE(review): the destination literal contains two spaces between "OF" and
# "EUROPE"; presumably this matches the value stored in the table — confirm.
abroad_mccs = '''SELECT time_period_value, sum(spend) as total_spend_online, mcc
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg != 'All'
  AND mcc != 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
  AND destination_country = 'REST OF  EUROPE'
GROUP BY time_period_value, mcc
ORDER BY time_period_value, mcc ASC'''
abroad_mcc_comparison = bq.read_bq_table_sql(client, abroad_mccs)
# Display the long-format result (one row per quarter/MCC pair).
abroad_mcc_comparison

In [None]:
# Quarterly online spend by UK cardholders where destination_country is
# 'UNITED KINGDOM' (i.e. the domestic counterpart of the abroad query), summed per
# quarter and per MCC (column total_spend_dom).
# Rows whose mcg or mcc is 'All' are excluded, so only individual MCC rows remain
# (presumably the 'All' rows are pre-aggregated totals — confirm against the table).
domestic_mccs = '''SELECT time_period_value, sum(spend) as total_spend_dom, mcc
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg != 'All'
  AND mcc != 'All'
  AND merchant_channel = 'Online'
  AND cardholder_origin = 'UNITED KINGDOM'
  AND cardholder_origin_country = 'All' 
  AND destination_country = 'UNITED KINGDOM'
GROUP BY time_period_value, mcc
ORDER BY time_period_value, mcc ASC'''
domestic_mcc_comparison = bq.read_bq_table_sql(client, domestic_mccs)
# Display the long-format result (one row per quarter/MCC pair).
domestic_mcc_comparison

In [None]:
# Reshape both long query results to wide form: one row per quarter
# (time_period_value), one column per MCC, cells holding the summed spend.
df_uk_pivot = domestic_mcc_comparison.pivot_table(
    values='total_spend_dom',
    index='time_period_value',
    columns='mcc',
    aggfunc='sum',
)
df_abroad_pivot = abroad_mcc_comparison.pivot_table(
    values='total_spend_online',
    index='time_period_value',
    columns='mcc',
    aggfunc='sum',
)

# Uncomment to inspect the domestic pivot:
#df_uk_pivot

In [None]:
# Prefix every MCC column with its region so the two tables can be merged
# without column-name clashes.
# Caution: this relabels the pivots in place, so re-running this cell without
# re-running the pivot cell first stacks the prefix a second time.
df_uk_pivot.columns = [
    f'UK_{mcc_name}'
    for mcc_name in df_uk_pivot.columns
]
df_abroad_pivot.columns = [
    f'Abroad_{mcc_name}'
    for mcc_name in df_abroad_pivot.columns
]

df_abroad_pivot

In [None]:
# Join the UK and abroad pivots on quarter (default inner join, so only quarters
# present in both survive), then turn the quarter index back into a column.
merged_table = (
    df_uk_pivot
    .merge(df_abroad_pivot, on='time_period_value')
    .reset_index()
)

# Long format for Plotly Express: one row per (quarter, Region_MCC) pair.
merged_table_melted = pd.melt(
    merged_table,
    id_vars='time_period_value',
    var_name='Region_MCC',
    value_name='Total Spend',
)

merged_table_melted

In [None]:
# Line chart comparing domestic and abroad online spend, one line per
# region-prefixed MCC column of the melted table.
fig1 = px.line(
    data_frame=merged_table_melted,
    x='time_period_value',
    y='Total Spend',
    color='Region_MCC',
    title='Online, Domestic vs Abroad Spend Comparison',
    markers=True,
)

# Render the figure in the notebook output.
fig1.show()

In [None]:
# Line chart of the abroad query result only, one line per MCC.
fig2 = px.line(
    data_frame=abroad_mcc_comparison,
    x='time_period_value',
    y='total_spend_online',
    color='mcc',
    title='Abroad + Online Spend by Selected MCCs',
    markers=True,
)

# Render the figure in the notebook output.
fig2.show()

In [None]:
# Select a specific period to investigate: the first four rows of the merged table.
# NOTE(review): the variable name implies these are the four quarters of 2019,
# which only holds if the data starts in 2019 Q1 and is ordered by quarter — confirm.
merged_table_2019 = merged_table.head(n=4)
merged_table_2019

In [None]:
# Creating analysis tables

def analysis_calcs(df, total_spend, output_path="enhanced_quarterly_spend_analysis.csv"):
    """Build a formatted quarter-on-quarter analysis table for a set of spend columns.

    For every column of ``df`` other than ``time_period_value`` the result contains
    the original value plus four derived columns:

    * ``<col>_QoQ_Diff``        - difference from the previous row
    * ``<col>_QoQ_Pct_Change``  - relative change from the previous row
    * ``<col>_Pct_of_Total``    - value divided by that period's total spend
    * ``<col>_2Q_Rolling_Avg``  - mean of the current and previous row

    A ``Total_Spend`` column holds the total matched to each row.

    Totals are matched to rows by the value of ``time_period_value`` rather than by
    row position, so ``total_spend`` may cover more periods than ``df`` or list
    them in a different order. Periods of ``df`` with no matching total get an
    empty ``Total_Spend`` / ``*_Pct_of_Total``.

    The "previous row" calculations use the row order of ``df`` as given; the
    caller is responsible for passing rows in chronological order.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``time_period_value`` column; all other columns must be
        numeric spend columns.
    total_spend : pandas.DataFrame
        Table with a ``time_period_value`` column (one row per period) and at least
        one numeric column; the first numeric column is used as the total.
    output_path : str or path-like or None, optional
        Where to write the result as CSV (without the index). Defaults to
        ``"enhanced_quarterly_spend_analysis.csv"`` in the working directory.
        Pass ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df`` with a fresh 0..n-1 index. All columns except
        ``time_period_value`` are formatted strings: percentages as ``"12.34%"``,
        other values with two decimals, missing values as ``""``.

    Raises
    ------
    ValueError
        If either input lacks ``time_period_value``, if ``total_spend`` has no
        numeric column, or if ``total_spend`` lists a period more than once.
    """
    key = 'time_period_value'

    if key not in df.columns or key not in total_spend.columns:
        raise ValueError("Both df and total_spend must contain 'time_period_value' column for alignment.")

    # Automatically detect the spend column from total_spend (first numeric column).
    spend_candidates = total_spend.drop(columns=key).select_dtypes(include='number')
    if spend_candidates.shape[1] == 0:
        raise ValueError("No numeric spend column found in total_spend.")

    # A period listed twice would make the lookup below ambiguous.
    if total_spend[key].duplicated().any():
        raise ValueError("total_spend must contain at most one row per 'time_period_value'.")

    # Lookup table: period -> total spend.
    totals_by_period = spend_candidates.iloc[:, 0].copy()
    totals_by_period.index = pd.Index(total_spend[key])

    # Work on a positionally indexed copy so every piece concatenated below shares
    # the same 0..n-1 index regardless of the index the caller's frame carries.
    frame = df.reset_index(drop=True)
    time_col = frame[[key]]
    df_numeric = frame.drop(columns=key)

    # Match totals to rows by period value, not by row position.
    total_spend_series = frame[key].map(totals_by_period).rename("Total_Spend")

    # Quarter-on-quarter difference
    diff_df = df_numeric.diff().rename(columns=lambda x: f"{x}_QoQ_Diff")

    # % change quarter-on-quarter
    pct_change_df = df_numeric.pct_change().rename(columns=lambda x: f"{x}_QoQ_Pct_Change")

    # Each column's share of total spend
    pct_of_total_df = df_numeric.div(total_spend_series, axis=0).rename(columns=lambda x: f"{x}_Pct_of_Total")

    # Rolling average (2-quarter)
    rolling_avg_df = df_numeric.rolling(window=2).mean().rename(columns=lambda x: f"{x}_2Q_Rolling_Avg")

    # Combine all results
    result_df = pd.concat([
        time_col,
        df_numeric,
        diff_df,
        pct_change_df,
        total_spend_series,
        pct_of_total_df,
        rolling_avg_df,
    ], axis=1)

    # Only the columns derived as ratios are percentages; tracking them explicitly
    # avoids mis-formatting a spend column whose own name happens to contain "Pct".
    pct_cols = set(pct_change_df.columns) | set(pct_of_total_df.columns)

    # Format column by column (Series.map works across pandas versions, unlike the
    # deprecated DataFrame.applymap).
    for col in result_df.columns:
        if col == key:
            continue
        spec = ".2%" if col in pct_cols else ".2f"
        result_df[col] = result_df[col].map(
            lambda x, spec=spec: format(x, spec) if pd.notnull(x) else ""
        )

    # Save to CSV unless the caller opted out.
    if output_path is not None:
        result_df.to_csv(output_path, index=False)

    return result_df



# Run the analysis on the merged UK/abroad table against the quarterly totals.
# Besides returning the table, analysis_calcs also writes it to
# "enhanced_quarterly_spend_analysis.csv".
analysis_df = analysis_calcs(merged_table, total_spend)
analysis_df

In [None]:
# Pull out the quarter-on-quarter difference columns for two MCCs of interest:
# UK "MISC SPECIALTY RETAIL" and abroad "BOOK STORES".
result_1 = analysis_df[
    [
        "UK_MISC SPECIALTY RETAIL_QoQ_Diff",
        "Abroad_BOOK STORES_QoQ_Diff",
    ]
]
result_1