In [None]:
# Standard library
import os
import re
import sys

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from google.cloud import bigquery

# Extend the import search path before importing the project-local helpers below
project_path = "/home/jupyter"
sys.path.append(project_path)

# Project-local helpers (presumably located under project_path -- confirm)
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# BigQuery client handed to the query helper in the cells below
client = bigquery.Client()

In [None]:
# 2024 quarterly online spend by UK cardholders at non-UK destinations,
# restricted to a fixed list of MCCs, one row per (quarter, mcc, spend, destination).
# NOTE(review): every selected column appears in the GROUP BY and nothing is
# aggregated, so the clause only de-duplicates rows -- confirm whether
# SUM(spend) was intended.
UK_mcc_country = '''SELECT time_period_value, mcc, spend, destination_country
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value IN ("2024Q1","2024Q2","2024Q3","2024Q4")
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcc IN ("LARGE DIGITAL GOODS MERCHANT","AIRLINES","LODGING","COMPUTER SOFTWARE STORES","TRAVEL AGENCIES","DIGITAL GOODS GAMES")
GROUP BY mcc, time_period_value, spend, destination_country
ORDER BY time_period_value, spend DESC'''

# Run the query through the project helper
mcc_country_df = bq.read_bq_table_sql(client, UK_mcc_country)

# The first four characters of e.g. "2024Q1" are the year; kept as a string here
mcc_country_df['year'] = mcc_country_df['time_period_value'].str.slice(0, 4)

# Preview the first rows
mcc_country_df.head()

In [None]:
# Round-trip the query result through CSV. index=False keeps the DataFrame index
# out of the file (an unnamed index would otherwise be read back as a spurious
# "Unnamed: 0" column) and matches the second export below.
mcc_country_df.to_csv('mcc_country_df.csv', index=False)
df = pd.read_csv('mcc_country_df.csv')

# 'year' is created as a string ("2024") upstream and only becomes an integer
# because read_csv re-infers the dtype. Coerce explicitly so the comparison does
# not depend on that inference; unparseable values become NaN and are excluded.
is_2024 = pd.to_numeric(df['year'], errors='coerce') == 2024

# Keep only the columns needed for the per-MCC country breakdown
latest_year_mcc_country = df.loc[is_2024, ['destination_country', 'spend', 'mcc']]
latest_year_mcc_country.to_csv('latest_year_mcc_country.csv', index=False)

In [None]:
# Sum spend for every (MCC, destination country) pair
agg_df = (
    latest_year_mcc_country
    .groupby(["mcc", "destination_country"], as_index=False)["spend"]
    .sum()
)

# Overall spend per MCC -- the denominator for each country's share
total_mcc_spend = (
    agg_df
    .groupby("mcc")["spend"]
    .sum()
    .reset_index()
    .rename(columns={"spend": "total_mcc_spend"})
)

# Attach the MCC total to each row, then express each country's spend as a
# percentage of that total
agg_df = agg_df.merge(total_mcc_spend, on="mcc")
agg_df["percentage"] = agg_df["spend"].div(agg_df["total_mcc_spend"]).mul(100)

# Row label of the highest-spend country within each MCC, then those rows
idx = agg_df.groupby("mcc")["spend"].idxmax()
top_countries = agg_df.loc[idx]

# One bar per MCC showing the share held by its top destination country
axis_labels = {"mcc": "Merchant Category Code", "percentage": "Percentage of Spend (%)"}
fig = px.bar(
    data_frame=top_countries,
    x="mcc",
    y="percentage",
    color="destination_country",
    title="Top Country Percentage Contribution Within Each MCC",
    labels=axis_labels,
    barmode="group",
)

# Render the figure
fig.show()