In [None]:
# Directory appended to sys.path below; presumably where the fintrans_toolbox
# package lives on this machine (confirm before running elsewhere).
project_path = "/home/jupyter"
import os
import sys

# Must run before the fintrans_toolbox imports further down, otherwise the
# package will not be found unless it is already on the import path.
sys.path.append(project_path)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import re
from google.cloud import bigquery
import plotly.graph_objects as go


# Project helpers: `bq` is used later to run SQL against BigQuery.
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# BigQuery client created with no explicit project or credentials arguments;
# it is passed to the bq helper when querying.
client = bigquery.Client()

In [None]:
# Pull 2024 quarterly spend rows for the 'Online' merchant channel where
# cardholder_origin is UNITED KINGDOM and destination_country is anything
# other than UNITED KINGDOM, restricted to six selected MCCs.
# NOTE(review): the GROUP BY lists every selected column and there is no
# aggregate function, so it only de-duplicates rows rather than summing spend;
# confirm that this is the intended behaviour.
UK_mcc_country = '''SELECT time_period_value, mcc, spend, destination_country
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter'
and time_period_value IN ("2024Q1","2024Q2","2024Q3","2024Q4")
and merchant_channel = 'Online'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and destination_country != 'UNITED KINGDOM'
and mcc IN ("LARGE DIGITAL GOODS MERCHANT","AIRLINES","LODGING","COMPUTER SOFTWARE STORES","TRAVEL AGENCIES","DIGITAL GOODS GAMES")
GROUP BY mcc, time_period_value, spend, destination_country
ORDER BY time_period_value, spend DESC'''

mcc_country_df = bq.read_bq_table_sql(client, UK_mcc_country)

# The first four characters of a value such as "2024Q1" are the year.
mcc_country_df['year'] = mcc_country_df['time_period_value'].str[:4]

# Persist the extract (the row index is written too, as before).
mcc_country_df.to_csv('mcc_country_df.csv')

# Preview goes last: only the final expression of a cell is rendered, so the
# earlier mid-cell .head() call was silently discarded.
mcc_country_df.head()

In [None]:
# Map each destination_country label to a continent / region bucket.
# Keys must match the data exactly, including the double space in the
# "REST OF  ..." labels. Anything not listed here falls into 'Other' below.
continent_map = {
    "UNITED STATES OF AMERICA": "North America",
    "REST OF  CANADA": "North America",
    "GERMANY": "Europe",
    "REPUBLIC OF IRELAND": "Europe",
    "REST OF  EUROPE": "Europe",
    "SPAIN": "Europe",
    "FRANCE": "Europe",
    "NETHERLANDS": "Europe",
    "ITALY": "Europe",
    "SWITZERLAND": "Europe",
    "POLAND": "Europe",
    "AUSTRIA": "Europe",
    "PORTUGAL": "Europe",
    "REST OF  C.E.M.E.A.": "C.E.M.E.A",
    "UNITED ARAB EMIRATES": "Asia-Pacific",
    "INDIA": "Asia-Pacific",
    "AUSTRALIA": "Asia-Pacific",
    "REST OF  ASIAPAC": "Asia-Pacific",
    "REST OF  LAT.AM.": "South America",
    "SOUTH AFRICA": "Africa"
}

# Tag every row with its continent; unmapped countries become 'Other'.
mcc_country_df['continent'] = mcc_country_df['destination_country'].map(continent_map).fillna('Other')
mcc_country_df.to_csv('continent.csv')

# Total spend per (MCC, continent) pair.
continent_grouped = mcc_country_df.groupby(['mcc', 'continent'])['spend'].sum().reset_index()

# Express each continent's spend as a percentage of its MCC's total spend.
continent_grouped['total_spend_per_mcc'] = continent_grouped.groupby('mcc')['spend'].transform('sum')
continent_grouped['spend_share'] = continent_grouped['spend'] / continent_grouped['total_spend_per_mcc'] * 100

# Wide table: one row per MCC, one column per continent, values are % share.
# MCC/continent combinations with no spend are filled with 0.
chart_data = continent_grouped.pivot(index='mcc', columns='continent', values='spend_share').fillna(0)

# Fixed colour per continent so the chart is stable between runs.
continent_colors = {
    "North America": "#1f77b4",
    "Europe": "#ff7f0e",
    "Asia-Pacific": "#2ca02c",
    "C.E.M.E.A": "#d62728",
    "South America": "#9467bd",
    "Africa": "#8c564b",
    "Other": "#cccccc"
}

# Order the columns to follow the colour dictionary; continents without a
# colour entry sort to the end. `pivot_df` is built from `chart_data` here:
# it was previously used without ever being defined, which raised a NameError
# on a fresh kernel.
color_order = list(continent_colors)
pivot_df = chart_data[
    sorted(
        chart_data.columns,
        key=lambda col: color_order.index(col) if col in continent_colors else len(color_order),
    )
]

# Stacked bar chart: one bar per MCC, segments are continent shares.
fig, ax = plt.subplots(figsize=(14, 12))
pivot_df.plot(
    kind='bar',
    stacked=True,
    ax=ax,
    width=0.8,
    color=[continent_colors.get(col, '#cccccc') for col in pivot_df.columns],
)

# Axis labels and title
ax.set_ylabel('Share of Spend (%)')
ax.set_xlabel('Sector (MCC)')
ax.set_title('Share of Spend by Continent within Each MCC Sector (2024)')
plt.xticks(rotation=45, ha='right')

# Legend sits below the axes, laid out in a wide, flat block
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.25),
    ncol=4,
    title='Continent',
    frameon=False
)

plt.tight_layout()
plt.savefig("stacked_bar_chart_by_continent.png")


In [None]:
# Roll the per-(MCC, continent) table up to one row per continent.
# Uses `continent_grouped`, the frame built in the chart cell; the previous
# name `grouped` was never defined and raised a NameError on a fresh kernel.
# NOTE(review): summing `total_spend_per_mcc` adds up the totals of only those
# MCCs in which the continent appears, so the denominator equals the grand
# total only when the continent has a row for every MCC; confirm this is the
# intended definition of "share".
total_share = continent_grouped.groupby("continent").agg({
    "spend": "sum",
    "total_spend_per_mcc": "sum"
}).reset_index()
total_share["share"] = (total_share["spend"] / total_share["total_spend_per_mcc"]) * 100

In [None]:
# Bare last expression so the notebook renders the frame as a table
# instead of the plain-text dump that print() produces.
total_share

In [None]:
# Bare last expression so the notebook renders the MCC-by-continent share
# table with its rich display instead of a plain-text print().
pivot_df

In [None]:
# Total spend per destination country across all rows of the extract.
country_grouped = mcc_country_df.groupby(['destination_country'])['spend'].sum()

# Each country's spend as a percentage of the overall total.
country_grouped_perc = country_grouped.div(country_grouped.sum()).mul(100)
country_grouped_perc

In [None]:
# The US and "REST OF  CANADA" together make up North America. Compute the
# US share of North American spend (%) from the data itself rather than from
# figures typed in by hand (previously 10.385408 and 0.944747), so the result
# tracks the query output. .loc with a list raises KeyError if either label
# is missing from the data.
north_america_spend = country_grouped.loc[["UNITED STATES OF AMERICA", "REST OF  CANADA"]]
north_america_spend.loc["UNITED STATES OF AMERICA"] / north_america_spend.sum() * 100