In [None]:
# --- Standard library ---
import os
import re
import sys

# --- Third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from google.cloud import bigquery

# Extend the import search path before importing the project-local package
# (presumably `fintrans_toolbox` lives under this directory -- confirm).
project_path = "/home/jupyter"
sys.path.append(project_path)

# --- Project-local (must stay after the sys.path change above) ---
from fintrans_toolbox.src import bq_utils as bq
from fintrans_toolbox.src import table_utils as t

# BigQuery client handed to the query helper in the cells below.
client = bigquery.Client()

In [None]:
# Monthly spend by merchant channel for UK cardholders, across all merchant
# category groups (mcg = 'All') and all origin countries.
# The GROUP BY lists every selected column, so it only de-duplicates rows;
# no aggregation is performed in SQL.
merchant_channel_sql = '''SELECT time_period_value, spend, merchant_channel
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg = 'All'
GROUP BY time_period_value, merchant_channel, spend
ORDER BY time_period_value, spend DESC'''

# Execute the query through the project helper and keep the result table.
UK_index_merchant_channel = bq.read_bq_table_sql(client, merchant_channel_sql)

# Preview the first rows of the result.
UK_index_merchant_channel.head()

In [None]:
# Save the raw merchant-channel query result next to the notebook.
# No `index` argument is given, so the row index is written as the first column.
UK_index_merchant_channel.to_csv(path_or_buf="UK_index_merchant_channel.csv")

In [None]:
# Monthly cardholder counts for UK cardholders, with mcg, merchant channel and
# origin country all fixed to their 'All' roll-up rows.
# The GROUP BY lists every selected column, so it only de-duplicates rows;
# no aggregation is performed in SQL.
cardholders_sql = '''SELECT time_period_value, cardholders
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Month'
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM'
and mcg = 'All'
and merchant_channel = 'All'
GROUP BY time_period_value, cardholders
ORDER BY time_period_value, cardholders DESC'''

# Execute the query through the project helper and keep the result table.
UK_index_cardholders = bq.read_bq_table_sql(client, cardholders_sql)

# Preview the first rows of the result.
UK_index_cardholders.head()

In [None]:
# Save the raw cardholder query result next to the notebook.
# No `index` argument is given, so the row index is written as the first column.
UK_index_cardholders.to_csv(path_or_buf="UK_index_cardholders.csv")

In [None]:
# Parse the YYYYMM period codes into timestamps.
UK_index_cardholders["date"] = pd.to_datetime(
    UK_index_cardholders["time_period_value"].astype(str),
    format="%Y%m",
)

# Total cardholders per month.
monthly_cardholders = (
    UK_index_cardholders
    .groupby("date")["cardholders"]
    .sum()
    .reset_index()
)

# Tag each month with its calendar year so the 2019 baseline can be selected.
monthly_cardholders["year"] = monthly_cardholders["date"].dt.year

# Baseline: mean monthly cardholder count over the 2019 rows.
is_2019 = monthly_cardholders["year"] == 2019
base_2019_avg = monthly_cardholders.loc[is_2019, "cardholders"].mean()

# Cardholder index for every month, scaled so that the 2019 average equals 100.
# Alternative not used here: base the index on January 2019 alone.
monthly_cardholders["monthly_index"] = (
    monthly_cardholders["cardholders"] / base_2019_avg
) * 100

# Write the index table to CSV (without the row index), then print it.
monthly_cardholders.to_csv("monthly_index_cardholders.csv", index=False)

print(monthly_cardholders)

In [None]:
# Parse the YYYYMM period codes into timestamps.
UK_index_merchant_channel["date"] = pd.to_datetime(
    UK_index_merchant_channel["time_period_value"].astype(str), format="%Y%m"
)

# Total spend per (month, merchant channel).
monthly_spend = (
    UK_index_merchant_channel
    .groupby(["date", "merchant_channel"])["spend"]
    .sum()
    .reset_index()
)

# Attach the cardholder index of the same month to every channel row.
# Left join: spend rows are kept even when no cardholder index exists for that
# month (monthly_index is NaN there).
monthly_weighted = pd.merge(
    monthly_spend,
    monthly_cardholders[["date", "monthly_index"]],
    on="date",
    how="left",
)

# Per-channel baseline: mean monthly spend over the 2019 rows.
monthly_weighted["year"] = monthly_weighted["date"].dt.year
base_2019 = (
    monthly_weighted.loc[monthly_weighted["year"] == 2019]
    .groupby("merchant_channel")["spend"]
    .mean()
)

# Normalise spend so that each channel's 2019 average equals 100.
# The baseline is looked up with a vectorised `map` instead of a row-wise
# `apply`; a channel with no 2019 rows gets NaN rather than raising KeyError,
# and an empty frame no longer breaks the column assignment.
channel_base = monthly_weighted["merchant_channel"].map(base_2019)
monthly_weighted["indexed_spend"] = (monthly_weighted["spend"] / channel_base) * 100

# Adjust the spend index by the cardholder index: the factor
# (200 - monthly_index) / 100 is 1 when the cardholder index is 100, below 1
# when it is above 100 and above 1 when it is below 100.
# NOTE(review): confirm this linear adjustment is the intended methodology
# (as opposed to dividing by monthly_index / 100).
monthly_weighted["weighted_indexed_spend"] = monthly_weighted["indexed_spend"] * (
    (200 - monthly_weighted["monthly_index"]) / 100
)

# No `index` argument is given, so the row index is written as the first column.
monthly_weighted.to_csv('monthly_weighted.csv')

In [None]:
# Reshape to wide form: one row per month, one column per merchant channel,
# holding the cardholder-weighted spend index.
pivot_df = monthly_weighted.pivot(
    index="date",
    columns="merchant_channel",
    values="weighted_indexed_spend",
)

# Draw one line per merchant channel on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
for channel, series in pivot_df.items():
    ax.plot(pivot_df.index, series, label=channel)

ax.set_title("Monthly Spend Index by Merchant Channel (Base: 2019 Average = 100)")
ax.set_xlabel("Month")
ax.set_ylabel("Spend Index")
ax.legend(title="Merchant Channel")
ax.grid(True)
fig.tight_layout()

# Write the chart to disk (use plt.show() instead to only display it).
fig.savefig("monthly_spend_index.png")

In [None]:
# Print the pivoted table (rows: month, columns: merchant channel) as plain text.
print(pivot_df)

In [None]:
# Save the pivoted table; no `index` argument is given, so the date index is
# written as the first CSV column.
pivot_df.to_csv(path_or_buf="pivot_df.csv")