In [None]:
# Directory that is appended to the module search path below.
# NOTE(review): hard-coded absolute path -- consider making it configurable.
project_path = "/home/jupyter"
import os
import sys
# Extend sys.path before the fintrans_toolbox import further down;
# presumably the package lives under project_path -- TODO confirm.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# BigQuery client created with default arguments; the query cells below pass
# it to bq.read_bq_table_sql.
client = bigquery.Client()

In [None]:
# UK cardholders, mcg = 'All' and merchant_channel = 'All':
# total number of cardholders per quarter, saved to UK_spending_by_mcg_All.csv.

# NOTE(review): the GROUP BY lists every selected column, so the query only
# removes rows that are identical in all four columns; confirm that this
# de-duplication is intended before the per-quarter sum below.
# NOTE(review): destination_country is not filtered, so the pandas sum adds
# cardholder counts across destinations -- confirm this does not double count.
UK_spending_by_mcg_All_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_mcg_All = bq.read_bq_table_sql(client, UK_spending_by_mcg_All_sql)

# Validate the query result before touching it. The check covers the columns
# the aggregation actually uses ('time_period_value' and 'cardholders').
required_columns = {'time_period_value', 'cardholders'}
if (
    df_by_mcg_All is None
    or df_by_mcg_All.empty
    or not required_columns.issubset(df_by_mcg_All.columns)
):
    # Fail fast so that no CSV is written and no success message is printed
    # for an unusable query result.
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum the cardholder counts per quarter and give the column its output name.
UK_spending_by_mcg_All = (
    df_by_mcg_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'Spend_mcg_All_cardholders'})
)
print(UK_spending_by_mcg_All)

# Save the result to a CSV file (read again by the adjustment cells).
csv_filename = "UK_spending_by_mcg_All.csv"
UK_spending_by_mcg_All.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# UK cardholders, household (non-B2B) merchant category groups, Online channel:
# total number of cardholders per quarter, saved to UK_HH_Online.csv.

# NOTE(review): the GROUP BY lists every selected column, so the query only
# removes rows that are identical in all four columns. Because mcg is not
# selected, two category groups with equal values for the same quarter and
# destination would collapse into one row -- confirm this is intended.
# NOTE(review): the pandas sum below adds cardholder counts across category
# groups and destinations -- confirm this does not double count cardholders.
UK_HH_Online_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_All = bq.read_bq_table_sql(client, UK_HH_Online_sql)

# Validate the query result before touching it. The check covers the columns
# the aggregation actually uses ('time_period_value' and 'cardholders').
required_columns = {'time_period_value', 'cardholders'}
if (
    df_by_All is None
    or df_by_All.empty
    or not required_columns.issubset(df_by_All.columns)
):
    # Fail fast so that no CSV is written and no success message is printed
    # for an unusable query result.
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum the cardholder counts per quarter and give the column its output name.
UK_HH_Online = (
    df_by_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'HH_Online_cardholders'})
)
print(UK_HH_Online)

# Save the result to a CSV file (read again by the adjustment cell).
csv_filename = "UK_HH_Online.csv"
UK_HH_Online.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")


In [None]:
# Quarterly total of UK-cardholder household spend in the Online channel.
# The aggregation (SUM(spend) per time_period_value) is done in BigQuery;
# the result is written to UK_HH_Online_Spending.csv.

UK_spending_HH_Online_All = '''SELECT time_period_value, SUM(spend) AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY 
time_period_value 
ORDER BY time_period_value'''

# Run the query, then relabel the aggregate column to its output name.
query_result = bq.read_bq_table_sql(client, UK_spending_HH_Online_All)
df_by_HH_Online_All = query_result.rename(
    columns={'total_spend': 'Online_spend_HH'}
)
# Preview call; not rendered because it is not the cell's last expression.
df_by_HH_Online_All.head()

# Persist the quarterly totals for the adjustment cells.
csv_filename = "UK_HH_Online_Spending.csv"
df_by_HH_Online_All.to_csv(csv_filename, index=False)
print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# UK household Online spend rescaled to the 2019Q1 cardholder base.
# Cardholder counts come from the mcg = 'All' extract.

import pandas as pd

# Inputs: quarterly cardholder totals and quarterly Online spend totals.
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_spend = pd.read_csv("UK_HH_Online_Spending.csv")

# Inner join: keep only the quarters present in both inputs.
df_merged = df_cardholders.merge(df_spend, on="time_period_value", how="inner")

# Rows belonging to the base quarter.
base_row = df_merged.loc[df_merged["time_period_value"] == "2019Q1"]
if base_row.empty:
    print("2019Q1 base data not found in the dataset.")
else:
    base_cardholders = base_row["Spend_mcg_All_cardholders"].iloc[0]

    # adjusted spend = (base cardholders / quarter cardholders) * quarter spend
    cardholder_ratio = base_cardholders / df_merged["Spend_mcg_All_cardholders"]
    df_merged["adjusted_Online_spend"] = cardholder_ratio * df_merged["Online_spend_HH"]

    # Persist the adjusted series for the indexing cell.
    df_merged.to_csv("Adjusted_Online_HH_Spend_mcg.csv", index=False)
    print("Adjusted quarterly spend saved to Adjusted_Online_HH_Spend_mcg.csv")
print(df_merged)

In [None]:
# UK household Online spend rescaled to the 2019Q1 cardholder base.
# Cardholder counts come from the household Online extract.

import pandas as pd

# Inputs: quarterly cardholder totals and quarterly Online spend totals.
df_cardholders = pd.read_csv("UK_HH_Online.csv")
df_spend = pd.read_csv("UK_HH_Online_Spending.csv")

# Inner join: keep only the quarters present in both inputs.
df_merged = df_cardholders.merge(df_spend, on="time_period_value", how="inner")

# Rows belonging to the base quarter.
base_row = df_merged.loc[df_merged["time_period_value"] == "2019Q1"]
if base_row.empty:
    print("2019Q1 base data not found in the dataset.")
else:
    base_cardholders = base_row["HH_Online_cardholders"].iloc[0]

    # adjusted spend = (base cardholders / quarter cardholders) * quarter spend
    cardholder_ratio = base_cardholders / df_merged["HH_Online_cardholders"]
    df_merged["adjusted_Online_spend"] = cardholder_ratio * df_merged["Online_spend_HH"]

    # Persist the adjusted series for the indexing cell.
    df_merged.to_csv("Adjusted_Online_HH_Spend.csv", index=False)
    print("Adjusted quarterly spend saved to Adjusted_Online_HH_Spend.csv")
print(df_merged)

In [None]:
# UK cardholders, household (non-B2B) merchant category groups, Face to Face
# channel: total number of cardholders per quarter, saved to UK_HH_F2F.csv.

# NOTE(review): the GROUP BY lists every selected column, so the query only
# removes rows that are identical in all four columns. Because mcg is not
# selected, two category groups with equal values for the same quarter and
# destination would collapse into one row -- confirm this is intended.
# NOTE(review): the pandas sum below adds cardholder counts across category
# groups and destinations -- confirm this does not double count cardholders.
UK_HH_F2F_sql = '''SELECT time_period_value, cardholders, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS' 
and merchant_channel = 'Face to Face' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY cardholders, destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_All = bq.read_bq_table_sql(client, UK_HH_F2F_sql)

# Validate the query result before touching it. The check covers the columns
# the aggregation actually uses ('time_period_value' and 'cardholders').
required_columns = {'time_period_value', 'cardholders'}
if (
    df_by_All is None
    or df_by_All.empty
    or not required_columns.issubset(df_by_All.columns)
):
    # Fail fast so that no CSV is written and no success message is printed
    # for an unusable query result.
    raise ValueError("DataFrame is empty or missing required columns.")

# Sum the cardholder counts per quarter and give the column its output name.
UK_HH_F2F = (
    df_by_All.groupby('time_period_value')['cardholders']
    .sum()
    .reset_index()
    .rename(columns={'cardholders': 'HH_F2F_cardholders'})
)
print(UK_HH_F2F)

# Save the result to a CSV file (read again by the adjustment cell).
csv_filename = "UK_HH_F2F.csv"
UK_HH_F2F.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been created successfully.")


In [None]:
# Quarterly total of UK-cardholder household spend in the Face to Face channel.
# The aggregation (SUM(spend) per time_period_value) is done in BigQuery;
# the result is written to UK_HH_F2F_Spending.csv.

UK_spending_HH_F2F_All = '''SELECT time_period_value, SUM(spend) AS total_spend
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All'
and mcg != 'All'
and mcg != 'BUSINESS TO BUSINESS' 
and merchant_channel = 'Face to Face' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
GROUP BY 
time_period_value 
ORDER BY time_period_value'''

# Run the query, then relabel the aggregate column to its output name.
query_result = bq.read_bq_table_sql(client, UK_spending_HH_F2F_All)
df_by_HH_F2F_All = query_result.rename(
    columns={'total_spend': 'F2F_spend_HH'}
)
# Preview call; not rendered because it is not the cell's last expression.
df_by_HH_F2F_All.head()

# Persist the quarterly totals for the adjustment cells.
csv_filename = "UK_HH_F2F_Spending.csv"
df_by_HH_F2F_All.to_csv(csv_filename, index=False)
print(f"CSV file '{csv_filename}' has been created successfully.")

In [None]:
# UK household Face to Face spend rescaled to the 2019Q1 cardholder base.
# Cardholder counts come from the mcg = 'All' extract.

import pandas as pd

# Inputs: quarterly cardholder totals and quarterly F2F spend totals.
df_cardholders = pd.read_csv("UK_spending_by_mcg_All.csv")
df_spend = pd.read_csv("UK_HH_F2F_Spending.csv")

# Inner join: keep only the quarters present in both inputs.
df_merged = df_cardholders.merge(df_spend, on="time_period_value", how="inner")

# Rows belonging to the base quarter.
base_row = df_merged.loc[df_merged["time_period_value"] == "2019Q1"]
if base_row.empty:
    print("2019Q1 base data not found in the dataset.")
else:
    base_cardholders = base_row["Spend_mcg_All_cardholders"].iloc[0]

    # adjusted spend = (base cardholders / quarter cardholders) * quarter spend
    cardholder_ratio = base_cardholders / df_merged["Spend_mcg_All_cardholders"]
    df_merged["adjusted_F2F_spend"] = cardholder_ratio * df_merged["F2F_spend_HH"]

    # Persist the adjusted series for the indexing cell.
    df_merged.to_csv("Adjusted_F2F_HH_Spend_mcg.csv", index=False)
    print("Adjusted quarterly spend saved to Adjusted_F2F_HH_Spend_mcg.csv")

In [None]:
# UK household Face to Face spend rescaled to the 2019Q1 cardholder base.
# Cardholder counts come from the household F2F extract.

import pandas as pd

# Inputs: quarterly cardholder totals and quarterly F2F spend totals.
df_cardholders = pd.read_csv("UK_HH_F2F.csv")
df_spend = pd.read_csv("UK_HH_F2F_Spending.csv")

# Inner join: keep only the quarters present in both inputs.
df_merged = df_cardholders.merge(df_spend, on="time_period_value", how="inner")

# Rows belonging to the base quarter.
base_row = df_merged.loc[df_merged["time_period_value"] == "2019Q1"]
if base_row.empty:
    print("2019Q1 base data not found in the dataset.")
else:
    base_cardholders = base_row["HH_F2F_cardholders"].iloc[0]

    # adjusted spend = (base cardholders / quarter cardholders) * quarter spend
    cardholder_ratio = base_cardholders / df_merged["HH_F2F_cardholders"]
    df_merged["adjusted_F2F_spend"] = cardholder_ratio * df_merged["F2F_spend_HH"]

    # Persist the adjusted series for the indexing cell.
    df_merged.to_csv("Adjusted_F2F_HH_Spend.csv", index=False)
    print("Adjusted quarterly spend saved to Adjusted_F2F_HH_Spend.csv")

In [None]:
# Indexed adjusted F2F household spend (cardholder base from the mcg = 'All' extract).
# Indexed card spending is scaled so that the 2019 average equals 100:
#   Indexed Spend = (adjusted_F2F_spend / mean adjusted_F2F_spend over 2019Q1-2019Q4) x 100

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted quarterly F2F spend written by the adjustment cell.
df = pd.read_csv("Adjusted_F2F_HH_Spend_mcg.csv")

# Base period: the four quarters of 2019.
df_2019 = df[df['time_period_value'].isin(["2019Q1", "2019Q2", "2019Q3", "2019Q4"])]
if df_2019.empty:
    # Without any 2019 rows the mean is NaN and every indexed value would
    # silently become NaN.
    raise ValueError(
        "No 2019 quarters found in Adjusted_F2F_HH_Spend_mcg.csv; "
        "cannot compute the 2019 base average."
    )

# Average adjusted_F2F_spend over the base period, then the index itself.
base_avg = df_2019['adjusted_F2F_spend'].mean()
df['Indexed Spend F2F HH'] = (df['adjusted_F2F_spend'] / base_avg) * 100

# Plot the indexed spend over time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['time_period_value'], df['Indexed Spend F2F HH'], marker='o', linestyle='-')
ax.set_title('Indexed Adjusted F2F Spend Over Time (Base: 2019 Average = 100)')
ax.set_xlabel('Time Period')
ax.set_ylabel('Indexed Spend')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
fig.tight_layout()
# Cell-specific file name: "indexed_spend_plot.png" is also written by other
# cells in this notebook, so saving there lets a later cell overwrite this plot.
fig.savefig("indexed_spend_plot_F2F_HH_mcg.png")
plt.show()

# Save the indexed series to CSV.
df.to_csv("Indexed_Adjusted_F2F_HH_mcg.csv", index=False)

# Last expression of the cell: echoes the name of the saved CSV.
"Indexed_Adjusted_F2F_HH_mcg.csv"


In [None]:
# Indexed adjusted F2F household spend (cardholder base from the household F2F extract).
# Indexed card spending is scaled so that the 2019 average equals 100:
#   Indexed Spend = (adjusted_F2F_spend / mean adjusted_F2F_spend over 2019Q1-2019Q4) x 100

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted quarterly F2F spend written by the adjustment cell.
df = pd.read_csv("Adjusted_F2F_HH_Spend.csv")

# Base period: the four quarters of 2019.
df_2019 = df[df['time_period_value'].isin(["2019Q1", "2019Q2", "2019Q3", "2019Q4"])]
if df_2019.empty:
    # Without any 2019 rows the mean is NaN and every indexed value would
    # silently become NaN.
    raise ValueError(
        "No 2019 quarters found in Adjusted_F2F_HH_Spend.csv; "
        "cannot compute the 2019 base average."
    )

# Average adjusted_F2F_spend over the base period, then the index itself.
base_avg = df_2019['adjusted_F2F_spend'].mean()
df['Indexed Spend F2F HH'] = (df['adjusted_F2F_spend'] / base_avg) * 100

# Plot the indexed spend over time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['time_period_value'], df['Indexed Spend F2F HH'], marker='o', linestyle='-')
ax.set_title('Indexed Adjusted F2F Spend Over Time (Base: 2019 Average = 100)')
ax.set_xlabel('Time Period')
ax.set_ylabel('Indexed Spend')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
fig.tight_layout()
# Cell-specific file name: "indexed_spend_plot.png" is also written by other
# cells in this notebook, so saving there lets a later cell overwrite this plot.
fig.savefig("indexed_spend_plot_F2F_HH.png")
plt.show()

# Save the indexed series to CSV.
df.to_csv("Indexed_Adjusted_F2F_HH.csv", index=False)

# Last expression of the cell: echoes the name of the saved CSV.
"Indexed_Adjusted_F2F_HH.csv"


In [None]:
# Indexed adjusted Online household spend (cardholder base from the mcg = 'All' extract).
# Indexed card spending is scaled so that the 2019 average equals 100:
#   Indexed Spend = (adjusted_Online_spend / mean adjusted_Online_spend over 2019Q1-2019Q4) x 100

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted quarterly Online spend written by the adjustment cell.
df = pd.read_csv("Adjusted_Online_HH_Spend_mcg.csv")

# Base period: the four quarters of 2019.
df_2019 = df[df['time_period_value'].isin(["2019Q1", "2019Q2", "2019Q3", "2019Q4"])]
if df_2019.empty:
    # Without any 2019 rows the mean is NaN and every indexed value would
    # silently become NaN.
    raise ValueError(
        "No 2019 quarters found in Adjusted_Online_HH_Spend_mcg.csv; "
        "cannot compute the 2019 base average."
    )

# Average adjusted_Online_spend over the base period, then the index itself.
base_avg = df_2019['adjusted_Online_spend'].mean()
df['Indexed Spend Online HH'] = (df['adjusted_Online_spend'] / base_avg) * 100

# Plot the indexed spend over time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['time_period_value'], df['Indexed Spend Online HH'], marker='o', linestyle='-')
ax.set_title('Indexed Adjusted Online HH Spend Over Time (Base: 2019 Average = 100)')
ax.set_xlabel('Time Period')
ax.set_ylabel('Indexed Spend')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(True)
fig.tight_layout()
# Cell-specific file name: "indexed_spend_plot.png" is also written by other
# cells in this notebook, so saving there lets a later cell overwrite this plot.
fig.savefig("indexed_spend_plot_Online_HH_mcg.png")
plt.show()

# Save the indexed series to CSV.
df.to_csv("Indexed_Adjusted_Online_HH_mcg.csv", index=False)

# Last expression of the cell: echoes the name of the saved CSV.
"Indexed_Adjusted_Online_HH_mcg.csv"


In [None]:
# Indexed adjusted Online household spend (cardholder base from the household Online extract).
# Indexed card spending is scaled so that the 2019 average equals 100:
#   Indexed Spend = (adjusted_Online_spend / mean adjusted_Online_spend over 2019Q1-2019Q4) x 100

import pandas as pd
import matplotlib.pyplot as plt

# Adjusted quarterly Online spend written by the adjustment cell.
df = pd.read_csv("Adjusted_Online_HH_Spend.csv")

# Base period: select the four quarters of 2019 with a boolean mask.
in_2019 = df['time_period_value'].isin(["2019Q1", "2019Q2", "2019Q3", "2019Q4"])
df_2019 = df.loc[in_2019]

# Mean adjusted_Online_spend over the base period.
base_avg = df_2019['adjusted_Online_spend'].mean()

# Index: ratio to the base-period mean, scaled to 100.
spend_ratio = df['adjusted_Online_spend'] / base_avg
df['Indexed Spend Online HH'] = spend_ratio * 100

# Line chart of the indexed series over time.
plt.figure(figsize=(12, 6))
plt.plot(
    df['time_period_value'],
    df['Indexed Spend Online HH'],
    marker='o',
    linestyle='-',
)
plt.title('Indexed Adjusted Online Spend Over Time (Base: 2019 Average = 100)')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.savefig("indexed_spend_plot.png")
plt.show()

# Persist the indexed series.
df.to_csv("Indexed_Adjusted_Online_HH.csv", index=False)

# Last expression of the cell: echoes the name of the saved CSV.
"Indexed_Adjusted_Online_HH.csv"


In [None]:
# Indexed line chart: UK Online vs F2F household spend, adjusted indexed values,
# using the series whose cardholder base comes from the mcg = 'All' extract.

import pandas as pd
import matplotlib.pyplot as plt

# Indexed series written by the two "mcg" indexing cells.
online_df = pd.read_csv("Indexed_Adjusted_Online_HH_mcg.csv")
f2f_df = pd.read_csv("Indexed_Adjusted_F2F_HH_mcg.csv")

# Draw both series on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(online_df['time_period_value'], online_df['Indexed Spend Online HH'], label='Online HH', marker='o')
ax.plot(f2f_df['time_period_value'], f2f_df['Indexed Spend F2F HH'], label='F2F HH', marker='s')

# Titles, labels, legend and grid.
ax.set_title('Indexed Spend Comparison: Online vs F2F Households')
ax.set_xlabel('Time Period')
ax.set_ylabel('Indexed Spend (Base 2019 = 100)')
ax.tick_params(axis='x', labelrotation=45)
ax.legend()
ax.grid(True)
fig.tight_layout()

# Cell-specific file name: "indexed_spend_comparison.png" is also written by
# another cell in this notebook, which would otherwise overwrite this chart.
fig.savefig("indexed_spend_comparison_mcg.png")
plt.show()



In [None]:
# Indexed line chart: UK Online vs F2F household spend, adjusted indexed values.

import pandas as pd
import matplotlib.pyplot as plt

# Indexed series written by the two indexing cells.
online_df = pd.read_csv("Indexed_Adjusted_Online_HH.csv")
f2f_df = pd.read_csv("Indexed_Adjusted_F2F_HH.csv")

# One figure, two lines: Online first, then F2F.
plt.figure(figsize=(12, 6))
plt.plot(
    online_df['time_period_value'],
    online_df['Indexed Spend Online HH'],
    label='Online HH',
    marker='o',
)
plt.plot(
    f2f_df['time_period_value'],
    f2f_df['Indexed Spend F2F HH'],
    label='F2F HH',
    marker='s',
)

# Titles, axis labels, legend and grid.
plt.title('Indexed Spend Comparison: Online vs F2F Households')
plt.xlabel('Time Period')
plt.ylabel('Indexed Spend (Base 2019 = 100)')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()

# Write the chart to disk, then render it in the notebook.
plt.savefig("indexed_spend_comparison.png")
plt.show()

