In [None]:
# Directory added to the import path below; presumably where the
# `fintrans_toolbox` package is checked out — confirm for your environment.
project_path = "/home/jupyter"
import os
import sys
# Must run before the `fintrans_toolbox` import further down so that the
# package can be resolved from `project_path`.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# BigQuery client built with default arguments; it is handed to
# `bq.read_bq_table_sql` by the query cells that follow.
client = bigquery.Client()

In [None]:
# Summarise the data by UK Cardholder Domestic Spending All

# Quarterly rows for UK cardholders spending in the UK, with mcc, mcg,
# merchant_channel and cardholder_origin_country all pinned to 'All'.
# Every selected column is also listed in GROUP BY and nothing is aggregated,
# so the GROUP BY only collapses exact duplicate rows.
UK_spending_by_Dom_All = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'All' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_All = bq.read_bq_table_sql(client, UK_spending_by_Dom_All)

# A bare `.head()` is only rendered when it is the last expression of a cell;
# in the middle of a cell its result is thrown away, so print the preview.
print(df_by_Dom_All.head())

# Calculate UK Domestic Total Spending

# Take the first four characters of `time_period_value` as the year.
# NOTE(review): assumes values start with a four-digit year (e.g. '2023-Q1')
# — confirm against the table.
df_by_Dom_All['year'] = df_by_Dom_All['time_period_value'].str[:4].astype(int)

# Sum the quarterly spend per year. `groupby` sorts its keys by default, so
# the result is already in ascending year order and needs no extra sort.
df_yearly_Dom_All = df_by_Dom_All.groupby('year')['spend'].sum().reset_index()

# Display the yearly totals
print(df_yearly_Dom_All)

In [None]:
# Persist the yearly totals for the comparison cells that read this file back.
# index=False keeps the meaningless RangeIndex out of the CSV; otherwise
# `pd.read_csv` returns it as a stray 'Unnamed: 0' column.
df_yearly_Dom_All.to_csv('UK_yearly_Dom_All.csv', index=False)

In [None]:
# Summarise the data by UK Cardholder Domestic Online Spending All

# Same filters as the total-spend query, except merchant_channel = 'Online'.
# `mcc = 'All'` is pinned here as well so both queries cover the same slice:
# without it, any per-MCC rows the table holds for this slice would be returned
# too and then summed into the yearly online totals.
# Every selected column is also listed in GROUP BY and nothing is aggregated,
# so the GROUP BY only collapses exact duplicate rows.
UK_spending_by_Dom_Online_All = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcc = 'All' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
df_by_Dom_Online_All = bq.read_bq_table_sql(client, UK_spending_by_Dom_Online_All)

# Rename the 'spend' column to 'dom_online_spend' so it cannot be confused
# with the total-spend column when the two yearly tables are merged later.
df_by_Dom_Online_All = df_by_Dom_Online_All.rename(columns={'spend': 'dom_online_spend'})

# A bare `.head()` is only rendered when it is the last expression of a cell;
# in the middle of a cell its result is thrown away, so print the preview.
print(df_by_Dom_Online_All.head())

# Yearly UK Cardholder Domestic Online Spending All

# Take the first four characters of `time_period_value` as the year.
# NOTE(review): assumes values start with a four-digit year (e.g. '2023-Q1')
# — confirm against the table.
df_by_Dom_Online_All['year'] = df_by_Dom_Online_All['time_period_value'].str[:4].astype(int)

# Sum the quarterly online spend per year. `groupby` sorts its keys by default,
# so the result is already in ascending year order and needs no extra sort.
df_yearly_Dom_Online_All = df_by_Dom_Online_All.groupby('year')['dom_online_spend'].sum().reset_index()

# Display the yearly totals
print(df_yearly_Dom_Online_All)

In [None]:
# Persist the yearly online totals for the chart cells that read this file back.
# index=False keeps the meaningless RangeIndex out of the CSV; otherwise
# `pd.read_csv` returns it as a stray 'Unnamed: 0' column.
df_yearly_Dom_Online_All.to_csv('UK_yearly_Dom_Online_All.csv', index=False)

In [None]:
# Bar chart Yearly Domestic Online Spend by UK Cardholders

import pandas as pd
import plotly.express as px

# Read the yearly online totals back from the CSV export.
df_yearly_spend1 = pd.read_csv('UK_yearly_Dom_Online_All.csv')

# One bar per year, coloured by year, with the spend value used as bar text.
fig_bar = px.bar(
    df_yearly_spend1,
    x='year',
    y='dom_online_spend',
    color='year',
    text='dom_online_spend',
    title='Yearly Domestic Online Spend by UK Cardholders',
    labels={'dom_online_spend': 'Online Spend (£)', 'year': 'Year'},
)

# Value labels sit above the bars with two decimals; each bar gets a thin black outline.
fig_bar.update_traces(
    texttemplate='%{text:.2f}',
    textposition='outside',
    marker={'line': {'width': 1, 'color': 'black'}},
)

# The y-axis starts at zero and ends 10% above the tallest bar, leaving room
# for the outside labels; linear tick mode is set on the year axis.
fig_bar.update_layout(
    yaxis={'range': [0, df_yearly_spend1['dom_online_spend'].max() * 1.1]},
    xaxis={'tickmode': 'linear'},
)

fig_bar.show()


In [None]:
# Line chart of the yearly online totals loaded in the bar-chart cell,
# with a marker drawn at every data point.
fig_line = px.line(
    df_yearly_spend1,
    x='year',
    y='dom_online_spend',
    markers=True,
    title='Yearly Domestic Online Spend by UK Cardholders (Line Chart)',
    labels={'dom_online_spend': 'Online Spend (£)', 'year': 'Year'},
)

# Thicker line and larger red markers.
fig_line.update_traces(line={'width': 3}, marker={'size': 7, 'color': 'red'})

# Linear tick mode on the year axis; explicit y-axis title.
fig_line.update_layout(
    xaxis={'tickmode': 'linear'},
    yaxis={'title': 'Online Spend (£)'},
)

fig_line.show()


In [None]:
# UK Domestic Online Quarterly Bar Chart  ----------------- Method 1

# Sort by destination and period so the row-to-row changes below compare
# neighbouring periods within each destination.
# NOTE(review): relies on `time_period_value` sorting chronologically as a
# string — confirm the format.
df_by_Dom_Online_All = df_by_Dom_Online_All.sort_values(['destination_country', 'time_period_value'])

# Relative and absolute change in online spend versus the previous row of the
# same destination country.
df_by_Dom_Online_All['pct_change'] = (
    df_by_Dom_Online_All.groupby('destination_country')['dom_online_spend'].pct_change()
)
df_by_Dom_Online_All['value_change'] = (
    df_by_Dom_Online_All.groupby('destination_country')['dom_online_spend'].diff()
)

# Stacked bar chart: one bar per period, one stacked segment per destination.
# `stacked` is a boolean flag; the string 'True' only worked because any
# non-empty string is truthy.
(
    df_by_Dom_Online_All[['time_period_value', 'destination_country', 'dom_online_spend']]
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
    .plot(kind='bar', stacked=True)
)

In [None]:
# Plotly Express line chart of online spend per period,
# drawn as one coloured line per destination country.
pfig2 = px.line(
    data_frame=df_by_Dom_Online_All,
    x="time_period_value",
    y="dom_online_spend",
    color="destination_country",
)
pfig2

In [None]:
# UK Domestic Online Quarterly Bar Chart  ----------------- Method 2

# Select some countries to plot and reduce the dataframe to the plotted columns.
# A single `.loc[rows, columns]` call replaces the chained `[[...]].loc[...]`.
countries = ['UNITED KINGDOM']
plot_df = df_by_Dom_Online_All.loc[
    df_by_Dom_Online_All['destination_country'].isin(countries),
    ['time_period_value', 'destination_country', 'dom_online_spend'],
]

# Stacked bar chart using pandas plot: one bar per period, one stacked segment
# per selected country. `stacked` is a boolean flag; the string 'True' only
# worked because any non-empty string is truthy.
(
    plot_df
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
    .plot(kind='bar', stacked=True)
)

In [None]:
# Compare UK Domestic Yearly Total Spend vs Online Spend ---------------- Method 1

import pandas as pd
import matplotlib.pyplot as plt

# Read both yearly exports back from disk.
online_df = pd.read_csv("UK_yearly_Dom_Online_All.csv")
all_df = pd.read_csv("UK_yearly_Dom_All.csv")

# Draw both series on one explicit figure/axes pair instead of going through
# the implicit pyplot "current figure".
comparison_fig, comparison_ax = plt.subplots(figsize=(10, 6))
comparison_ax.plot(
    online_df['year'], online_df['dom_online_spend'],
    marker='o', label='Domestic Online Spend',
)
comparison_ax.plot(
    all_df['year'], all_df['spend'],
    marker='o', label='Total Domestic Spend',
)

# Axis labels, title, legend and grid.
comparison_ax.set_xlabel('Year')
comparison_ax.set_ylabel('Spend (in currency units)')
comparison_ax.set_title('Comparison of Domestic Online Spend vs Total Domestic Spend in the UK')
comparison_ax.legend()
comparison_ax.grid(True)
comparison_fig.tight_layout()

# Write the PNG before showing the figure.
comparison_fig.savefig("domestic_spend_comparison.png")
plt.show()



In [None]:
# Compare UK Domestic Yearly Total Spend vs Online Spend ---------------- Method 2

import pandas as pd
import plotly.express as px

# Read both yearly exports back from disk.
df_online = pd.read_csv("UK_yearly_Dom_Online_All.csv")
df_total = pd.read_csv("UK_yearly_Dom_All.csv")

# Join the two tables on 'year', then give the spend columns the names that
# should appear in the legend.
df = df_total.merge(df_online, on='year').rename(
    columns={
        'spend': 'Domestic Total Spend',
        'dom_online_spend': 'Domestic Online Spend',
    }
)

# Passing a list for `y` draws one line per column; the `labels` mapping
# renames the generated 'value' / 'variable' fields on the axis and legend.
fig = px.line(
    df,
    x='year',
    y=['Domestic Total Spend', 'Domestic Online Spend'],
    markers=True,
    title='UK Domestic Yearly Total Spend vs Online Spend',
    labels={'year': 'Year', 'value': 'Spend (£)', 'variable': 'Spending Type'},
)

# Thicker lines, larger markers, linear tick mode on the year axis.
fig.update_traces(line={'width': 3}, marker={'size': 7})
fig.update_layout(xaxis={'tickmode': 'linear'})

fig.show()




In [None]:
# Bar Chart UK Domestic Online Spend to Total Spend Ratio (year range taken from the data)

import pandas as pd
import plotly.express as px

# Load only the columns that are needed. `usecols` ignores a leftover
# 'Unnamed: 0' index column if the CSV was written with its index, so the
# merge below does not pick up 'Unnamed: 0_x' / 'Unnamed: 0_y' columns.
df_total_spend = pd.read_csv("UK_yearly_Dom_All.csv", usecols=['year', 'spend'])
df_online_spend = pd.read_csv("UK_yearly_Dom_Online_All.csv", usecols=['year', 'dom_online_spend'])

# Inner-join on 'year' (only years present in both files are kept) and rename
# the spend columns for clarity. A new frame is returned rather than renaming
# in place.
df = pd.merge(df_total_spend, df_online_spend, on='year').rename(
    columns={'spend': 'total_spend', 'dom_online_spend': 'online_spend'}
)

# Share of total domestic spend that is online, per year.
df['online_total_ratio'] = df['online_spend'] / df['total_spend']

# Build the title from the years actually plotted so it cannot go stale when
# the underlying data gains or loses a year.
first_year = df['year'].min()
last_year = df['year'].max()
ratio_title = f'UK Domestic Online Spend to Total Spend Ratio {first_year} - {last_year}'

# One bar per year, coloured on a continuous Viridis scale by the ratio, with
# the ratio value used as bar text.
fig_bar = px.bar(df,
                 x='year',
                 y='online_total_ratio',
                 title=ratio_title,
                 labels={'online_total_ratio': 'Online Spend to Total Spend Ratio', 'year': 'Year'},
                 color='online_total_ratio',
                 color_continuous_scale='Viridis',
                 text='online_total_ratio')

# The y-axis runs from zero to 0.1 above the largest ratio, leaving room for
# the labels placed outside the bars.
fig_bar.update_layout(
    xaxis=dict(tickmode='linear'),
    yaxis=dict(title='Ratio', range=[0, df['online_total_ratio'].max() + 0.1])
)

# Two-decimal labels above each bar.
fig_bar.update_traces(
    texttemplate='%{text:.2f}',
    textposition='outside',
    textfont_size=12,
    textfont_color='black'
)

# Show the bar chart
fig_bar.show()

