In [None]:
# --- Standard library --------------------------------------------------------
import os
import sys

# Directory added to the module search path before the project-local import
# below. Presumably this is where the fintrans_toolbox package lives on this
# machine -- TODO confirm, and consider making it configurable.
project_path = "/home/jupyter"
# Only append when missing, so re-running this cell does not keep adding
# duplicate entries to sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)

# --- Third-party -------------------------------------------------------------
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

# --- Project-local (must come after the sys.path change above) ---------------
from fintrans_toolbox.src import bq_utils as bq

In [None]:
# BigQuery client passed to the query helper in the cells below. No project or
# credentials are given here, so the client library's own defaults apply.
client = bigquery.Client()

In [None]:
# Quarterly spend rows where both cardholder_origin and destination_country are
# 'UNITED KINGDOM', with mcg, merchant_channel and cardholder_origin_country
# all left at 'All' (i.e. every merchant channel combined).
# NOTE(review): the GROUP BY lists every selected column, including spend, and
# the SELECT has no aggregate, so the clause only de-duplicates identical rows.
# Confirm that is intended rather than SUM(spend).
UK_spending_by_country3a = '''SELECT time_period_value, destination_country, spend FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and mcg = 'All' and merchant_channel = 'All' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'UNITED KINGDOM' GROUP BY destination_country, 
time_period_value, spend ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame by the cells that follow.
df_by_country3a = bq.read_bq_table_sql(client, UK_spending_by_country3a)
# Preview the first rows.
df_by_country3a.head()

In [None]:
import pandas as pd

# Tag each quarterly row with its calendar year, read from the first four
# characters of time_period_value. This assumes the value is a string that
# starts with the year (e.g. '2023Q1') -- TODO confirm the exact format.
# The column is added to df_by_country3a itself.
df_by_country3a['year'] = (
    df_by_country3a['time_period_value'].str.slice(stop=4).astype(int)
)

# Collapse the quarters into a single spend total per year, ordered by year.
df_yearly_spend = (
    df_by_country3a
    .groupby('year')['spend']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Show the yearly totals.
print(df_yearly_spend)

In [None]:
# Write the quarterly frame to a CSV file (relative path, so it lands in the
# current working directory).
# NOTE(review): unlike the ratio export later in this notebook, index=False is
# not passed here, so the frame's index is written as an extra unnamed first
# column -- confirm whether consumers of this file rely on it.
df_by_country3a.to_csv('UK_spending_by_country3a.csv')

In [None]:
# Save the yearly totals (UK cardholder spend in the UK) to CSV; later cells
# reload this file with pd.read_csv.
# index=False keeps the positional index out of the file. Without it, every
# reload picks up a meaningless 'Unnamed: 0' column next to 'year' and 'spend'.
# This also matches how the online-spend-ratio CSV is written further down.
df_yearly_spend.to_csv('UK_yearly_spend_country3a.csv', index=False)

In [None]:
# Step 3: bar chart of the yearly totals -- one bar per year, coloured by the
# year value and annotated with its spend.
fig_bar = (
    px.bar(
        df_yearly_spend,
        x='year',
        y='spend',
        title='Yearly Total Spend in the UK (2019-2024)',
        labels=dict(spend='Total Spend (£)', year='Year'),
        color='year',
        text='spend',
    )
    # Two-decimal value labels placed outside the bars; thin black bar outline.
    .update_traces(
        texttemplate='%{text:.2f}',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    # Linear tick mode on the x-axis and an explicit y-axis title.
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={'title': 'Total Spend (£)'},
    )
)

# Render the figure.
fig_bar.show()


In [None]:
import pandas as pd
import plotly.express as px

# Step 1: replace the in-memory yearly totals with the copy saved on disk.
df_yearly_spend = pd.read_csv('UK_yearly_spend_country3a.csv')

# Step 2: same bar chart as before, but with the y-axis pinned to start at 0
# and end 10% above the largest yearly total.
fig_bar = (
    px.bar(
        df_yearly_spend,
        x='year',
        y='spend',
        title='Yearly Total Spend in the UK (2019-2024)',
        labels=dict(spend='Total Spend (£)', year='Year'),
        color='year',
        text='spend',
    )
    # Two-decimal value labels placed outside the bars; thin black bar outline.
    .update_traces(
        texttemplate='%{text:.2f}',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={
            'title': 'Total Spend (£)',
            # Upper bound is the maximum spend plus a 10% margin.
            'range': [0, df_yearly_spend['spend'].max() * 1.1],
        },
    )
)

# Render the figure.
fig_bar.show()


In [None]:
# Step 4: line chart of the yearly totals with a marker on every data point.
fig_line = (
    px.line(
        df_yearly_spend,
        x='year',
        y='spend',
        title='Yearly Total Spend in the UK (2019-2024) - Line Chart',
        labels=dict(spend='Total Spend (£)', year='Year'),
        markers=True,
    )
    # Thicker line, larger red markers.
    .update_traces(
        line={'width': 3},
        marker={'size': 8, 'color': 'red'},
    )
)

# Render the figure.
fig_line.show()


In [None]:
import pandas as pd
import plotly.express as px

# Step 1: replace the in-memory yearly totals with the copy saved on disk.
df_yearly_spend = pd.read_csv('UK_yearly_spend_country3a.csv')

# Step 2: line chart of the yearly totals, with the y-axis forced to start at
# 0 and end 10% above the largest yearly total.
fig_line = (
    px.line(
        df_yearly_spend,
        x='year',
        y='spend',
        title='Yearly Total Spend in the UK (2019-2024) - Line Chart',
        labels=dict(spend='Total Spend (£)', year='Year'),
        markers=True,
    )
    # Thicker line, larger red markers.
    .update_traces(
        line={'width': 3},
        marker={'size': 8, 'color': 'red'},
    )
    .update_layout(
        yaxis={
            'title': 'Total Spend (£)',
            # Upper bound is the maximum spend plus a 10% margin.
            'range': [0, df_yearly_spend['spend'].max() * 1.1],
        },
    )
)

# Render the figure.
fig_line.show()


In [None]:
# Quarterly spend rows with the same filters as the all-channel query earlier
# in the notebook, except merchant_channel = 'Online': cardholder_origin and
# destination_country are both 'UNITED KINGDOM'; mcg and
# cardholder_origin_country are 'All'.
# NOTE(review): the GROUP BY lists every selected column, including spend, and
# the SELECT has no aggregate, so the clause only de-duplicates identical rows.
# Confirm that is intended rather than SUM(spend).
UK_spending_by_country3 = '''SELECT time_period_value, destination_country, spend FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` where time_period = 'Quarter' and mcg = 'All' and merchant_channel = 'Online' and cardholder_origin_country = 'All' and cardholder_origin = 'UNITED KINGDOM' and destination_country = 'UNITED KINGDOM' GROUP BY destination_country, 
time_period_value, spend ORDER BY time_period_value, destination_country DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame by the cells that follow.
df_by_country3 = bq.read_bq_table_sql(client, UK_spending_by_country3)
# Rename 'spend' to 'online_spend' so it stays distinguishable from the
# all-channel 'spend' column when the two series are merged later.
df_by_country3 = df_by_country3.rename(columns={'spend': 'online_spend'})
# Preview the first rows.
df_by_country3.head()


In [None]:
import pandas as pd

# Tag each quarterly online-spend row with its calendar year, read from the
# first four characters of time_period_value. This assumes the value is a
# string that starts with the year (e.g. '2023Q1') -- TODO confirm the format.
# The column is added to df_by_country3 itself.
df_by_country3['year'] = (
    df_by_country3['time_period_value'].str.slice(stop=4).astype(int)
)

# Collapse the quarters into a single online_spend total per year, ordered by
# year.
df_yearly_spend1 = (
    df_by_country3
    .groupby('year')['online_spend']
    .sum()
    .reset_index()
    .sort_values(by='year')
)

# Show the yearly online totals.
print(df_yearly_spend1)

In [None]:
# Save the yearly online totals to CSV; later cells reload this file with
# pd.read_csv.
# index=False keeps the positional index out of the file. Without it, every
# reload picks up a meaningless 'Unnamed: 0' column next to 'year' and
# 'online_spend'. This also matches how the ratio CSV is written further down.
df_yearly_spend1.to_csv('UK_yearly_spend_Online.csv', index=False)

In [None]:
import pandas as pd

# Reload both yearly series from the CSV files written by earlier cells:
# online-channel spend first, then all-channel (total) spend.
df_online_spend = pd.read_csv('UK_yearly_spend_Online.csv')
df_total_spend = pd.read_csv('UK_yearly_spend_country3a.csv')

# Print the first rows of each frame, one after the other, to check their
# structure before merging.
print(df_online_spend.head(), df_total_spend.head(), sep='\n')

In [None]:
# Inner-join online and total spend on 'year', keeping only the key and the
# value column from each side (years missing from either file are dropped).
merged_spend = df_online_spend[['year', 'online_spend']].merge(
    df_total_spend[['year', 'spend']],
    how='inner',
    on='year',
)

# Print the first rows to verify the join.
print(merged_spend.head())


In [None]:
# Online spend as a percentage of total spend, added as a new column on
# merged_spend.
merged_spend['online_spend_ratio'] = (
    merged_spend['online_spend'].div(merged_spend['spend']).mul(100)
)

# Print just the year and its ratio.
print(merged_spend.loc[:, ['year', 'online_spend_ratio']])


In [None]:
# Write the merged table (year, online_spend, spend, online_spend_ratio) to
# CSV without the positional index.
merged_spend.to_csv(
    path_or_buf='UK_yearly_online_spend_ratio.csv',
    index=False,
)

# Confirm to the reader where the file was written.
print("The online spend ratio has been saved to 'UK_yearly_online_spend_ratio.csv'.")


In [None]:
pip install plotly


In [None]:
import pandas as pd
import plotly.express as px

# Step 1: reload the yearly online and total spend from their CSV files.
df_online_spend = pd.read_csv('UK_yearly_spend_Online.csv')
df_total_spend = pd.read_csv('UK_yearly_spend_country3a.csv')

# Steps 2-3: inner-join the two series on 'year', then add online spend as a
# percentage of total spend.
merged_spend = (
    df_online_spend[['year', 'online_spend']]
    .merge(df_total_spend[['year', 'spend']], on='year', how='inner')
    .assign(
        online_spend_ratio=lambda joined: joined['online_spend'] / joined['spend'] * 100
    )
)

# Step 4: bar chart of the ratio -- one bar per year, coloured by the year
# value, labelled with the percentage, y-axis fixed to 0-100.
fig_bar = (
    px.bar(
        merged_spend,
        x='year',
        y='online_spend_ratio',
        title='Online Spend Ratio by Year',
        labels=dict(online_spend_ratio='Online Spend Ratio (%)', year='Year'),
        color='year',
        text='online_spend_ratio',
    )
    # Two-decimal percentage labels outside the bars; thin black bar outline.
    .update_traces(
        texttemplate='%{text:.2f}%',
        textposition='outside',
        marker={'line': {'width': 1, 'color': 'black'}},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={'range': [0, 100]},
    )
)

# Render the bar chart.
fig_bar.show()

# Step 5: line chart of the same ratio with a marker on every data point and
# the same 0-100 y-axis.
fig_line = (
    px.line(
        merged_spend,
        x='year',
        y='online_spend_ratio',
        title='Online Spend Ratio by Year (Line Chart)',
        labels=dict(online_spend_ratio='Online Spend Ratio (%)', year='Year'),
        markers=True,
    )
    # Thicker line, red markers.
    .update_traces(
        line={'width': 3},
        marker={'size': 7, 'color': 'red'},
    )
    .update_layout(
        xaxis={'tickmode': 'linear'},
        yaxis={'range': [0, 100]},
    )
)

# Render the line chart.
fig_line.show()


In [None]:
# Step 1: reload the saved ratio table.
df_online_spend_ratio = pd.read_csv('UK_yearly_online_spend_ratio.csv')

# Step 2: average the yearly percentages. This is the unweighted mean of the
# per-year ratios, not total online spend divided by total spend.
average_online_spend_ratio = df_online_spend_ratio.loc[:, 'online_spend_ratio'].mean()

# Step 3: report the average to two decimal places.
print(f"Average Online Spend Ratio (2019-2024): {average_online_spend_ratio:.2f}%")
