In [None]:
# Directory appended to the module search path below
# (presumably where the `fintrans_toolbox` package lives — TODO confirm).
project_path = "/home/jupyter"
import os
import sys
# Must run before the `fintrans_toolbox` import further down so that import can be resolved.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

# BigQuery client built with default arguments; the query cells pass it to bq.read_bq_table_sql.
client = bigquery.Client()

In [None]:
# Summarise the data by country
# The query keeps rows where time_period is 'Quarter', mcg is 'All', merchant_channel is 'Online',
# cardholder_origin_country is 'All', cardholder_origin is 'UNITED KINGDOM' and the destination
# country is anything other than 'UNITED KINGDOM'.
# NOTE(review): `spend` appears in GROUP BY and no aggregate function is applied, so the query
# de-duplicates (time_period_value, destination_country, spend) rows rather than summing spend —
# confirm that the table already holds one row per quarter and destination.
UK_spending_by_country = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country != 'UNITED KINGDOM' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
# Run the SQL through the project helper; the result is used as a pandas DataFrame by later cells.
df_by_country = bq.read_bq_table_sql(client, UK_spending_by_country)
# Preview the first rows of the result.
df_by_country.head()


In [None]:
# Order rows by country then period so that consecutive rows within a country are
# consecutive periods, then derive period-on-period changes in spend per country.
df_by_country = df_by_country.sort_values(by=['destination_country', 'time_period_value'])
spend_per_country = df_by_country.groupby('destination_country')['spend']
# Relative change versus the previous period of the same country
df_by_country['pct_change'] = spend_per_country.pct_change()
# Absolute change versus the previous period of the same country
df_by_country['value_change'] = spend_per_country.diff()


In [None]:
# Plot stacked bar chart with all values: one bar per time period, stacked by destination country.
(
    df_by_country[['time_period_value', 'destination_country', 'spend']]
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
    # `stacked` is a boolean flag. The previous string value 'True' only worked because any
    # non-empty string is truthy (the string 'False' would have stacked the bars as well).
    .plot(kind='bar', stacked=True)
)

In [None]:
# Plotly Express line chart for every destination country:
# spend over time, one coloured line per country.
pfig2 = px.line(
    data_frame=df_by_country,
    x="time_period_value", y="spend",
    color="destination_country",
)
pfig2

In [None]:
# Export the per-period / per-country spend table to CSV and a stacked bar chart of it to PNG.
# `plt` and pandas are already imported in the notebook's first cell, so they are not re-imported here.

# Sum spend per (time period, destination country) and pivot the countries into columns.
grouped_data = (
    df_by_country[['time_period_value', 'destination_country', 'spend']]
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
)

# Save the aggregated data to a CSV file
csv_filename = '/home/jupyter/uk_spending_by_country.csv'
grouped_data.to_csv(csv_filename)

# Create the figure and axes once and hand the axes to pandas. Calling plt.figure() and then
# DataFrame.plot(figsize=...) made pandas open a second figure, leaving the first one empty.
fig, ax = plt.subplots(figsize=(12, 8))
grouped_data.plot(kind='bar', stacked=True, ax=ax)

# Adding labels and title
ax.set_title('UK Spending by Country (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Saving the plot as a PNG image
png_filename = '/home/jupyter/uk_spending_by_country.png'
fig.tight_layout()  # Ensures that everything fits well
fig.savefig(png_filename)

# Show the plot (optional)
plt.show()

# Return file paths for reference
csv_filename, png_filename

In [None]:
# Select some countries to plot and reduce the dataframe - Non-EU Countries
countries = [
    'AUSTRALIA', 'INDIA',
    'REST OF  ASIAPAC', 'REST OF  C.E.M.E.A.', 'REST OF  CANADA',
    'REST OF  EUROPE', 'REST OF  LAT.AM.',
    'SOUTH AFRICA', 'SWITZERLAND',
    'UNITED ARAB EMIRATES', 'UNITED STATES OF AMERICA',
]
# Keep only the three plotting columns for rows whose destination is in the list above.
plot_df = df_by_country.loc[
    df_by_country['destination_country'].isin(countries),
    ['time_period_value', 'destination_country', 'spend'],
]

In [None]:
# Stacked bar chart using pandas plot: one bar per time period, stacked by destination country.
(
    plot_df
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
    # `stacked` is a boolean flag. The previous string value 'True' only worked because any
    # non-empty string is truthy (the string 'False' would have stacked the bars as well).
    .plot(kind='bar', stacked=True)
)

In [None]:
# Matplotlib line chart of spend over time for a fixed list of destination countries.
# `plt` and pandas are already imported in the notebook's first cell, so they are not re-imported here.

# List of countries to plot
countries = ['AUSTRALIA',
 'INDIA',
 'REST OF  ASIAPAC',
 'REST OF  C.E.M.E.A.',
 'REST OF  CANADA',
 'REST OF  EUROPE',
 'REST OF  LAT.AM.',
 'SOUTH AFRICA',
 'SWITZERLAND',
 'UNITED ARAB EMIRATES',
 'UNITED STATES OF AMERICA']

# Filter the dataframe for the selected countries
plot_df = df_by_country[['time_period_value', 'destination_country', 'spend']].loc[df_by_country['destination_country'].isin(countries)]

# Group by time_period_value and destination_country and aggregate the spend
plot_df_grouped = plot_df.groupby(['time_period_value', 'destination_country']).sum().reset_index()

# Pivot the dataframe to have time_period_value as rows and countries as columns
pivot_df = plot_df_grouped.pivot(index='time_period_value', columns='destination_country', values='spend')

# Create the figure and axes once and hand the axes to pandas. Calling plt.figure() and then
# DataFrame.plot(figsize=...) made pandas open a second figure, leaving the first one empty.
fig, ax = plt.subplots(figsize=(14, 8))
pivot_df.plot(kind='line', marker='o', linestyle='-', linewidth=2, ax=ax)

# Adding labels and title
ax.set_title('UK Spending by Selected Countries (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)

# Grid, legend placed outside the axes, and a layout pass so everything fits in the figure
ax.grid(True)
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Display the chart
plt.show()

In [None]:
pip install kaleido

In [None]:
# Plotly Express line chart of the filtered frame:
# spend over time, one coloured line per destination country.
pfig = px.line(
    data_frame=plot_df,
    x="time_period_value", y="spend",
    color="destination_country",
)
pfig

In [None]:
# Save the figure as a PNG image
# This writes `pfig2` (the line chart built from the full df_by_country frame), not the `pfig`
# chart created in the cell just above. The file name is relative, so it is written to the
# current working directory.
# NOTE(review): confirm that pfig2 is the figure intended for this file name.
pfig2.write_image("line_chartoutsideuk1.png")

In [None]:

import plotly.express as px

# Destination countries to include in the chart
countries = [
    'AUSTRALIA', 'INDIA',
    'REST OF  ASIAPAC', 'REST OF  C.E.M.E.A.', 'REST OF  CANADA',
    'REST OF  EUROPE', 'REST OF  LAT.AM.',
    'SOUTH AFRICA', 'SWITZERLAND',
    'UNITED ARAB EMIRATES', 'UNITED STATES OF AMERICA',
]

# Rows for the selected countries, restricted to the three columns the chart needs
selected_rows = df_by_country['destination_country'].isin(countries)
plot_df = df_by_country.loc[selected_rows, ['time_period_value', 'destination_country', 'spend']]

# Human-readable names for the plotted columns
axis_labels = {"spend": "Spending", "time_period_value": "Time Period"}

# Build the line chart (one line per country) and apply the layout tweaks in a single chain;
# update_layout returns the same figure object it modifies.
pfig = px.line(
    plot_df,
    x="time_period_value",
    y="spend",
    color="destination_country",
    title="UK Spending by Selected Countries (Online - All Cards)",
    labels=axis_labels,
    line_shape="linear",
).update_layout(
    xaxis_title="Time Period",
    yaxis_title="Spending",
    legend_title="Destination Country",
    template="plotly_white",
    hovermode="x unified",
)

# Render the figure
pfig.show()

In [None]:
# Line chart for the selected countries, preceded by a check for requested countries that have
# no rows in the data. `px` is already imported in the notebook's first cell.

# List of countries to plot
countries = [
    'AUSTRALIA',
    'INDIA',
    'REST OF  ASIAPAC',
    'REST OF  C.E.M.E.A.',
    'REST OF  CANADA',
    'REST OF  EUROPE',
    'REST OF  LAT.AM.',
    'SOUTH AFRICA',
    'SWITZERLAND',
    'UNITED ARAB EMIRATES',
    'UNITED STATES OF AMERICA'
]

# Filter the dataframe to include only the selected countries
plot_df = df_by_country[['time_period_value', 'destination_country', 'spend']].loc[df_by_country['destination_country'].isin(countries)]

# Report requested countries that are absent from the filtered dataframe.
# The set of present countries is built once, instead of recomputing .unique() for every
# entry of `countries` inside the comprehension.
present_countries = set(plot_df['destination_country'])
missing_countries_in_plot = [country for country in countries if country not in present_countries]
print("Missing countries in plot:", missing_countries_in_plot)

# Create a line chart using Plotly Express
pfig = px.line(
    plot_df,  # DataFrame containing the data
    x="time_period_value",  # X-axis: Time Periods
    y="spend",  # Y-axis: Spending
    color="destination_country",  # Different lines for each country
    title="UK Spending by Selected Countries (Online - All Cards)",  # Title of the plot
    labels={"spend": "Spending", "time_period_value": "Time Period"},  # Axis labels
    line_shape="linear",  # Line shape (you can choose 'linear', 'spline', etc.)
)

# Customize the layout for better readability
pfig.update_layout(
    xaxis_title="Time Period",
    yaxis_title="Spending",
    legend_title="Destination Country",
    template="plotly_white",  # Clean theme
    hovermode="x unified"  # Hover mode for better comparison of values at each time period
)

# Show the figure
pfig.show()

In [None]:
import plotly.express as px

# Countries that were requested for the chart but were not appearing in it
missing_countries = [
    'REST OF  ASIAPAC',
    'REST OF  C.E.M.E.A.',
    'REST OF  CANADA',
    'REST OF  EUROPE',
    'REST OF  LAT.AM.',
]

# All columns of df_by_country, restricted to rows for those countries
is_missing_country = df_by_country['destination_country'].isin(missing_countries)
missing_plot_df = df_by_country.loc[is_missing_country]

# Chart only these countries to inspect whether they have any data points
pfig_missing = px.line(
    data_frame=missing_plot_df,
    x="time_period_value", y="spend",
    color="destination_country",
    title="Spending by Missing Countries",
    labels={"spend": "Spending", "time_period_value": "Time Period"},
)

# Render the figure
pfig_missing.show()

In [None]:
# Selected destination countries: aggregate their spend per period and export it to CSV

import pandas as pd

# Countries to keep
selected_countries = [
    'REST OF  ASIAPAC',
    'REST OF  C.E.M.E.A.',
    'REST OF  CANADA',
    'REST OF  EUROPE',
    'REST OF  LAT.AM.',
]

# Rows of df_by_country whose destination is one of the selected countries
filtered_data = df_by_country.loc[df_by_country['destination_country'].isin(selected_countries)]

# Sum spend per (time period, country), then pivot the countries into columns
group_keys = ['time_period_value', 'destination_country']
grouped_data_selected = (
    filtered_data[group_keys + ['spend']]
    .groupby(group_keys)
    .sum()
    .unstack()
)

# Write the aggregated table to disk
csv_filename_selected = '/home/jupyter/uk_spending_selected_countries.csv'
grouped_data_selected.to_csv(csv_filename_selected)

# Print the table for inspection
print(grouped_data_selected)

# Cell output: path of the CSV that was written
csv_filename_selected

In [None]:
# Summarise the UK Cardholder Online Abroad Spending Quarterly Total data by country - USA
# Same filters as the all-countries query (time_period 'Quarter', mcg 'All', merchant_channel
# 'Online', cardholder_origin_country 'All', cardholder_origin 'UNITED KINGDOM'), but restricted
# to destination_country = 'UNITED STATES OF AMERICA'.
# NOTE(review): `spend` appears in GROUP BY and no aggregate function is applied, so the query
# de-duplicates rows rather than summing spend — confirm one row per quarter is expected.
UK_spending_by_country4 = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
where time_period = 'Quarter' 
and mcg = 'All' 
and merchant_channel = 'Online' 
and cardholder_origin_country = 'All' 
and cardholder_origin = 'UNITED KINGDOM' 
and destination_country = 'UNITED STATES OF AMERICA' 
GROUP BY destination_country, 
time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''
# Run the SQL through the project helper and preview the first rows of the result.
df_by_country4 = bq.read_bq_table_sql(client, UK_spending_by_country4)
df_by_country4.head()

In [None]:
# Query quarterly online spend by UK cardholders in the USA and export the result to CSV
UK_spending_by_country4 = '''SELECT time_period_value, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel` 
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND merchant_channel = 'Online' 
  AND cardholder_origin_country = 'All' 
  AND cardholder_origin = 'UNITED KINGDOM' 
  AND destination_country = 'UNITED STATES OF AMERICA' 
GROUP BY destination_country, time_period_value, spend 
ORDER BY time_period_value, destination_country DESC'''

# Execute the SQL via the project helper
df_by_country4 = bq.read_bq_table_sql(client, UK_spending_by_country4)

# Nothing to export when the helper returned None or an empty frame; otherwise write the CSV
if df_by_country4 is None or df_by_country4.empty:
    print("No data returned from the query. Please check your query.")
else:
    df_by_country4.to_csv('uk_spending_to_usa_online_2019_2024.csv', index=False)
    print("CSV file created successfully!")
