In [None]:
# Root directory added to the import path; presumably where the local
# `fintrans_toolbox` package imported further down lives — TODO confirm.
project_path = "/home/jupyter"
import os
import sys
# Extend the module search path so packages under project_path can be imported.
sys.path.append(project_path)
from google.cloud import bigquery, storage

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from fintrans_toolbox.src import bq_utils as bq

In [None]:
# BigQuery client shared by every query cell below. Constructed with no
# arguments, so project and credentials are taken from the environment.
client = bigquery.Client()

In [None]:
# Quarterly online spend in the UK by international cardholders, one row per
# (time_period_value, cardholder_origin_country).
# Note: the GROUP BY lists every selected column (including `spend`) and the
# SELECT has no aggregate, so it only de-duplicates rows; nothing is summed.
UK_spending_by_country2 = '''SELECT time_period_value, cardholder_origin_country, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND merchant_channel = 'Online' 
  AND cardholder_origin = 'International Cardholder'
  AND cardholder_origin_country != 'All'
  AND destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, time_period_value, cardholder_origin_country, spend 
ORDER BY time_period_value, spend DESC'''
# Run the query through the project helper; the result is used as a pandas
# DataFrame below (presumably what read_bq_table_sql returns — confirm in bq_utils).
df_by_country2 = bq.read_bq_table_sql(client, UK_spending_by_country2)
# Preview the first rows.
df_by_country2.head()

In [None]:
import pandas as pd

# Requires df_by_country2 from the query cell above.
# transform('sum') broadcasts each quarter's total spend back onto every row of
# that quarter, so the frame keeps its shape and gains a Spend_Total column.
quarter_totals = df_by_country2.groupby('time_period_value')['spend'].transform('sum')
df_by_country2['Spend_Total'] = quarter_totals

# Preview: original columns plus the repeated per-quarter total.
df_by_country2.head()

In [None]:
import pandas as pd

# Requires df_by_country2 from the query cell above.
# One row per quarter: total spend across all origin countries, exposed under
# the column name 'Spend_Total'.
quarterly_spending = (
    df_by_country2
    .groupby('time_period_value')['spend']
    .sum()
    .reset_index()
    .rename(columns={'spend': 'Spend_Total'})
)

# Plain-text dump of the quarterly totals.
print(quarterly_spending)

In [None]:
# Persist the quarterly totals (without the index column) to the working directory.
quarterly_spending.to_csv(path_or_buf='quarterly_spending_intlcard.csv', index=False)

# Tell the user where the file went.
print("CSV file has been saved as 'quarterly_spending_intlcard.csv'")

In [None]:
# Write the row-level frame to the working directory. No index=False here, so
# the DataFrame index is written as the first CSV column.
df_by_country2.to_csv('UK_Spending_By_Country2.csv')

In [None]:
# Quarter-on-quarter change in spend, computed per origin country.
#
# The frame holds one row per (quarter, cardholder_origin_country) and
# destination_country is a single value, so grouping on destination_country
# alone would difference consecutive rows that belong to *different* origin
# countries. Grouping on (destination, origin) makes each series a single
# country's spend over time.
#
# The sort order is kept as destination/time so the frame's row order (which
# later line plots rely on) is unchanged; within each group the rows are
# therefore already in time order.
df_by_country2 = df_by_country2.sort_values(['destination_country', 'time_period_value'])
spend_by_route = df_by_country2.groupby(['destination_country', 'cardholder_origin_country'])['spend']
# Relative change versus the same origin country's previous quarter (NaN for its first quarter).
df_by_country2['pct_change'] = spend_by_route.pct_change()
# Absolute change versus the same origin country's previous quarter.
df_by_country2['value_change'] = spend_by_route.diff()


In [None]:
# Stacked bar chart of spend per time period, one stack segment per destination country.
# `stacked` is a boolean flag; the previous string 'True' only worked because
# any non-empty string is truthy.
(
    df_by_country2[['time_period_value', 'destination_country', 'spend']]
    .groupby(['time_period_value', 'destination_country'])
    .sum()        # total spend per (time period, destination)
    .unstack()    # destinations become columns -> one bar segment each
    .plot(kind='bar', stacked=True)
)

In [None]:
# Interactive plotly line chart: Spend_Total against time period, one coloured
# line per destination country. Reads the Spend_Total column of df_by_country2.
pfig2 = px.line(df_by_country2, x="time_period_value", y="Spend_Total", color="destination_country")
# Bare last expression so the notebook renders the figure.
pfig2

In [None]:
# Same filters as the earlier country query, with LIMIT 5 appended.
# The LIMIT is applied after ORDER BY time_period_value, spend DESC, so this
# returns only the first five rows of that ordering (the largest spends of the
# earliest time_period_value), not five rows per quarter.
# Note: this rebinds both UK_spending_by_country2 and df_by_country2.
UK_spending_by_country2 = '''SELECT time_period_value, cardholder_origin_country, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND merchant_channel = 'Online' 
  AND cardholder_origin = 'International Cardholder'
  AND cardholder_origin_country != 'All'
  AND destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, time_period_value, cardholder_origin_country, spend 
ORDER BY time_period_value, spend DESC
LIMIT 5'''
# Run the query through the project helper and preview the result.
df_by_country2 = bq.read_bq_table_sql(client, UK_spending_by_country2)
df_by_country2.head()

In [None]:
# Unlimited version of the country query: quarterly online spend in the UK by
# international cardholders, one row per (time_period_value, origin country).
# The GROUP BY lists every selected column, so it only de-duplicates rows.
# Note: this rebinds df_by_country2 to a frame with the columns
# time_period_value, cardholder_origin_country, destination_country, spend.
UK_spending_by_country2 = '''SELECT time_period_value, cardholder_origin_country, destination_country, spend 
FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
WHERE time_period = 'Quarter' 
  AND mcg = 'All' 
  AND merchant_channel = 'Online' 
  AND cardholder_origin = 'International Cardholder'
  AND cardholder_origin_country != 'All'
  AND destination_country = 'UNITED KINGDOM' 
GROUP BY destination_country, time_period_value, cardholder_origin_country, spend 
ORDER BY time_period_value, spend DESC
'''
# Run the query through the project helper and preview the result.
df_by_country2 = bq.read_bq_table_sql(client, UK_spending_by_country2)
df_by_country2.head()

In [None]:
# Five highest-spend rows of df_by_country2, keeping only the origin country
# and spend columns. Rows are not aggregated first, so the same country can
# appear more than once (once per time period).
spend_per_row = df_by_country2[['cardholder_origin_country', 'spend']]
df_top_5_countries = spend_per_row.sort_values(by='spend', ascending=False).iloc[:5]
df_top_5_countries

In [None]:
# Top five origin countries by summed online spend in the UK, per quarter.
# The CTE totals spend per (quarter, origin country) and numbers the countries
# within each quarter by descending total; the outer query keeps ranks 1-5.
UK_spending_by_country2 = '''
WITH ranked_spend AS (
    SELECT 
        time_period_value, 
        cardholder_origin_country, 
        SUM(spend) AS total_spend,
        ROW_NUMBER() OVER (PARTITION BY time_period_value ORDER BY SUM(spend) DESC) AS rank
    FROM `ons-fintrans-data-prod.fintrans_visa.spend_origin_and_channel`
    WHERE time_period = 'Quarter' 
      AND mcg = 'All' 
      AND merchant_channel = 'Online' 
      AND cardholder_origin = 'International Cardholder'
      AND cardholder_origin_country != 'All'
      AND destination_country = 'UNITED KINGDOM'
    GROUP BY time_period_value, cardholder_origin_country
)
SELECT 
    time_period_value,
    cardholder_origin_country,
    total_spend
FROM ranked_spend
WHERE rank <= 5
ORDER BY time_period_value, rank
'''
# Execute the query and fetch the results into a pandas DataFrame.
#
# The result is stored under its own name on purpose: it has the columns
# (time_period_value, cardholder_origin_country, total_spend), whereas the
# plotting cells further down read df_by_country2['spend'] and
# df_by_country2['destination_country']. Rebinding df_by_country2 here would
# make every one of those cells fail on a top-to-bottom run.
df_top5_by_quarter = client.query(UK_spending_by_country2).to_dataframe()

# Display the first few rows of the ranked result
df_top5_by_quarter.head()

In [None]:
import plotly.express as px

# Grouped bar chart of spend per time period, coloured by destination country.
# Expects df_by_country2 to provide the columns time_period_value, spend and
# destination_country.
bar_labels = {"time_period_value": "Time Period", "spend": "Spend"}
bar_chart = px.bar(
    df_by_country2,
    x="time_period_value",
    y="spend",
    color="destination_country",
    title="Spending by Destination Country Over Time",
    labels=bar_labels,
    barmode="group",  # bars of different colours sit side by side rather than stacked
)

# Render the figure.
bar_chart.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Total spend per (time period, destination country); unstack() turns the
# destinations into columns so each becomes one segment of the stacked bars.
# Expects df_by_country2 to provide time_period_value, destination_country and spend.
grouped_data = (
    df_by_country2[['time_period_value', 'destination_country', 'spend']]
    .groupby(['time_period_value', 'destination_country'])
    .sum()
    .unstack()
)

# Save the aggregated data to a CSV file
csv_filename = '/home/jupyter/uk_spending_by_country.csv'
grouped_data.to_csv(csv_filename)

# Draw on an explicitly created figure/axes. Calling plt.figure() and then
# DataFrame.plot(figsize=...) creates two figures, leaving the first one empty;
# passing ax=ax keeps everything on a single figure.
fig, ax = plt.subplots(figsize=(12, 8))
grouped_data.plot(kind='bar', stacked=True, ax=ax)

# Adding labels and title
ax.set_title('UK Spending by Country (Online - All Cards)', fontsize=16)
ax.set_xlabel('Time Period', fontsize=12)
ax.set_ylabel('Spending', fontsize=12)
# Legend anchored just outside the right-hand edge of the axes.
ax.legend(title='Destination Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Saving the plot as a PNG image
png_filename = '/home/jupyter/uk_spending_by_country.png'
fig.tight_layout()  # Ensures that everything fits well
fig.savefig(png_filename)

# Show the plot (optional)
plt.show()

# Return file paths for reference (displayed as the cell output)
csv_filename, png_filename

In [None]:
import pandas as pd
import plotly.express as px

# One row per time period holding the summed 'spend' of df_by_country2.
df_aggregated = (
    df_by_country2
    .groupby("time_period_value", as_index=False)["spend"]
    .sum()
)

# Line chart of the per-period totals with human-readable axis labels.
axis_labels = {"time_period_value": "Time Period", "spend": "Total Spend"}
pfig2 = px.line(
    df_aggregated,
    x="time_period_value",
    y="spend",
    title="Total Spend Over Time (Aggregated)",
    labels=axis_labels,
)

# Render the figure.
pfig2.show()

In [None]:
import pandas as pd
import plotly.express as px

# One row per time period holding the summed 'spend' of df_by_country2.
df_aggregated = (
    df_by_country2
    .groupby("time_period_value", as_index=False)["spend"]
    .sum()
)

# Line chart of the per-period totals with human-readable axis labels.
axis_labels = {"time_period_value": "Time Period", "spend": "Total Spend"}
pfig2 = px.line(
    df_aggregated,
    x="time_period_value",
    y="spend",
    title="Total Spend Over Time (Aggregated)",
    labels=axis_labels,
)

# Turn the x-axis tick labels vertical (tick angle of 90 degrees).
pfig2.update_layout(xaxis={"tickangle": 90})

# Render the figure.
pfig2.show()

In [None]:
import pandas as pd
import plotly.express as px

# One row per time period holding the summed 'spend' of df_by_country2.
df_aggregated = (
    df_by_country2
    .groupby("time_period_value", as_index=False)["spend"]
    .sum()
)

# Line chart of the per-period totals with human-readable axis labels.
axis_labels = {"time_period_value": "Time Period", "spend": "Total Spend"}
pfig2 = px.line(
    df_aggregated,
    x="time_period_value",
    y="spend",
    title="Total Spend Over Time (Aggregated)",
    labels=axis_labels,
)

# Turn the x-axis tick labels vertical the other way round (tick angle of -90 degrees).
pfig2.update_layout(xaxis={"tickangle": -90})

# Render the figure.
pfig2.show()

In [None]:
import pandas as pd
import plotly.express as px

# One row per time period holding the summed 'spend' of df_by_country2.
df_aggregated = (
    df_by_country2
    .groupby("time_period_value", as_index=False)["spend"]
    .sum()
)

# Line chart of the per-period totals with human-readable axis labels.
axis_labels = {"time_period_value": "Time Period", "spend": "Total Spend"}
pfig2 = px.line(
    df_aggregated,
    x="time_period_value",
    y="spend",
    title="Total Spend Over Time (Aggregated)",
    labels=axis_labels,
)

# Vertical tick labels (-90 degrees), a fixed 500 px figure height, and the
# title re-applied through the layout.
pfig2.update_layout(
    xaxis={"tickangle": -90},
    height=500,
    title="Total Spend Over Time (Aggregated)",
)

# Render the figure.
pfig2.show()

In [None]:
# Re-style the existing pfig2 figure: vertical tick labels, 500 px height and
# explicit margins (extra room at the bottom for the rotated labels).
# update_layout is the cell's last expression, so its return value is what the
# notebook displays.
pfig2.update_layout(
    xaxis={"tickangle": -90},
    height=500,
    margin=dict(t=40, b=100, l=40, r=40),
)