In [1]:
import pandas as pd
import altair as alt
# Switch Altair to its "vegafusion" data transformer for every chart below.
# NOTE(review): presumably chosen so the full dataset can be plotted without
# hitting Altair's default row limit, and it likely needs the separate
# `vegafusion` package installed — confirm both.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [2]:
# Read the processed dataset from disk and preview its first five rows.
data = pd.read_csv(
    filepath_or_buffer='../data/processed/processed_data.csv',
)
data.head(5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Revenue
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01,2.55,17850.0,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01,2.75,17850.0,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01,3.39,17850.0,United Kingdom,20.34


In [11]:
# Sum revenue per product description, then keep the ten highest totals.
# (ChatGPT was used while debugging this cell.)
revenue_per_description = data.groupby('Description')['Revenue'].sum()
product_revenue = (
    revenue_per_description
    .sort_values(ascending=False)
    .iloc[:10]
    .reset_index()
)
product_revenue

Unnamed: 0,Description,Revenue
0,DOTCOM POSTAGE,206245.48
1,REGENCY CAKESTAND 3 TIER,164762.19
2,WHITE HANGING HEART T-LIGHT HOLDER,99668.47
3,PARTY BUNTING,98302.98
4,JUMBO BAG RED RETROSPOT,92356.03
5,RABBIT NIGHT LIGHT,66756.59
6,POSTAGE,66230.64
7,PAPER CHAIN KIT 50'S CHRISTMAS,63791.94
8,ASSORTED COLOUR BIRD ORNAMENT,58959.73
9,CHILLI LIGHTS,53768.06


In [12]:
# Horizontal bar chart of the top-10 table, bars ordered by descending revenue.
chart_encodings = dict(
    x=alt.X('Revenue:Q', title='Revenue'),
    y=alt.Y('Description:N', sort='-x', title='Product Description'),
    color=alt.Color('Description:N', scale=alt.Scale(scheme='pastel2')),
    tooltip=['Description', 'Revenue'],  # hovering a bar shows its values
)
bar_chart = (
    alt.Chart(product_revenue)
    .mark_bar()
    .encode(**chart_encodings)
    .properties(title='Top 10 Products by Revenue', width=600, height=300)
)
bar_chart

In [13]:
# Wrap the aggregation and plotting steps above in a reusable function.
# (ChatGPT was used to draft the docstring.)
def plot_top_products_revenue(data, n_products=10):
    """
    Create a bar chart of the top products by total revenue using Altair.

    Revenue is summed per 'Description'; the `n_products` descriptions with
    the largest totals are drawn as horizontal bars, largest first.

    Parameters:
    -----------
    data : pandas.DataFrame
        Input DataFrame containing 'Description' and 'Revenue' columns
    n_products : int, optional
        Number of top products to display (default: 10). Must be at least 1.

    Returns:
    --------
    altair.Chart
        Bar chart showing top products by revenue

    Raises:
    -------
    KeyError
        If 'Description' or 'Revenue' is missing from `data`.
    ValueError
        If `n_products` is smaller than 1.
    """

    # Fail early with a readable message rather than a generic pandas error.
    required_columns = ('Description', 'Revenue')
    missing = [col for col in required_columns if col not in data.columns]
    if missing:
        raise KeyError(f"data is missing required column(s): {missing}")

    # head() treats a negative count as "all but the last n rows", which would
    # silently plot the wrong products, so reject non-positive counts up front.
    if n_products < 1:
        raise ValueError(f"n_products must be at least 1, got {n_products}")

    # total revenue per description, keeping the n_products largest totals
    product_revenue = (data
        .groupby('Description')['Revenue']
        .sum()
        .sort_values(ascending=False)
        .head(n_products)
        .reset_index())

    # The data may hold fewer distinct products than requested; title the
    # chart with the number of bars actually drawn.
    n_shown = len(product_revenue)

    # plot the bar chart
    bar_chart = alt.Chart(product_revenue).mark_bar().encode(
        # \u00a3 is the pound sign, escaped so the label survives re-encoding
        x=alt.X('Revenue:Q', title='Revenue (\u00a3)'),
        y=alt.Y('Description:N', sort='-x', title='Product Description'),
        color=alt.Color('Description:N', scale=alt.Scale(scheme='pastel2')),
        tooltip=['Description', 'Revenue']  # for interactivity
    ).properties(
        title=f'Top {n_shown} Products by Revenue',
        width=600,
        height=300
    )

    return bar_chart

In [14]:
# Build the chart with the function's default product count and display it.
chart = plot_top_products_revenue(data=data)
chart