### Imports

#### Import Packages

In [1]:
import polars as pl
import altair as alt
import numpy as np
from great_tables import GT
import gc

# alt.JupyterChart.enable_offline()
# alt.renderers.enable("jupyter", offline=True)

#### Import Panel Data

In [2]:
# Lazily scan data/panel-datasets/edible_grocery.csv with an explicit dtype
# for every column (nothing is read until .collect()).
grocery_lf = pl.scan_csv(
    source="data/panel-datasets/edible_grocery.csv",
    has_header=True,
    separator=",",
    schema={
        "panel_id": pl.Int32,
        "trans_id": pl.Int32,
        "week": pl.Int16,
        "sku_id": pl.Categorical,
        "units": pl.Int16,
        "price": pl.Float32,
        "brand": pl.Categorical,
    },
)

# Materialize only the first rows as a preview of the scan.
grocery_lf.head().collect()

panel_id,trans_id,week,sku_id,units,price,brand
i32,i32,i16,cat,i16,f32,cat
3102011,1569,6,"""19""",1,2.79,"""Alpha"""
3102012,4301,15,"""15""",1,3.19,"""Alpha"""
3102012,4301,15,"""15""",1,3.19,"""Alpha"""
3102012,4301,15,"""38""",1,3.49,"""Bravo"""
3102012,4301,15,"""44""",1,3.49,"""Bravo"""


In [3]:
# Lazily scan data/panel-datasets/sku_weight.csv: one 'weight' value per 'sku_id'.
sku_lf = pl.scan_csv(
    source="data/panel-datasets/sku_weight.csv",
    has_header=True,
    separator=",",
    schema={
        "sku_id": pl.Categorical,
        "weight": pl.Int16,
    },
)

# Materialize only the first rows as a preview of the scan.
sku_lf.head().collect()

sku_id,weight
cat,i16
"""1""",400
"""2""",400
"""3""",400
"""4""",250
"""5""",1000


In [4]:
# Lazily scan data/panel-datasets/kiwibubbles_trans.csv with explicit dtypes.
kiwi_lf = pl.scan_csv(
    source="data/panel-datasets/kiwibubbles_trans.csv",
    has_header=True,
    separator=",",
    schema={
        "ID": pl.Int16,
        "Market": pl.Categorical,
        "Week": pl.Int16,
        "Day": pl.Int16,
        "Units": pl.Int16,
    },
)

# Materialize only the first rows as a preview of the scan.
kiwi_lf.head().collect()

ID,Market,Week,Day,Units
i16,cat,i16,i16,i16
10001,"""1""",19,3,1
10002,"""1""",12,5,1
10003,"""1""",37,7,1
10004,"""1""",30,6,1
10004,"""1""",47,3,1


#### Reusable Functions

In [5]:
def weekly_plot(dataframe, cols, title, y_axis_label, pct=False, colors=None):
    """Plot one line per column of ``dataframe`` against its ``week`` column.

    Parameters
    ----------
    dataframe
        Table handed to ``alt.Chart``; it must contain a ``week`` column and
        every column named in ``cols``.
    cols : str or sequence of str
        Column(s) to plot; each becomes its own line layer. A single column
        name may be passed as a plain string.
    title : str
        Chart title.
    y_axis_label : str
        Title of the y axis (the same title is set on every layer).
    pct : bool, default False
        When True the y axis uses the percentage format ``",.0%"``;
        otherwise the whole-dollar format ``"$,.0f"``.
    colors : str or sequence of str, optional
        Line colors matched to ``cols`` by position. ``None`` draws every
        line in black; a single string is used for every line. Entries beyond
        ``len(cols)`` are ignored.

    Returns
    -------
    alt.JupyterChart
        The layered chart (650 x 250) wrapped in a ``JupyterChart``.

    Raises
    ------
    ValueError
        If ``colors`` supplies fewer entries than there are columns.
    """
    # A bare string would otherwise be iterated character by character.
    cols = [cols] if isinstance(cols, str) else list(cols)

    if colors is None:
        colors = ['black'] * len(cols)
    elif isinstance(colors, str):
        colors = [colors] * len(cols)
    elif len(colors) < len(cols):
        # Fail up front with a clear message instead of an IndexError raised
        # halfway through building the layers.
        raise ValueError(
            f"colors has {len(colors)} entries but {len(cols)} columns were requested"
        )

    # Both axes are identical for every layer, so build them once.
    week_axis = alt.Axis(
        values=list(range(0, 104 + 1, 13)),  # tick every 13 weeks, 0 through 104
        labelExpr="datum.value",  # label each tick with its week number
        title='Week'
    )
    value_axis = alt.Axis(format=",.0%" if pct else "$,.0f")

    # One line layer per requested column, each in its assigned color.
    layers = [
        alt.Chart(dataframe).mark_line(color=color, strokeWidth=1).encode(
            x=alt.X('week', axis=week_axis),
            y=alt.Y(col, title=y_axis_label, axis=value_axis)
        )
        for col, color in zip(cols, colors)
    ]

    # Combine the layers into one chart
    chart = alt.layer(*layers).properties(
        width=650,
        height=250,
        title=title
    )

    return alt.JupyterChart(chart)

### Preliminaries

#### Weekly Sales Pattern

In [6]:
# Lazy query plan: category-level weekly spend, where spend = units x price
# per row, summed within each week.
grocery_spend_category = (
    grocery_lf
    .select('week', 'units', 'price')
    .with_columns(
        (pl.col('units') * pl.col('price')).cast(pl.Float64).alias('spend')
    )
    .group_by('week')
    .agg(pl.col('spend').sum().cast(pl.Float64).alias('Weekly Spend'))
    .sort('week')
)

In [7]:
# Lazy query plan: weekly spend per brand, where spend = units x price
# per row, summed within each (week, brand) pair.
grocery_spend_brand = (
    grocery_lf
    .select('week', 'units', 'price', 'brand')
    .with_columns(
        (pl.col('units') * pl.col('price')).cast(pl.Float64).alias('spend')
    )
    .group_by(['week', 'brand'])
    .agg(pl.col('spend').sum().cast(pl.Float64).alias('Weekly Spend'))
    .sort('week')
)

In [8]:
# Wide weekly-spend table (eager Polars DataFrame): one row per week, one
# column per brand, plus a row-wise "Total". Used for inspection and plotting.
weekly_sales_pivot = (
    grocery_spend_brand
    .collect()  # execute the lazy plan; pivot needs an eager frame
    .pivot(on="brand", index="week", values="Weekly Spend", sort_columns=True)
    .with_columns(
        # Row total across every brand column
        pl.sum_horizontal(pl.exclude('week')).cast(pl.Float64).alias("Total")
    )
    .sort("week")
)

weekly_sales_pivot

week,Alpha,Bravo,Charlie,Delta,Other,Total
i16,f64,f64,f64,f64,f64,f64
1,331.459999,247.780003,51.79,17.64,18.75,667.420002
2,567.249997,398.640007,45.43,34.19,23.23,1068.740004
3,497.969999,639.980008,51.11,39.39,14.55,1243.000006
4,1389.960004,472.500004,51.87,85.04,39.779999,2039.150007
5,358.239999,252.540003,40.21,69.54,45.93,766.460002
…,…,…,…,…,…,…
100,692.480006,420.479996,69.600001,54.590001,47.97,1285.120004
101,265.770002,264.659999,64.030001,35.05,26.88,656.390003
102,379.610005,456.78999,43.5,20.4,43.61,943.909995
103,532.500005,440.999996,76.400001,40.44,45.55,1135.890002


In [9]:
# Category-level weekly revenue: the pivot's row-total column, dollar axis.
weekly_plot(
    weekly_sales_pivot,
    cols=['Total'],
    title='Category - Weekly Revenue',
    y_axis_label='Spend ($)',
)

JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'layer': [{'mark': {…

In [10]:
# Weekly revenue for the Alpha brand column, dollar axis.
weekly_plot(
    weekly_sales_pivot,
    cols=['Alpha'],
    title='Alpha - Weekly Revenue',
    y_axis_label='Spend ($)',
)

JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'layer': [{'mark': {…

In [11]:
# Weekly revenue for the Bravo brand column, dollar axis.
weekly_plot(
    weekly_sales_pivot,
    cols=['Bravo'],
    title='Bravo - Weekly Revenue',
    y_axis_label='Spend ($)',
)

JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'layer': [{'mark': {…

#### Weekly (Volume) Market Share

In [12]:
# Grocery Volume LazyFrame (query plan): weekly 'volume' per brand,
# where volume = units x weight / 1000.
grocery_volume = (
    grocery_lf
    # Attach each SKU's weight; the default inner join keeps only rows whose
    # sku_id appears in both frames.
    .join(other=sku_lf, on="sku_id")
    .select(
        ['week', 'units', 'brand', 'weight']
    )
    .with_columns(
        # 'units' and 'weight' are both Int16 in the scan schemas, so their
        # product would be computed in Int16 and wrap silently above 32,767
        # (e.g. 40 units of a weight-1000 SKU). Widen to Int64 first so the
        # product is exact, then scale.
        ((pl.col('units').cast(pl.Int64) * pl.col('weight')) / 1000)
        .cast(pl.Float64)
        .alias('volume')  # weight from grams to kilograms
    )
    .group_by('week', 'brand')
    .agg(
        pl.col("volume").sum().cast(pl.Float64).alias('Weekly Volume')
    )
    .sort('week')
)

In [13]:
# Weekly (volume) market share: each brand's weekly volume divided by the
# total volume of all brands in that week.
pct_volume = (
    grocery_volume
    .collect()  # execute the lazy plan; pivot needs an eager frame
    .pivot(
        on='brand',
        index='week',
        values='Weekly Volume',
        sort_columns=True
    )
    .with_columns(
        # A brand with no rows in a week comes out of the pivot as null.
        # Treat that as zero volume so its share is 0 rather than null, which
        # would otherwise propagate into any statistic computed on the shares.
        # sum_horizontal skips nulls, so the weekly total is unaffected.
        pl.exclude('week').fill_null(0.0)
        / pl.sum_horizontal(pl.exclude('week')).cast(pl.Float64)
    )
)

pct_volume

week,Alpha,Bravo,Charlie,Delta,Other
i16,f64,f64,f64,f64,f64
1,0.481077,0.355011,0.11194,0.02532,0.026652
2,0.524953,0.371525,0.059391,0.029696,0.014435
3,0.389338,0.508812,0.060676,0.032505,0.008668
4,0.677032,0.230511,0.036686,0.044984,0.010787
5,0.428735,0.324306,0.07574,0.119348,0.051871
…,…,…,…,…,…
100,0.545707,0.31711,0.080381,0.039087,0.017715
101,0.36891,0.40552,0.140805,0.059138,0.025627
102,0.342581,0.543494,0.071147,0.019689,0.023089
103,0.424609,0.415495,0.100574,0.042429,0.016893


In [14]:
# Alpha (blue) vs. Bravo (red) weekly volume share, percentage axis.
weekly_plot(
    pct_volume,
    cols=['Alpha', 'Bravo'],
    title='Volume Market Share - Alpha vs. Bravo',
    y_axis_label='% of Total Volume',
    pct=True,
    colors=['blue', 'red'],
)

JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'layer': [{'mark': {…

In [23]:
# Correlation matrix of the weekly volume shares (every column except 'week').
corr_matrix = (
    pct_volume
    .select(pl.exclude('week'))
    .corr()
    # Append the column names as an 'index' column so the table below can use
    # them as row labels. Relies on 'week' being the first column of pct_volume.
    .hstack(pl.DataFrame({"index": pct_volume.columns[1:]}))
)

(
    GT(corr_matrix, rowname_col='index')
    .tab_header(title="Correlation Matrix of Weekly (Volume) Market Share")
    # Format the same columns that supplied the row labels, rather than a
    # hard-coded brand list that could drift from the data.
    .fmt_number(columns=pct_volume.columns[1:])
    .data_color(
        domain=[-1, 1],
        palette=["rebeccapurple", "white", 'orange']
    )
)

Correlation Matrix of Weekly (Volume) Market Share,Correlation Matrix of Weekly (Volume) Market Share,Correlation Matrix of Weekly (Volume) Market Share,Correlation Matrix of Weekly (Volume) Market Share,Correlation Matrix of Weekly (Volume) Market Share,Correlation Matrix of Weekly (Volume) Market Share
Unnamed: 0_level_1,Alpha,Bravo,Charlie,Delta,Other
Alpha,1.00,−0.85,−0.34,−0.11,−0.15
Bravo,−0.85,1.00,−0.11,−0.23,−0.10
Charlie,−0.34,−0.11,1.00,0.13,0.24
Delta,−0.11,−0.23,0.13,1.00,0.31
Other,−0.15,−0.10,0.24,0.31,1.00


#### Annual Sales