### Imports

#### Import Packages

In [51]:
import polars as pl
import altair as alt
import numpy as np
import great_tables as GT

In [52]:
import os
print(os.path.dirname(alt.__file__))
print(alt.__version__)

/usr/local/lib/python3.10/dist-packages/altair
5.4.1


#### Import Panel Data

In [53]:
grocery_df = pl.scan_csv(source="edible_grocery.csv",
                         has_header=True,
                         separator=",",
                         schema_overrides={'panel_id': pl.Int32,
                                           'trans_id': pl.Int32,
                                           'week': pl.Int16,
                                           'sku_id': pl.Categorical,
                                           'units': pl.Int16,
                                           'price': pl.Float32,
                                           'brand': pl.Categorical})

In [54]:
sku_df = pl.scan_csv(source="data/panel-datasets/sku_weight.csv",
                         has_header=True,
                         separator=",",
                         schema_overrides={'sku_id': pl.Int32,
                                           'weight': pl.Int16})

In [55]:
kiwi_df = pl.scan_csv(source="data/panel-datasets/kiwibubbles_trans.csv",
                      has_header=True,
                      separator=",",
                      schema_overrides={'ID': pl.Int16,
                                        'Market': pl.Categorical,
                                        'Day': pl.Int16,
                                        'Units': pl.Int16})

### Preliminaries

#### Weekly Sales Pattern

In [56]:
# Create a new column 'spend' - units x price = spend
grocery_df = grocery_df.with_columns(
    ((pl.col('units') * pl.col('price')).cast(pl.Float64)).alias('spend')
)

# Group_by LazyFrame
weekly_sales = (
    grocery_df
    .group_by(["week", "brand"])
    .agg(
        pl.col("spend").sum().alias('Total Spend')
    )
    .sort("week")
)

pivot_result = weekly_sales.collect().pivot(
    on="brand",
    index="week",
    values="Total Spend",
    sort_columns=True,
).sort("week").with_columns(
    pl.sum_horizontal(pl.all()).alias("Total") # Row total
)

pivot_result

week,Alpha,Bravo,Charlie,Delta,Other,Total
i16,f64,f64,f64,f64,f64,f64
1,331.459999,247.780003,51.79,17.64,18.75,668.420002
2,567.249997,398.640007,45.43,34.19,23.23,1070.740004
3,497.969999,639.980008,51.11,39.39,14.55,1246.000006
4,1389.960004,472.500004,51.87,85.04,39.779999,2043.150007
5,358.239999,252.540003,40.21,69.54,45.93,771.460002
…,…,…,…,…,…,…
100,692.480006,420.479996,69.600001,54.590001,47.97,1385.120004
101,265.770002,264.659999,64.030001,35.05,26.88,757.390003
102,379.610005,456.78999,43.5,20.4,43.61,1045.909995
103,532.500005,440.999996,76.400001,40.44,45.55,1238.890002


In [57]:
alt.Chart(pivot_result).mark_line().encode(
    x=alt.X('week', axis=alt.Axis(
        values=np.arange(0, 104+1, 13),  # Explicitly specify quarter-end weeks
        labelExpr="datum.value",  # Show only these labels
        title='Week')),
    y=alt.Y('Alpha',
            title='Spend ($)')
).properties(
    width=800,
    height=300
)

In [58]:
alt.Chart(pivot_result).mark_line().encode(
    x=alt.X('week', axis=alt.Axis(
        values=np.arange(0, 104+1, 13),  # Explicitly specify quarter-end weeks
        labelExpr="datum.value",  # Show only these labels
        title='Week')),
    y=alt.Y('Total',
            title='Spend ($)')
).properties(
    width=800,
    height=300
)