In [15]:
import plotly.graph_objects as go
import plotly.io as pio
import polars as pl
from plotly.subplots import make_subplots

# Build the analysis frame in a single pipeline: read the CSV, convert the two
# string date columns to Date, then append the derived price ratios.
df = (
    pl.read_csv('data/2026-01-30-Jesta tech test - fashion_sample.csv')
    .with_columns(
        pl.col('date').str.to_date(),
        pl.col('launch_date').str.to_date(),
    )
    .with_columns(
        # Discount relative to the original price (a fraction, not a percent).
        markdown_pct=(pl.col('original_price') - pl.col('current_price')) / pl.col('original_price'),
        # Share of the current selling price left after cost (also a fraction).
        margin=(pl.col('current_price') - pl.col('cost')) / pl.col('current_price'),
    )
)

# Quick overview: dimensions, column dtypes, and summary statistics.
print("Dataset shape:", df.shape)
print("\nData types:", df.schema, sep="\n")
print("\nBasic statistics:", df.describe(), sep="\n")

Dataset shape: (100, 11)

Data types:
Schema({'date': Date, 'style': String, 'site': String, 'sales': Int64, 'inventory': Int64, 'original_price': Float64, 'current_price': Float64, 'cost': Float64, 'launch_date': Date, 'markdown_pct': Float64, 'margin': Float64})

Basic statistics:
shape: (9, 12)
┌────────────┬────────────┬────────────┬───────────┬───┬───────┬────────────┬───────────┬──────────┐
│ statistic  ┆ date       ┆ style      ┆ site      ┆ … ┆ cost  ┆ launch_dat ┆ markdown_ ┆ margin   │
│ ---        ┆ ---        ┆ ---        ┆ ---       ┆   ┆ ---   ┆ e          ┆ pct       ┆ ---      │
│ str        ┆ str        ┆ str        ┆ str       ┆   ┆ f64   ┆ ---        ┆ ---       ┆ f64      │
│            ┆            ┆            ┆           ┆   ┆       ┆ str        ┆ f64       ┆          │
╞════════════╪════════════╪════════════╪═══════════╪═══╪═══════╪════════════╪═══════════╪══════════╡
│ count      ┆ 100        ┆ 100        ┆ 100       ┆ … ┆ 100.0 ┆ 100        ┆ 100.0     ┆ 100.0

In [21]:
# Visualization 1: Sales and Inventory Over Time
#
# Two stacked panels sharing one x-axis: daily sales on top (with the stockout
# windows shaded) and inventory underneath as a filled area. The figure is
# written to vis1.html and shown once.

# Route figures to the system browser. This sets the session-wide default, so
# every later fig.show() uses it as well.
pio.renderers.default = "browser"

# Row positions (0-based indices into `dates`) of the shaded windows and of the
# dashed green marker line. These are positional, so they assume `df` holds one
# row per day in chronological order -- TODO confirm against the source data.
STOCKOUT_WINDOWS = [
    ('Stockout 1', 32, 40),
    ('Stockout 2', 60, 69),
]
MARKER_IDX = 69  # presumably where the markdown period begins -- verify

dates = df['date'].to_list()
sales = df['sales'].to_list()
inventory = df['inventory'].to_list()

fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    subplot_titles=('Sales Pattern - Stockouts Force Zero Sales',
                                    'Inventory Levels - Two Stockout Periods Visible'))

# Top panel: daily sales.
fig.add_trace(go.Scatter(x=dates, y=sales, mode='lines+markers', name='Daily Sales',
                         marker=dict(size=4)), row=1, col=1)

# Shade each stockout window on the sales panel.
for label, start_idx, end_idx in STOCKOUT_WINDOWS:
    fig.add_vrect(x0=dates[start_idx], x1=dates[end_idx], fillcolor='red', opacity=0.2,
                  annotation_text=label, row=1, col=1)
fig.add_vline(x=dates[MARKER_IDX], line_dash='dash', line_color='green', row=1, col=1)

# Bottom panel: inventory as a filled area, with a dashed zero reference line.
fig.add_trace(go.Scatter(x=dates, y=inventory, fill='tozeroy', name='Inventory',
                         fillcolor='rgba(255,165,0,0.5)', line=dict(color='darkorange')),
              row=2, col=1)
fig.add_hline(y=0, line_dash='dash', line_color='red', opacity=0.5, row=2, col=1)

fig.update_layout(height=600, title_text='Sales and Inventory Over Time')
fig.update_yaxes(title_text='Sales', row=1, col=1)
fig.update_yaxes(title_text='Inventory', row=2, col=1)

# Save first, then display exactly once: with the browser renderer each
# fig.show() call opens a new tab, so a second call only duplicates the figure.
fig.write_html("vis1.html")
fig.show()

# Count the rows whose inventory is exactly zero.
# NOTE(review): the recorded run printed 39, which is more than the 19 row
# positions covered by the two shaded windows above -- confirm the window
# indices actually match the zero-inventory rows.
stockout_days = df.filter(pl.col('inventory') == 0)
print(f"\nStockout days: {stockout_days.height}")


Stockout days: 39


In [22]:
# Visualization 2: Price Changes and Margin Impact
#
# Three stacked panels sharing one x-axis: price series (original, current,
# cost), markdown percentage as bars, and margin percentage with a dashed
# minimum-margin reference line. Ends with a few data-quality counts.

# Margin floor drawn as the reference line on the margin panel, in percent.
MIN_MARGIN_PCT = 10

# Re-derive the x-axis here so this cell needs only `df`, not a variable left
# behind by an earlier plotting cell.
dates = df['date'].to_list()
original_price = df['original_price'].to_list()
current_price = df['current_price'].to_list()
cost = df['cost'].to_list()

# Scale the fractional columns to percent on the Series itself; unlike a Python
# loop over the values, this keeps nulls as nulls instead of raising TypeError.
markdown_pct = (df['markdown_pct'] * 100).to_list()
margin = (df['margin'] * 100).to_list()

fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                    subplot_titles=('Price Evolution - Markdown Period After Day 70',
                                    'Markdown Percentage Over Time',
                                    'Margin Evolution'))

# Panel 1: prices.
fig.add_trace(go.Scatter(x=dates, y=original_price, name='Original Price',
                         line=dict(dash='dash')), row=1, col=1)
fig.add_trace(go.Scatter(x=dates, y=current_price, name='Current Price',
                         line=dict(width=2)), row=1, col=1)
fig.add_trace(go.Scatter(x=dates, y=cost, name='Cost',
                         line=dict(dash='dot', color='gray')), row=1, col=1)

# Panel 2: markdown percentage.
fig.add_trace(go.Bar(x=dates, y=markdown_pct, name='Markdown %',
                     marker_color='red', opacity=0.6), row=2, col=1)

# Panel 3: margin percentage with the minimum-margin line.
fig.add_trace(go.Scatter(x=dates, y=margin, name='Margin %',
                         line=dict(color='green', width=2)), row=3, col=1)
fig.add_hline(y=MIN_MARGIN_PCT, line_dash='dash', line_color='red', row=3, col=1,
              annotation_text=f'Min Margin ({MIN_MARGIN_PCT}%)')

fig.update_layout(height=800, title_text='Price Changes and Margin Impact')
fig.update_yaxes(title_text='Price ($)', row=1, col=1)
fig.update_yaxes(title_text='Markdown %', row=2, col=1)
fig.update_yaxes(title_text='Margin %', row=3, col=1)
fig.show()

# Data quality check: per-column null counts plus three sanity counts
# (negative sales, negative inventory, current price above original price).
print("\nData Quality Check:")
print(f"Null counts:\n{df.null_count()}")
print(f"\nNegative sales: {df.filter(pl.col('sales') < 0).height}")
print(f"Negative inventory: {df.filter(pl.col('inventory') < 0).height}")
print(f"Price > Original: {df.filter(pl.col('current_price') > pl.col('original_price')).height}")


Data Quality Check:
Null counts:
shape: (1, 11)
┌──────┬───────┬──────┬───────┬───┬──────┬─────────────┬──────────────┬────────┐
│ date ┆ style ┆ site ┆ sales ┆ … ┆ cost ┆ launch_date ┆ markdown_pct ┆ margin │
│ ---  ┆ ---   ┆ ---  ┆ ---   ┆   ┆ ---  ┆ ---         ┆ ---          ┆ ---    │
│ u32  ┆ u32   ┆ u32  ┆ u32   ┆   ┆ u32  ┆ u32         ┆ u32          ┆ u32    │
╞══════╪═══════╪══════╪═══════╪═══╪══════╪═════════════╪══════════════╪════════╡
│ 0    ┆ 0     ┆ 0    ┆ 1     ┆ … ┆ 0    ┆ 0           ┆ 0            ┆ 0      │
└──────┴───────┴──────┴───────┴───┴──────┴─────────────┴──────────────┴────────┘

Negative sales: 0
Negative inventory: 0
Price > Original: 0
