In [127]:
# Task 4: Analysis & Features

import polars as pl
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the raw daily records for the sample style
df = pl.read_csv('data/2026-01-30-Jesta tech test - fashion_sample.csv')

# Parse the two date columns and add the price ratios in a single pass.
# The ratios only read the price/cost columns, so they do not need the parsed dates first.
price_drop = pl.col('original_price') - pl.col('current_price')
unit_profit = pl.col('current_price') - pl.col('cost')
df = df.with_columns(
    pl.col('date').str.to_date(),
    pl.col('launch_date').str.to_date(),
    (price_drop / pl.col('original_price')).alias('markdown_pct'),  # discount as a share of original price
    (unit_profit / pl.col('current_price')).alias('margin'),  # unit profit as a share of current price
)

# Data quality check: nulls per column plus row counts for three sanity rules
null_counts = df.null_count()
negative_sales_rows = df.filter(pl.col('sales') < 0).height
negative_inventory_rows = df.filter(pl.col('inventory') < 0).height
price_above_original_rows = df.filter(pl.col('current_price') > pl.col('original_price')).height

print("\nData Quality Check:")
print(f"Null counts:\n{null_counts}")
print(f"\nNegative sales: {negative_sales_rows}")
print(f"Negative inventory: {negative_inventory_rows}")
print(f"Price > Original: {price_above_original_rows}")

# Shape, schema and summary statistics of the enriched frame
print("Dataset shape:", df.shape)
print("\nData types:")
print(df.schema)
print("\nBasic statistics:")
print(df.describe())


Data Quality Check:
Null counts:
shape: (1, 11)
┌──────┬───────┬──────┬───────┬───┬──────┬─────────────┬──────────────┬────────┐
│ date ┆ style ┆ site ┆ sales ┆ … ┆ cost ┆ launch_date ┆ markdown_pct ┆ margin │
│ ---  ┆ ---   ┆ ---  ┆ ---   ┆   ┆ ---  ┆ ---         ┆ ---          ┆ ---    │
│ u32  ┆ u32   ┆ u32  ┆ u32   ┆   ┆ u32  ┆ u32         ┆ u32          ┆ u32    │
╞══════╪═══════╪══════╪═══════╪═══╪══════╪═════════════╪══════════════╪════════╡
│ 0    ┆ 0     ┆ 0    ┆ 1     ┆ … ┆ 0    ┆ 0           ┆ 0            ┆ 0      │
└──────┴───────┴──────┴───────┴───┴──────┴─────────────┴──────────────┴────────┘

Negative sales: 0
Negative inventory: 0
Price > Original: 0
Dataset shape: (100, 11)

Data types:
Schema({'date': Date, 'style': String, 'site': String, 'sales': Int64, 'inventory': Int64, 'original_price': Float64, 'current_price': Float64, 'cost': Float64, 'launch_date': Date, 'markdown_pct': Float64, 'margin': Float64})

Basic statistics:
shape: (9, 12)
┌────────────┬──────────

The data is mostly clean: the only null is a single value in the sales column. Because it falls within a stockout period, it is safe to assume the real value is 0, so it can be filled with 0.

In [128]:
# Fill the missing sales value with zero for visualization.
# Only `sales` is filled: a frame-wide fill_null(0) would also zero out nulls in
# the other numeric columns (inventory, prices, cost, derived ratios) and could
# silently hide a data problem there.
df = df.with_columns(pl.col('sales').fill_null(0))

In [129]:
# Visualization 1: Sales and Inventory Over Time
import os
import plotly.io as pio

# Send Plotly figures to the system browser rather than rendering inline
pio.renderers.default = "browser"

# Plain Python lists for Plotly (these names are reused by later cells)
dates, sales, inventory = (
    df.get_column(column).to_list() for column in ('date', 'sales', 'inventory')
)

# Two stacked panels sharing the date axis: sales on top, inventory below
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles=(
        'Sales Pattern - Stockouts Force Zero Sales',
        'Inventory Levels - Two Stockout Periods Visible',
    ),
)

# Top panel: daily sales as a line with small markers
sales_trace = go.Scatter(
    x=dates,
    y=sales,
    mode='lines+markers',
    name='Daily Sales',
    marker=dict(size=4),
)
fig.add_trace(sales_trace, row=1, col=1)

# Bottom panel: inventory as a filled area, with a dashed reference line at zero
inventory_trace = go.Scatter(
    x=dates,
    y=inventory,
    fill='tozeroy',
    name='Inventory',
    fillcolor='rgba(255,165,0,0.5)',
    line=dict(color='darkorange'),
)
fig.add_trace(inventory_trace, row=2, col=1)
fig.add_hline(y=0, line_dash='dash', line_color='red', opacity=0.5, row=2, col=1)

# Axis labels and overall layout
fig.update_yaxes(title_text='Sales', row=1, col=1)
fig.update_yaxes(title_text='Inventory', row=2, col=1)
fig.update_layout(height=600, title_text='Sales and Inventory Over Time', template='plotly_white')

# Persist the figure as standalone HTML, then display it
os.makedirs("plots", exist_ok=True)
fig.write_html("plots/sales_inventory_over_time.html")
fig.show()

The sales and inventory visualization reveals a clear pattern of stockouts impacting sales. There are six distinct periods where inventory drops to zero, leading to zero sales during those times. After the first stockout period, which starts on Feb 2, the stock is replenished and sales start again until the next stockout period begins. This pattern of restocking and selling then repeats until the end of the dataset. Higher sales can also be seen near the end of the period because of a higher markdown. The inventory levels indicate that the product was restocked after each stockout, allowing sales to resume. The underlying problem these two plots reveal is that managing inventory is crucial for keeping our sales consistent.

In [130]:
# Visualization 2: Price Changes and Margin Impact
# Four stacked panels sharing the date axis: prices, markdown %, margin % and daily profit.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True,
                    subplot_titles=('Price Evolution - Markdown Period After Day 70',
                                    'Markdown Percentage Over Time',
                                    'Margin Evolution', 'Impact of Markdown Periods on Profitability'))

# Pull the plotted columns out as plain lists. `dates` is re-derived here so the
# cell does not depend on a list built by an earlier cell.
dates = df['date'].to_list()
original_price = df['original_price'].to_list()
current_price = df['current_price'].to_list()
sales = df['sales'].to_list()
cost = df['cost'].to_list()
markdown_pct = df['markdown_pct'].to_list()
margin = df['margin'].to_list()

# Prices
fig.add_trace(go.Scatter(x=dates, y=original_price, name='Original Price ($)',
                         line=dict(dash='dash')), row=1, col=1)
fig.add_trace(go.Scatter(x=dates, y=current_price, name='Current Price ($)',
                         line=dict(width=2)), row=1, col=1)
fig.add_trace(go.Scatter(x=dates, y=cost, name='Cost ($)',
                         line=dict(dash='dot', color='gray')), row=1, col=1)

# Markdown (stored as a fraction, plotted as a percentage)
fig.add_trace(go.Bar(x=dates, y=[m * 100 for m in markdown_pct], name='Markdown (%)',
                     marker_color='red', opacity=0.6), row=2, col=1)

# Margin (stored as a fraction, plotted as a percentage)
fig.add_trace(go.Scatter(x=dates, y=[m * 100 for m in margin], name='Margin (%)',
                         line=dict(color='green', width=2)), row=3, col=1)

# Profitability: (current price - cost) per unit, times units sold that day
profitability = [(cp - c) * s for cp, c, s in zip(current_price, cost, sales)]
fig.add_trace(go.Scatter(x=dates, y=profitability, name='Profitability ($)',
                         line=dict(width=2)), row=4, col=1)


fig.update_layout(height=800, title_text='Price Changes and Margin Impact', template='plotly_white')
fig.update_yaxes(title_text='Price ($)', row=1, col=1)
fig.update_yaxes(title_text='Markdown %', row=2, col=1)
fig.update_yaxes(title_text='Margin %', row=3, col=1)
# The fourth panel previously had no axis label, unlike the other three
fig.update_yaxes(title_text='Profit ($)', row=4, col=1)
fig.write_html("plots/price_margin_over_time.html")
fig.show()

Figure `plots/price_margin_over_time.html` shows the evolution of prices and margins over time. The original price remains constant, while the current price drops significantly after day 70, indicating a markdown period. The markdown percentage increases correspondingly, reaching up to around 30%. The margin percentage also declines during this period, showing the reduced profitability due to the markdown. While we do see more sales during the markdown period, we still need to determine whether the trade-off between pricing and margins is profitable. This is shown in figure `plots/sales_profitability.html`.

In [131]:
# Part B: Feature Engineering
# Adds stockout, lifecycle, momentum and inventory-health columns to `df`,
# then prints a short summary of each feature.

# 1. Stockout Detection
# Flag periods where inventory=0 (stockout period)
df = df.with_columns([
    (pl.col('inventory') == 0).alias('is_stockout')
])

# 2. Product Lifecycle
# Calculate weeks since launch (whole weeks, fractional part dropped by the cast) and classify
df = df.with_columns([
    ((pl.col('date') - pl.col('launch_date')).dt.total_days() / 7).cast(pl.Int32).alias('weeks_since_launch')
])

# Branches are evaluated in order: age decides 'new' / 'growth' first; only
# products older than 8 weeks are split into 'clearance' (markdown above 10%) or 'mature'.
df = df.with_columns([
    pl.when(pl.col('weeks_since_launch') <= 4)
    .then(pl.lit('new'))
    .when(pl.col('weeks_since_launch') <= 8)
    .then(pl.lit('growth'))
    .when(pl.col('markdown_pct') > 0.1)
    .then(pl.lit('clearance'))
    .otherwise(pl.lit('mature'))
    .alias('lifecycle_stage')
])

# 3. Sales Momentum
# Compare recent trend (7-day) vs longer trend (28-day)
# Use min_samples=1 to avoid discontinuities at the start
# (`min_samples` is the current name of the deprecated `min_periods` argument)
df = df.with_columns([
    pl.col('sales').rolling_mean(window_size=7, min_samples=1).alias('avg_daily_sales_7d'),
    pl.col('sales').rolling_mean(window_size=28, min_samples=1).alias('avg_daily_sales_28d')
])

# Momentum is the ratio of the two averages. It is left null when the 28-day
# average is zero, mirroring the guard used for days_of_supply below, so a
# zero denominator cannot produce inf/NaN values.
df = df.with_columns([
    pl.when(pl.col('avg_daily_sales_28d') > 0)
    .then(pl.col('avg_daily_sales_7d') / pl.col('avg_daily_sales_28d'))
    .otherwise(pl.lit(None))
    .alias('sales_momentum')
])

# 4. Inventory Health
# Days of supply, markdown depth, margin status
df = df.with_columns([
    # Days of supply (inventory / avg daily sales over last 7 days); null when there were no recent sales
    pl.when(pl.col('avg_daily_sales_7d') > 0)
    .then(pl.col('inventory') / pl.col('avg_daily_sales_7d'))
    .otherwise(pl.lit(None))
    .alias('days_of_supply'),

    # Markdown depth classification (thresholds at 0%, 20% and 40%)
    pl.when(pl.col('markdown_pct') == 0)
    .then(pl.lit('full_price'))
    .when(pl.col('markdown_pct') <= 0.2)
    .then(pl.lit('light_markdown'))
    .when(pl.col('markdown_pct') <= 0.4)
    .then(pl.lit('moderate_markdown'))
    .otherwise(pl.lit('deep_markdown'))
    .alias('markdown_depth'),

    # Margin status (thresholds at 30%, 10% and 0%)
    pl.when(pl.col('margin') >= 0.3)
    .then(pl.lit('healthy'))
    .when(pl.col('margin') >= 0.1)
    .then(pl.lit('acceptable'))
    .when(pl.col('margin') >= 0)
    .then(pl.lit('low'))
    .otherwise(pl.lit('negative'))
    .alias('margin_status')
])

# Display feature summary
print("=== Feature Engineering Summary ===\n")

print("1. Stockout Detection:")
print(f"   Total stockout days: {df.filter(pl.col('is_stockout')).height}")
print(f"   Stockout rate: {df.filter(pl.col('is_stockout')).height / df.height * 100:.1f}%\n")

print("2. Product Lifecycle Distribution:")
print(df.group_by('lifecycle_stage').agg(pl.len().alias('days')).sort('days', descending=True))

print("\n3. Sales Momentum:")
print(f"   Avg momentum: {df['sales_momentum'].mean():.2f}")

print("\n4. Inventory Health:")
print(f"   Avg days of supply: {df['days_of_supply'].mean():.1f}")
print("\n   Markdown depth distribution:")
print(df.group_by('markdown_depth').agg(pl.len().alias('days')).sort('days', descending=True))
print("\n   Margin status distribution:")
print(df.group_by('margin_status').agg(pl.len().alias('days')).sort('days', descending=True))

# Show new columns
print("\n=== New Feature Columns ===")
print(df.select(['date', 'is_stockout', 'weeks_since_launch', 'lifecycle_stage',
                 'sales_momentum', 'days_of_supply', 'markdown_depth', 'margin_status']).head(10))

=== Feature Engineering Summary ===

1. Stockout Detection:
   Total stockout days: 39
   Stockout rate: 39.0%

2. Product Lifecycle Distribution:
shape: (4, 2)
┌─────────────────┬──────┐
│ lifecycle_stage ┆ days │
│ ---             ┆ ---  │
│ str             ┆ u32  │
╞═════════════════╪══════╡
│ clearance       ┆ 31   │
│ growth          ┆ 28   │
│ mature          ┆ 23   │
│ new             ┆ 18   │
└─────────────────┴──────┘

3. Sales Momentum:
   Avg momentum: 1.02

4. Inventory Health:
   Avg days of supply: 9.5

   Markdown depth distribution:
shape: (3, 2)
┌───────────────────┬──────┐
│ markdown_depth    ┆ days │
│ ---               ┆ ---  │
│ str               ┆ u32  │
╞═══════════════════╪══════╡
│ full_price        ┆ 69   │
│ deep_markdown     ┆ 16   │
│ moderate_markdown ┆ 15   │
└───────────────────┴──────┘

   Margin status distribution:
shape: (3, 2)
┌───────────────┬──────┐
│ margin_status ┆ days │
│ ---           ┆ ---  │
│ str           ┆ u32  │
╞═══════════════╪══════╡


the argument `min_periods` for `Expr.rolling_mean` is deprecated. It was renamed to `min_samples` in version 1.21.0.


the argument `min_periods` for `Expr.rolling_mean` is deprecated. It was renamed to `min_samples` in version 1.21.0.



In [132]:
# Visualization: Feature Engineering Results
# 3x2 grid; the lifecycle panel (row 1, col 2) and the momentum panel (row 2, col 1)
# each get a secondary y-axis.
fig = make_subplots(rows=3, cols=2,
                    subplot_titles=('1. Stockout Detection', '2. Product Lifecycle & Stage',
                                    '3. Sales Momentum & Averages', '4. Days of Supply',
                                    '4. Markdown Depth', '4. Margin Status'),
                    specs=[[{}, {"secondary_y": True}], [{"secondary_y": True}, {}], [{}, {}]])

# Re-derive the shared x/y lists from the current frame so this cell does not
# depend on lists built by an earlier plotting cell.
dates = df['date'].to_list()
inventory = df['inventory'].to_list()


# 1. Stockout Detection
# Stockout days are drawn as a shaded band reaching the highest inventory level.
stockout_flags = [1 if x else 0 for x in df['is_stockout'].to_list()]
peak_inventory = max(inventory)  # computed once instead of once per plotted point
fig.add_trace(go.Scatter(x=dates, y=inventory, name='Inventory',
                         line=dict(color='blue')), row=1, col=1)
fig.add_trace(go.Scatter(x=dates, y=[s * peak_inventory for s in stockout_flags],
                         name='Stockout', fill='tozeroy',
                         fillcolor='rgba(255,0,0,0.3)', line=dict(color='red')), row=1, col=1)


# 2. Product Lifecycle & Stage (combined with dual y-axis)
weeks = df['weeks_since_launch'].to_list()
lifecycle_map = {'new': 1, 'growth': 2, 'mature': 3, 'clearance': 4}
lifecycle_numeric = [lifecycle_map.get(x, 0) for x in df['lifecycle_stage'].to_list()]  # unknown stage -> 0

fig.add_trace(go.Scatter(x=dates, y=weeks, name='Weeks Since Launch',
                         mode='lines', line=dict(color='purple', width=2)), row=1, col=2, secondary_y=False)
fig.add_trace(go.Scatter(x=dates, y=lifecycle_numeric, name='Lifecycle Stage',
                         mode='lines', line=dict(color='orange', width=2, dash='dash')), row=1, col=2, secondary_y=True)

# Add the lifecycle_numeric feature to the df
# (the model-comparison cell uses `lifecycle_stage_numeric` as a feature, so this cell must run first)
df = df.with_columns([
    pl.Series('lifecycle_stage_numeric', lifecycle_numeric)
])

# 3. Sales Momentum & Averages (skip first 27 days for stable momentum calculation)
momentum = df['sales_momentum'].to_list()
avg_7d = df['avg_daily_sales_7d'].to_list()
avg_28d = df['avg_daily_sales_28d'].to_list()

# The rolling means are computed with a minimum of one sample, so the early
# values exist but come from partial windows. Mask them with None so each line
# only starts once its window is full (7 and 28 days respectively).
avg_7d_plot = [None] * 6 + avg_7d[6:]
avg_28d_plot = [None] * 27 + avg_28d[27:]
momentum_plot = [None] * 27 + momentum[27:]

fig.add_trace(go.Scatter(x=dates, y=avg_7d_plot, name='Avg Sales 7d',
                         line=dict(color='blue', width=1.5)), row=2, col=1, secondary_y=False)
fig.add_trace(go.Scatter(x=dates, y=avg_28d_plot, name='Avg Sales 28d',
                         line=dict(color='purple', width=1.5, dash='dot')), row=2, col=1, secondary_y=False)
fig.add_trace(go.Scatter(x=dates, y=momentum_plot, name='Sales Momentum',
                         line=dict(color='teal', width=2)), row=2, col=1, secondary_y=True)
# NOTE(review): no secondary_y is passed here, so this line is presumably drawn
# against the primary (Avg Sales) axis although 1.0 is the neutral value for
# momentum on the secondary axis - confirm the placement in the rendered figure.
fig.add_hline(y=1, line_dash='dash', line_color='gray', row=2, col=1,
              annotation_text="Neutral (1.0)")


# 4.1 Days of Supply
dos = df['days_of_supply'].to_list()
fig.add_trace(go.Scatter(x=dates, y=dos, name='Days of Supply',
                         line=dict(color='orange', width=2)), row=2, col=2)

# 4.2 Markdown Depth (categories encoded 0-3, deeper markdown = higher value)
markdown_map = {'full_price': 0, 'light_markdown': 1, 'moderate_markdown': 2, 'deep_markdown': 3}
markdown_numeric = [markdown_map.get(x, 0) for x in df['markdown_depth'].to_list()]
fig.add_trace(go.Scatter(x=dates, y=markdown_numeric, name='Markdown Depth',
                         line=dict(color='red', width=2)), row=3, col=1)

# 4.3 Margin Status (categories encoded 0-3, healthier margin = higher value)
margin_map = {'healthy': 3, 'acceptable': 2, 'low': 1, 'negative': 0}
margin_numeric = [margin_map.get(x, 0) for x in df['margin_status'].to_list()]
fig.add_trace(go.Scatter(x=dates, y=margin_numeric, name='Margin Status',
                         line=dict(color='green', width=2)), row=3, col=2)

fig.update_layout(height=900, title_text='Feature Engineering Results', showlegend=True, template='plotly_white')
fig.update_yaxes(title_text='Inventory', row=1, col=1)
fig.update_yaxes(title_text='Weeks', row=1, col=2, secondary_y=False)
fig.update_yaxes(title_text='Stage (1-4)', row=1, col=2, secondary_y=True)
fig.update_yaxes(title_text='Avg Sales', row=2, col=1, secondary_y=False)
fig.update_yaxes(title_text='Momentum', row=2, col=1, secondary_y=True)
fig.update_yaxes(title_text='Days', row=2, col=2)
fig.update_yaxes(title_text='Depth', row=3, col=1)
fig.update_yaxes(title_text='Status', row=3, col=2)
fig.write_html("plots/feature_engineering_results.html")
fig.show()

In [133]:
import importlib
import src.product_qualification
importlib.reload(src.product_qualification)
from src.product_qualification import should_forecast_product, print_qualification_report

# Evaluate the qualification rules on the feature-enriched frame and print the report
qualified_df = should_forecast_product(df)
print_qualification_report(qualified_df)

# Preview the overall decision next to each individual rule flag for the last 10 days
qualification_columns = [
    'date',
    'should_forecast',
    'pass_inventory',
    'pass_product_age_rule',
    'pass_profitability',
    'pass_clearance_sales',
]
print("\nSample of qualification results:")
print(qualified_df.select(qualification_columns).tail(10))

PRODUCT QUALIFICATION REPORT (Daily)
Total Days: 100
Qualified Days: 7 (7.0%)

Failure Reasons (Days Failed):
  Inventory Health: 39 days (39.0%)
  Product Age and Sales Recency: 33 days (33.0%)
  Profitability: 27 days (27.0%)
  Clearance Sales: 77 days (77.0%)

Sample of qualification results:
shape: (10, 6)
┌────────────┬─────────────────┬────────────────┬────────────────┬────────────────┬────────────────┐
│ date       ┆ should_forecast ┆ pass_inventory ┆ pass_product_a ┆ pass_profitabi ┆ pass_clearance │
│ ---        ┆ ---             ┆ ---            ┆ ge_rule        ┆ lity           ┆ _sales         │
│ date       ┆ bool            ┆ bool           ┆ ---            ┆ ---            ┆ ---            │
│            ┆                 ┆                ┆ bool           ┆ bool           ┆ bool           │
╞════════════╪═════════════════╪════════════════╪════════════════╪════════════════╪════════════════╡
│ 2024-03-31 ┆ false           ┆ false          ┆ true           ┆ true           

Considering the results of the rules applied for qualifying a product for forecasting, we shouldn't forecast this product when the inventory is depleted.

In [134]:
# Task 5: Model Comparison
import polars as pl
import plotly.graph_objects as go
import importlib

import src.models

importlib.reload(src.models)

from src.models import baseline_moving_average, create_lag_features, train_lgbm, calculate_wmape

print("=== Task 5: Model Comparison ===\n")

# Create lag features for LGBM models (must be done before split)
df = create_lag_features(df, lags=[7, 14, 28])

# Baseline forecast (computed on full data, then split)
df = baseline_moving_average(df, window_size=28)

# Split data chronologically: the first `train_days` rows train, the remainder is the test set
train_days = 80
train_df = df.head(train_days)
test_df = df.tail(df.height - train_days)

print(f"Train set size: {train_df.height}")
print(f"Test set size: {test_df.height}")
print(f"Train period: {train_df['date'].min()} to {train_df['date'].max()}")
print(f"Test period: {test_df['date'].min()} to {test_df['date'].max()}")

# 2. Build Models

# Approach 1: Baseline (28-day moving average) - already computed
wmape_baseline = calculate_wmape(test_df)

# Approach 2: LGBM (Lag features only)
features_lags = ['sales_lag_7', 'sales_lag_14', 'sales_lag_28']
test_lgbm_lags, model_lags = train_lgbm(train_df, test_df, features=features_lags)
wmape_lgbm_lags = calculate_wmape(test_lgbm_lags)

# Approach 3: LGBM (Engineered features)
# `lifecycle_stage_numeric` is added to df by the feature-visualization cell, which must run first.
# NOTE(review): `sales_momentum` is built from rolling means over `sales` that
# appear to include the same day's value, and `is_stockout` uses same-day
# inventory. If those are not known at forecast time, this model's WMAPE is
# optimistic - confirm before comparing it with the other two approaches.
features_engineered = ["is_stockout", "lifecycle_stage_numeric", "sales_momentum"]
test_lgbm_eng, model_eng = train_lgbm(train_df, test_df, features=features_engineered)
wmape_lgbm_eng = calculate_wmape(test_lgbm_eng)

# 3. Compare Results
results = {
    'Baseline (28d MA)': wmape_baseline,
    'LGBM (Lags Only)': wmape_lgbm_lags,
    'LGBM (Engineered)': wmape_lgbm_eng
}

print("\nModel Performance (WMAPE):")
for model, score in results.items():
    print(f"{model}: {score:.2%}")

# Calculate improvement: relative WMAPE reduction versus the baseline (positive = better)
print("\nImprovement over baseline:")
for label, score in (('LGBM (Lags)', wmape_lgbm_lags), ('LGBM (Engineered)', wmape_lgbm_eng)):
    print(f"  {label}: {(wmape_baseline - score) / wmape_baseline * 100:.1f}%")

# Visualization of Forecasts
fig = go.Figure()

# Actual sales over the test window
fig.add_trace(go.Scatter(x=test_df['date'].to_list(), y=test_df['sales'].to_list(),
                         mode='lines+markers', name='Actual Sales',
                         line=dict(color='black', width=3)))

# One forecast line per approach: (frame holding `forecast`, legend label, line style)
forecast_traces = [
    (test_df, f'Baseline (MA) - WMAPE: {wmape_baseline:.1%}', dict(dash='dash', color='gray')),
    (test_lgbm_lags, f'LGBM (Lags) - WMAPE: {wmape_lgbm_lags:.1%}', dict(color='blue')),
    (test_lgbm_eng, f'LGBM (Engineered) - WMAPE: {wmape_lgbm_eng:.1%}', dict(color='green')),
]
for forecast_df, trace_name, line_style in forecast_traces:
    fig.add_trace(go.Scatter(x=forecast_df['date'].to_list(), y=forecast_df['forecast'].to_list(),
                             mode='lines', name=trace_name, line=line_style))

# The title reports the actual test-set length instead of a hard-coded 20
fig.update_layout(title=f'Forecast Model Comparison (Last {test_df.height} Days)',
                  xaxis_title='Date', yaxis_title='Sales',
                  template='plotly_white')
fig.write_html("plots/model_comparison_forecasts.html")
fig.show()

=== Task 5: Model Comparison ===

Train set size: 80
Test set size: 20
Train period: 2024-01-01 to 2024-03-20
Test period: 2024-03-21 to 2024-04-09
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 37
[LightGBM] [Info] Number of data points in the train set: 52, number of used features: 3
[LightGBM] [Info] Start training from score 4.769231
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 31
[LightGBM] [Info] Number of data points in the train set: 80, number of used features: 3
[LightGBM] [Info] Start training from score 8.525000

Model Performance (WMAPE):
Baseline (28d MA): 117.31%
LGBM (Lags Only): 118.12%
LGBM (Engineered): 79.36%

Improvement over baseline:
  LGBM (Lags): -0.7%
  LGBM (Engineered): 32.


X does not have valid feature names, but LGBMRegressor was fitted with feature names


X does not have valid feature names, but LGBMRegressor was fitted with feature names



In [136]:
# Run unit tests for should_forecast_product()
import unittest
import importlib
import src.unit_tests

importlib.reload(src.unit_tests)
from src.unit_tests import TestProductQualification
# Collect only the TestProductQualification case and run it with per-test output
loader = unittest.TestLoader()
suite = loader.loadTestsFromTestCase(TestProductQualification)
runner = unittest.TextTestRunner(verbosity=2)
runner.run(suite)

test_rule1_no_inventory_fails (src.unit_tests.TestProductQualification.test_rule1_no_inventory_fails)
Rule 1: Products with no inventory should not be forecasted. ... ok
test_rule2_new_product_forecasted (src.unit_tests.TestProductQualification.test_rule2_new_product_forecasted)
Rule 2: New products should always be forecasted. ... ERROR
test_rule4_clearance_forecasted (src.unit_tests.TestProductQualification.test_rule4_clearance_forecasted)
Rule 4: Products on clearance should be forecasted. ... FAIL

ERROR: test_rule2_new_product_forecasted (src.unit_tests.TestProductQualification.test_rule2_new_product_forecasted)
Rule 2: New products should always be forecasted.
----------------------------------------------------------------------
Traceback (most recent call last):
  File "C:\Users\mathi\PycharmProjects\mathieu_damours_assignment\src\unit_tests.py", line 49, in test_rule2_new_product_forecasted
    self.assertTrue(result.head(14)['is_new_product'].all())
                    ~~~~~~

<unittest.runner.TextTestResult run=3 errors=1 failures=1>