# Module 1.10: Lie Detector 6 — When Diagnostics Lie

> **Goal:** Select 6 anchor metrics, synthesize into 2 scores.

Module 1.09 gave us 45+ diagnostics per series—too many to act on. Now we select 6 "Anchor Metrics" that separate real signal from noise, and learn when even those can lie.

---

## 1. Setup

In [1]:
# --- Imports ---
import os
import sys
import warnings
from pathlib import Path

import pandas as pd

from tsforge.eda.diagnostics import compute_forecastability
from tsforge.eda.segmentation import compute_abc
from tsforge.plots import (
    plot_distribution,
    plot_panel,
    plot_pareto,
    plot_scatter,
    plot_timeseries,
)

warnings.filterwarnings('ignore')

In [2]:
# --- Settings ---

# Project Root Setup
# Walk from the current working directory up through its ancestors and take the
# first directory that contains any marker; fall back to the cwd when none match.
markers = ('.git', 'pyproject.toml', 'setup.py', 'requirements.txt', '.project-root')
p = Path.cwd().resolve()
PROJECT_ROOT = p
for _candidate in (p, *p.parents):
    if any((_candidate / m).exists() for m in markers):
        PROJECT_ROOT = _candidate
        break

# Run from the project root and make it importable.
os.chdir(PROJECT_ROOT)
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Data Directory
DATA_DIR = PROJECT_ROOT / 'data'
OUTPUT_DIR = DATA_DIR / 'output'

---

## 2. Load Data

In [3]:
# Load the diagnostics written by 1.09 and the prepared data written by 1.08,
# both from OUTPUT_DIR (read in that order).
diagnostics, weekly_df = (
    pd.read_parquet(OUTPUT_DIR / parquet_name)
    for parquet_name in (
        '1.09_diagnostics.parquet',
        '1.08_data_preparation_output.parquet',
    )
)

In [4]:
# Quick check: peek at the first three diagnostic rows
diagnostics.iloc[:3]

Unnamed: 0,unique_id,series_length,adi,nperiods,seasonal_period,trend,spike,linearity,curvature,e_acf1,...,p75,p95,p97point5,max,min,item_id,dept_id,cat_id,store_id,state_id
0,FOODS_1_001_CA_1,283,1.105469,1,52,0.20445,0.003823,-9.830194,8.560252,0.305453,...,8.0,12.0,14.95,25.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_1,CA
1,FOODS_1_001_CA_2,283,1.105469,1,52,0.22328,0.0095,-15.859114,4.662268,0.347209,...,11.0,18.0,20.95,30.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_2,CA
2,FOODS_1_001_CA_3,283,1.118577,1,52,0.162804,0.016073,-16.074762,8.062367,0.098584,...,13.0,20.0,23.0,32.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_3,CA


---

## 3. The Lie Detector 6

| Camp | Metric | Question | High Value Means |
|------|--------|----------|------------------|
| **Structure** | `trend` | Is there directional movement? | Strong up/down pattern |
| **Structure** | `seasonal_strength` | Is there a repeating cycle? | Reliable weekly/yearly pattern |
| **Structure** | `MI_top_k_lags` | Does last week predict this week? | High persistence |
| **Chaos** | `permutation_entropy` | How random is the sequence? | Unpredictable behavior |
| **Chaos** | `adi` | How sparse is demand? | Long gaps between non-zero values |
| **Chaos** | `lumpiness` | Does variance change over time? | Unstable, regime-shifting behavior |

In [5]:
# The Lie Detector 6: three "structure" anchors followed by three "chaos" anchors
structure_cols = ['trend', 'seasonal_strength', 'MI_top_k_lags']
chaos_cols = ['permutation_entropy', 'adi', 'lumpiness']
ld6_cols = [*structure_cols, *chaos_cols]

In [6]:
# Keep the identifier/hierarchy columns that are present in the diagnostics,
# followed by the six LD6 metrics.
hierarchy_cols = ['unique_id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
keep_cols = [
    *(c for c in hierarchy_cols if c in diagnostics.columns),
    *ld6_cols,
]

# Work on an independent copy so later column additions never touch `diagnostics`.
ld6_df = diagnostics.loc[:, keep_cols].copy()
ld6_df.head()

Unnamed: 0,unique_id,item_id,dept_id,cat_id,store_id,state_id,trend,seasonal_strength,MI_top_k_lags,permutation_entropy,adi,lumpiness
0,FOODS_1_001_CA_1,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,0.20445,0.376623,0.270401,0.969347,1.105469,87.235596
1,FOODS_1_001_CA_2,FOODS_1_001,FOODS_1,FOODS,CA_2,CA,0.22328,0.439298,0.153054,0.981118,1.105469,230.382385
2,FOODS_1_001_CA_3,FOODS_1_001,FOODS_1,FOODS,CA_3,CA,0.162804,0.384099,0.150131,0.984305,1.118577,116.775986
3,FOODS_1_001_CA_4,FOODS_1_001,FOODS_1,FOODS,CA_4,CA,0.110839,0.479389,0.28407,0.952965,1.276018,0.956493
4,FOODS_1_001_TX_1,FOODS_1_001,FOODS_1,FOODS,TX_1,TX,0.260977,0.376637,0.168827,0.962183,1.200855,20.594612


---

## 4. When Diagnostics Lie

The LD6 works as a **system**, not 6 isolated numbers. When chaos metrics are high, structure metrics become unreliable.

In [None]:
# trend × adi: a larger product means a stronger trend reading on sparser demand,
# i.e. the trend value is more likely to be "lying".
ld6_df['trend_lie_score'] = ld6_df['trend'].mul(ld6_df['adi'])
liars = ld6_df.nlargest(20, 'trend_lie_score')

# "Truthful" trend: strong trend (> 0.6) on dense demand (adi < 1.1); first 20 matches.
is_truthful = ld6_df['trend'].gt(0.6) & ld6_df['adi'].lt(1.1)
truthful = ld6_df.loc[is_truthful].iloc[:20]

In [36]:
# Contrast the top "liar" (largest trend × ADI) with the first "truthful" series.
# `ids` is given series identifiers (as in the dropdown cell, which passes a list of
# `unique_id` values), so extract `unique_id` from the selected row rather than
# handing over the entire diagnostics row.
liar_id = liars.iloc[0]['unique_id']
truthful_id = truthful.iloc[0]['unique_id']

plot_panel(
    [
        plot_timeseries(
            weekly_df,
            id_col="unique_id", date_col="ds", value_col="y",
            ids=[liar_id],
            style={
                "title":
                f"🔴 LIAR — sparse spikes fool trend detection<br><sub>{liar_id}</sub>"
            },
            trace_style={'line_color': 'red',
                         'line_width': 0.8,
                         'fill': 'tozeroy'},
        ),
        plot_timeseries(
            weekly_df,
            id_col="unique_id", date_col="ds", value_col="y",
            ids=[truthful_id],
            style={
                "title":
                f"🟢 TRUTHFUL — dense demand with real upward drift<br><sub>{truthful_id}</sub>"
            },
            trace_style={'line_color': 'green',
                         'line_width': 0.8,
                         'fill': 'tozeroy'},
        ),
    ],
    title="Trend × ADI: When Trend Lies (High Trend, High ADI)",
    style={"y_title": "Units"},
    layout="vertical",
    vertical_spacing=0.12,
)


## Use the dropdown to browse other examples of when trend lies

In [None]:
# Plot every selected "liar" series; mode='dropdown' lets the reader switch between them.
plot_timeseries(
    weekly_df,
    id_col="unique_id", date_col="ds", value_col="y",
    ids=liars['unique_id'].tolist(),
    style={
        "title": "Trend × ADI: When Trend Lies (High Trend, High ADI)",
    },
    trace_style={'line_color': 'red',
                 'line_width': 0.8,
                 'fill': 'tozeroy'},
    mode='dropdown',
)

---

## 5. ABC Classification

In [11]:
# ABC classification with recency='52W' (presumably the trailing 52 weeks only —
# the totals are far smaller than the all-history run; confirm against compute_abc).
# Stored under its own name because the next cell rebinds `abc_df` to the
# all-history classification, which would otherwise silently discard this result.
abc_recent_df = compute_abc(weekly_df, id_col='unique_id', value_col='y', recency='52W')
abc_recent_df

Unnamed: 0,unique_id,total_volume,abc_rank,cumulative_pct,abc_class
0,FOODS_3_090_CA_3,37537.0,1,0.002592,A
1,FOODS_3_586_TX_2,32861.0,2,0.004860,A
2,FOODS_3_120_CA_3,27640.0,3,0.006768,A
3,FOODS_3_586_CA_3,25896.0,4,0.008556,A
4,FOODS_3_586_TX_3,25356.0,5,0.010307,A
...,...,...,...,...,...
30485,HOBBIES_1_016_CA_4,3.0,30486,0.999999,C
30486,HOUSEHOLD_2_202_CA_4,3.0,30487,1.000000,C
30487,HOUSEHOLD_2_276_CA_4,3.0,30488,1.000000,C
30488,HOBBIES_1_111_TX_1,2.0,30489,1.000000,C


In [12]:
# ABC classification without a recency argument; this is the frame used by the
# Pareto plot and joined onto the scores further down.
abc_df = compute_abc(
    weekly_df,
    id_col='unique_id',
    value_col='y',
)
abc_df

Unnamed: 0,unique_id,total_volume,abc_rank,cumulative_pct,abc_class
0,FOODS_3_090_CA_3,257313.0,1,0.003775,A
1,FOODS_3_586_TX_2,197512.0,2,0.006673,A
2,FOODS_3_586_TX_3,153918.0,3,0.008932,A
3,FOODS_3_586_CA_3,138204.0,4,0.010959,A
4,FOODS_3_090_CA_1,130539.0,5,0.012875,A
...,...,...,...,...,...
30485,HOUSEHOLD_1_378_CA_1,27.0,30486,1.000019,C
30486,FOODS_2_209_TX_2,27.0,30487,1.000019,C
30487,HOUSEHOLD_2_130_WI_2,26.0,30488,1.000019,C
30488,HOBBIES_1_170_WI_3,19.0,30489,1.000020,C


In [13]:
# Re-display the ABC table computed in the previous cell (no recency argument).
abc_df

Unnamed: 0,unique_id,total_volume,abc_rank,cumulative_pct,abc_class
0,FOODS_3_090_CA_3,257313.0,1,0.003775,A
1,FOODS_3_586_TX_2,197512.0,2,0.006673,A
2,FOODS_3_586_TX_3,153918.0,3,0.008932,A
3,FOODS_3_586_CA_3,138204.0,4,0.010959,A
4,FOODS_3_090_CA_1,130539.0,5,0.012875,A
...,...,...,...,...,...
30485,HOUSEHOLD_1_378_CA_1,27.0,30486,1.000019,C
30486,FOODS_2_209_TX_2,27.0,30487,1.000019,C
30487,HOUSEHOLD_2_130_WI_2,26.0,30488,1.000019,C
30488,HOBBIES_1_170_WI_3,19.0,30489,1.000020,C


In [14]:
# Pareto view of volume per series, colored by ABC class; x-axis title left blank.
plot_pareto(
    abc_df,
    id_col='unique_id',
    value_col='total_volume',
    cumulative_col='cumulative_pct',
    color_col='abc_class',
    style={"x_title": ""},
)


---

## 5. Compute Structure & Chaos Scores

We collapse 6 metrics into 2 scores using `compute_forecastability()`:
- **Structure Score**: Average of normalized (trend, seasonality, acf1) — higher = more learnable
- **Chaos Score**: Average of normalized (entropy, ADI, lumpiness) — higher = less trustworthy

The function handles:
- Clipping outliers (default: 95th percentile for chaos metrics)
- Min-max normalization to [0, 1]
- Weighted averaging (optional)

In [15]:
# Collapse the diagnostics into `structure_score`, `chaos_score` and a
# `forecastability` label. Clipping at the 0.95 quantile is requested for the
# "chaos" columns only; `series_length` supplies the per-series period count.
scores_df = compute_forecastability(
    diagnostics,
    clip_quantile=0.95,
    clip_cols="chaos",
    n_periods_col="series_length",
)

In [16]:
# Inspect the scored frame: the diagnostics plus structure_score, chaos_score, forecastability.
scores_df

Unnamed: 0,unique_id,series_length,adi,nperiods,seasonal_period,trend,spike,linearity,curvature,e_acf1,...,max,min,item_id,dept_id,cat_id,store_id,state_id,structure_score,chaos_score,forecastability
0,FOODS_1_001_CA_1,283,0.065787,1,52,0.204450,3.823080e-03,-9.830194,8.560252,0.305453,...,25.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,0.211238,0.346068,Messy
1,FOODS_1_001_CA_2,283,0.065787,1,52,0.223280,9.499593e-03,-15.859114,4.662268,0.347209,...,30.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_2,CA,0.198534,0.351539,Messy
2,FOODS_1_001_CA_3,283,0.073964,1,52,0.162804,1.607298e-02,-16.074762,8.062367,0.098584,...,32.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_3,CA,0.155179,0.354816,Messy
3,FOODS_1_001_CA_4,282,0.172170,1,52,0.110839,9.207660e-05,-5.512033,4.155784,0.179607,...,9.0,0.0,FOODS_1_001,FOODS_1,FOODS,CA_4,CA,0.226309,0.374603,Messy
4,FOODS_1_001_TX_1,282,0.125286,1,52,0.260977,2.540837e-03,-2.783171,-1.893590,0.192326,...,24.0,0.0,FOODS_1_001,FOODS_1,FOODS,TX_1,TX,0.191938,0.362696,Messy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,HOUSEHOLD_2_516_TX_2,282,0.206935,1,52,0.045244,1.163422e-04,1.920066,1.019703,-0.042675,...,10.0,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,TX_2,TX,0.194306,0.382092,Sparse
30486,HOUSEHOLD_2_516_TX_3,283,0.589348,1,52,0.228420,1.623258e-05,3.350072,3.957704,0.115205,...,5.0,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,TX_3,TX,0.260850,0.467400,Sparse
30487,HOUSEHOLD_2_516_WI_1,282,0.851066,1,52,0.114466,8.931491e-06,-0.183477,-2.029504,0.046527,...,4.0,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,WI_1,WI,0.188816,0.547241,Sparse
30488,HOUSEHOLD_2_516_WI_2,277,1.000000,1,52,0.057683,8.992631e-07,0.606239,0.491996,-0.032954,...,3.0,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,WI_2,WI,0.243874,0.493618,Sparse


In [17]:
# Attach each series' ABC class to its scores.
# - Dropping any existing `abc_class` first keeps the cell idempotent: re-running it
#   would otherwise produce `abc_class_x` / `abc_class_y` instead of one column.
# - validate='many_to_one' fails loudly if `abc_df` ever holds duplicate `unique_id`
#   values, which would silently duplicate score rows in a left join.
scores_df = (
    scores_df
    .drop(columns='abc_class', errors='ignore')
    .merge(
        abc_df[['unique_id', 'abc_class']],
        on='unique_id',
        how='left',
        validate='many_to_one',
    )
)

In [18]:
# Inspect the scores again, now with the merged `abc_class` column.
scores_df

Unnamed: 0,unique_id,series_length,adi,nperiods,seasonal_period,trend,spike,linearity,curvature,e_acf1,...,min,item_id,dept_id,cat_id,store_id,state_id,structure_score,chaos_score,forecastability,abc_class
0,FOODS_1_001_CA_1,283,0.065787,1,52,0.204450,3.823080e-03,-9.830194,8.560252,0.305453,...,0.0,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,0.211238,0.346068,Messy,B
1,FOODS_1_001_CA_2,283,0.065787,1,52,0.223280,9.499593e-03,-15.859114,4.662268,0.347209,...,0.0,FOODS_1_001,FOODS_1,FOODS,CA_2,CA,0.198534,0.351539,Messy,A
2,FOODS_1_001_CA_3,283,0.073964,1,52,0.162804,1.607298e-02,-16.074762,8.062367,0.098584,...,0.0,FOODS_1_001,FOODS_1,FOODS,CA_3,CA,0.155179,0.354816,Messy,A
3,FOODS_1_001_CA_4,282,0.172170,1,52,0.110839,9.207660e-05,-5.512033,4.155784,0.179607,...,0.0,FOODS_1_001,FOODS_1,FOODS,CA_4,CA,0.226309,0.374603,Messy,B
4,FOODS_1_001_TX_1,282,0.125286,1,52,0.260977,2.540837e-03,-2.783171,-1.893590,0.192326,...,0.0,FOODS_1_001,FOODS_1,FOODS,TX_1,TX,0.191938,0.362696,Messy,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,HOUSEHOLD_2_516_TX_2,282,0.206935,1,52,0.045244,1.163422e-04,1.920066,1.019703,-0.042675,...,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,TX_2,TX,0.194306,0.382092,Sparse,C
30486,HOUSEHOLD_2_516_TX_3,283,0.589348,1,52,0.228420,1.623258e-05,3.350072,3.957704,0.115205,...,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,TX_3,TX,0.260850,0.467400,Sparse,C
30487,HOUSEHOLD_2_516_WI_1,282,0.851066,1,52,0.114466,8.931491e-06,-0.183477,-2.029504,0.046527,...,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,WI_1,WI,0.188816,0.547241,Sparse,C
30488,HOUSEHOLD_2_516_WI_2,277,1.000000,1,52,0.057683,8.992631e-07,0.606239,0.491996,-0.032954,...,0.0,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,WI_2,WI,0.243874,0.493618,Sparse,C


In [19]:
# Structure vs. chaos map, split into quadrants at 0.5 on both axes and colored by ABC class.
# NOTE(review): in the scored table above, rows labelled 'Sparse' in `forecastability`
# carry higher chaos scores than rows labelled 'Messy' — confirm that the bottom-left /
# bottom-right labels here agree with how compute_forecastability assigns its classes.
quadrant_names = {
    'top_left': 'STABLE',
    'top_right': 'COMPLEX',
    'bottom_left': 'SPARSE',
    'bottom_right': 'MESSY',
}

plot_scatter(
    scores_df,
    x='chaos_score',
    y='structure_score',
    color_col='abc_class',
    x_threshold=0.5,
    y_threshold=0.5,
    quadrant_labels=quadrant_names,
    quadrant_label_style='watermark',
)

---

## 6. Save Output

In [20]:
# Persist the scored frame (without the index) for downstream modules.
# OUTPUT_DIR is DATA_DIR / 'output', so the destination path is unchanged.
scores_df.to_parquet(OUTPUT_DIR / '1.10_scores.parquet', index=False)