# Module 1.12: ABC-XYZ Segmentation

## 1. Setup

In [None]:
import os, sys, warnings
from pathlib import Path
import pandas as pd
import numpy as np

# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Files/directories whose presence marks a directory as the project root.
markers = ('.git', 'pyproject.toml', 'setup.py', 'requirements.txt', '.project-root')
p = Path.cwd().resolve()


def _has_marker(directory):
    """Return True when *directory* contains at least one root marker."""
    return any((directory / name).exists() for name in markers)


# Walk upward from the working directory: the nearest directory holding a
# marker wins, and the working directory itself is the fallback.
PROJECT_ROOT = next(filter(_has_marker, (p, *p.parents)), p)

# Run from the root and make it importable, without duplicating the entry.
os.chdir(PROJECT_ROOT)
_root_str = str(PROJECT_ROOT)
if _root_str not in sys.path:
    sys.path.insert(0, _root_str)

DATA_DIR = PROJECT_ROOT.joinpath('data')
OUTPUT_DIR = DATA_DIR.joinpath('output')

In [None]:
from tsforge.plots import plot_scatter, plot_bar, plot_timeseries, plot_panel

## 2. Load Data

In [None]:
# Read both parquet inputs from the shared output directory.
weekly_df = pd.read_parquet(OUTPUT_DIR.joinpath('1.08_data_preparation_output.parquet'))

# Load the scores and attach the combined segment label: the ABC class and
# the XYZ class joined by a hyphen (e.g. 'A' and 'X' give 'A-X').
abc_xyz_df = pd.read_parquet(OUTPUT_DIR.joinpath('1.10_scores.parquet')).assign(
    abc_xyz=lambda scores: scores['abc_class'] + '-' + scores['xyz_class']
)

In [None]:
# Row counts per ABC class (rows) by XYZ class (columns), with margin totals.
pd.crosstab(
    index=abc_xyz_df['abc_class'],
    columns=abc_xyz_df['xyz_class'],
    margins=True,
)

## 3. Forecastability Map

In [None]:
# Colours keyed by XYZ class, passed to plot_scatter as `colors`.
xyz_palette = {'X': '#2ecc71', 'Y': '#f39c12', 'Z': '#e74c3c'}

# Labels for the four quadrants, passed as `quadrant_labels`.
# NOTE(review): both left-hand quadrants are labelled 'X' — confirm that this
# matches how xyz_class was assigned upstream.
quadrant_names = {
    'top_left': 'X',
    'top_right': 'Y',
    'bottom_left': 'X',
    'bottom_right': 'Z',
}

# Chaos score on x against structure score on y, both thresholded at 0.5.
plot_scatter(
    abc_xyz_df,
    x='chaos_score',
    y='structure_score',
    color_col='xyz_class',
    x_threshold=0.5,
    y_threshold=0.5,
    quadrant_labels=quadrant_names,
    quadrant_label_style='watermark',
    opacity=0.4,
    colors=xyz_palette,
    style={"title": "XYZ Classification"},
)

## 4. A-Class Examples

In [None]:
# Keep an independent copy of the rows whose ABC class is 'A'.
is_a = abc_xyz_df['abc_class'].eq('A')
a_class = abc_xyz_df.loc[is_a].copy()

# How the A-class rows split across the XYZ classes.
a_class['xyz_class'].value_counts()

In [None]:
# A-X
# Up to 20 A-class series in XYZ class 'X', largest total_volume first.
a_x_rows = a_class.loc[a_class['xyz_class'] == 'X']
a_x_ids = a_x_rows.nlargest(20, 'total_volume')['unique_id'].tolist()

# Only plot when the segment has at least one series.
# NOTE(review): the call is nested in an `if`, so a notebook will not
# auto-display its return value — confirm plot_timeseries renders the
# figure itself.
if a_x_ids:
    plot_timeseries(
        weekly_df,
        id_col='unique_id',
        date_col='ds',
        value_col='y',
        ids=a_x_ids,
        mode='dropdown',
        style={"title": "A-X: Automate These"},
        trace_style={'line_color': '#2ecc71', 'fill': 'tozeroy'},
    )

In [None]:
# A-Y
# Up to 20 A-class series in XYZ class 'Y', largest total_volume first.
a_y_rows = a_class.loc[a_class['xyz_class'] == 'Y']
a_y_ids = a_y_rows.nlargest(20, 'total_volume')['unique_id'].tolist()

# Only plot when the segment has at least one series.
# NOTE(review): the call is nested in an `if`, so a notebook will not
# auto-display its return value — confirm plot_timeseries renders the
# figure itself.
if a_y_ids:
    plot_timeseries(
        weekly_df,
        id_col='unique_id',
        date_col='ds',
        value_col='y',
        ids=a_y_ids,
        mode='dropdown',
        style={"title": "A-Y: Robust Methods"},
        trace_style={'line_color': '#f39c12', 'fill': 'tozeroy'},
    )

In [None]:
# A-Z
# Up to 20 A-class series in XYZ class 'Z', largest total_volume first.
a_z_rows = a_class.loc[a_class['xyz_class'] == 'Z']
a_z_ids = a_z_rows.nlargest(20, 'total_volume')['unique_id'].tolist()

# Only plot when the segment has at least one series.
# NOTE(review): the call is nested in an `if`, so a notebook will not
# auto-display its return value — confirm plot_timeseries renders the
# figure itself.
if a_z_ids:
    plot_timeseries(
        weekly_df,
        id_col='unique_id',
        date_col='ds',
        value_col='y',
        ids=a_z_ids,
        mode='dropdown',
        style={"title": "A-Z: Danger Zone"},
        trace_style={'line_color': '#e74c3c', 'fill': 'tozeroy'},
    )

## 5. ABC-XYZ Matrix

In [None]:
# Colours keyed by the combined ABC-XYZ label, passed to plot_bar as `colors`.
abc_xyz_palette = {
    'A-X': '#1e8449',
    'A-Y': '#f39c12',
    'A-Z': '#c0392b',
    'B-X': '#27ae60',
    'B-Y': '#f5b041',
    'B-Z': '#e74c3c',
    'C-X': '#82e0aa',
    'C-Y': '#fad7a0',
    'C-Z': '#f1948a',
}

# Count of unique_id values per segment; sort_by=None is passed explicitly.
plot_bar(
    abc_xyz_df,
    id_col='abc_xyz',
    value_col='unique_id',
    agg='count',
    sort_by=None,
    colors=abc_xyz_palette,
    style={"title": "SKU Count by ABC-XYZ"},
)

In [None]:
# Per-segment row count and summed volume, with the label as a column.
segment_stats = (
    abc_xyz_df
    .groupby('abc_xyz')
    .agg(
        sku_count=('unique_id', 'count'),
        total_volume=('total_volume', 'sum'),
    )
    .reset_index()
)

# Each segment's share of the overall volume, in percent to one decimal.
grand_total = segment_stats['total_volume'].sum()
segment_stats['volume_pct'] = (
    segment_stats['total_volume'].div(grand_total).mul(100).round(1)
)

# Colours keyed by the combined ABC-XYZ label, passed to plot_bar as `colors`.
abc_xyz_palette = {
    'A-X': '#1e8449',
    'A-Y': '#f39c12',
    'A-Z': '#c0392b',
    'B-X': '#27ae60',
    'B-Y': '#f5b041',
    'B-Z': '#e74c3c',
    'C-X': '#82e0aa',
    'C-Y': '#fad7a0',
    'C-Z': '#f1948a',
}

plot_bar(
    segment_stats,
    id_col='abc_xyz',
    value_col='volume_pct',
    agg='sum',
    sort_by=None,
    show_values=True,
    value_format='.1f',
    colors=abc_xyz_palette,
    style={"title": "Volume % by ABC-XYZ"},
)

## 6. Summary

In [None]:
# One row per segment: row count, summed volume and mean of both scores.
summary = (
    abc_xyz_df
    .groupby('abc_xyz')
    .agg(
        sku_count=('unique_id', 'count'),
        total_volume=('total_volume', 'sum'),
        avg_chaos=('chaos_score', 'mean'),
        avg_structure=('structure_score', 'mean'),
    )
    .reset_index()
)

# Each segment's share of the overall volume, in percent to one decimal.
overall_volume = summary['total_volume'].sum()
summary['volume_pct'] = summary['total_volume'].div(overall_volume).mul(100).round(1)

# Displayed as the cell result; `summary` itself is not reordered.
summary.sort_values(by='abc_xyz')

## 7. Save Output

In [None]:
# Columns written to the output file, in this order.
output_cols = [
    'unique_id',
    'abc_class',
    'xyz_class',
    'abc_xyz',
    'structure_score',
    'chaos_score',
    'forecastability',
    'total_volume',
]

# Write the selected columns without the DataFrame index.
export_path = OUTPUT_DIR / '1.12_abc_xyz.parquet'
abc_xyz_df.loc[:, output_cols].to_parquet(export_path, index=False)
print(f"Saved {len(abc_xyz_df):,} series")