# Forecast Reviewer Transform

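Loads the site list and the VP data, pivots the VP metrics from long to wide format, and full-outer-joins the two, tagging each row with an `available_inputs` flag (`vp_list`, `vp`, or `list`).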

Inputs (injected variables):
1. `site_list_path` — Site list Excel file (Station, Cycle, Business Org, POD)
2. `ct_file_path` — VP/CT CSV file (long format: metric_name, metric_value, node, ...)

In [None]:
# name: setup | type: python
# Validate the two injected input paths (site_list_path, ct_file_path) up
# front and expose them, as strings, in `result`.
from pathlib import Path

site_list = Path(site_list_path)
ct_file = Path(ct_file_path)

# is_file() rather than exists(): exists() is also true for a directory, which
# would pass validation here and only surface later as a reader error.
if not site_list.is_file():
    raise FileNotFoundError(f'Site list not found: {site_list_path}')
if not ct_file.is_file():
    raise FileNotFoundError(f'CT file not found: {ct_file_path}')

print(f'Site list: {site_list}')
print(f'CT file: {ct_file}')

result = {'site_list_path': str(site_list), 'ct_file_path': str(ct_file)}

In [None]:
-- name: load_site_list | type: sql
-- Build site_list from the Excel workbook at site_list_path, keeping only the
-- rows whose "Business Org" is exactly 'AMZL' and renaming the columns to
-- snake_case.
CREATE OR REPLACE TABLE site_list AS
SELECT
    src."Station"      AS station,
    src."Cycle"        AS cycle,
    src."Business Org" AS business_org
FROM read_xlsx(site_list_path) AS src
WHERE src."Business Org" = 'AMZL'

In [None]:
-- name: load_vp | type: sql
-- Materialize the VP/CT CSV at ct_file_path unchanged (all columns) as vp_raw.
CREATE OR REPLACE TABLE vp_raw AS
SELECT
    *
FROM read_csv_auto(ct_file_path)

In [None]:
-- name: vp_pivot | type: sql
-- Pivot vp_raw from long format (one row per metric_name) to wide format:
-- one row per (node, plan_start_date, ofd_dates, demand_types, cpts) with one
-- column per metric. Only rows whose plan_start_date and ofd_dates fall on the
-- same calendar date are kept. If a metric occurs more than once within a
-- group, MAX keeps the largest metric_value; a metric that never occurs in a
-- group yields NULL.
CREATE OR REPLACE TABLE vp AS
SELECT
    node,
    plan_start_date,
    ofd_dates,
    demand_types,
    cpts,
    MAX(metric_value) FILTER (WHERE metric_name = 'total_volume_available')          AS total_volume_available,
    MAX(metric_value) FILTER (WHERE metric_name = 'automated_uncapped_slam_forecast') AS automated_uncapped_slam_forecast,
    MAX(metric_value) FILTER (WHERE metric_name = 'current_slam')                    AS current_slam,
    MAX(metric_value) FILTER (WHERE metric_name = 'weekly_uncapped_slam_forecast')   AS weekly_uncapped_slam_forecast,
    MAX(metric_value) FILTER (WHERE metric_name = 'post_cutoff_adjustment')          AS post_cutoff_adjustment,
    MAX(metric_value) FILTER (WHERE metric_name = 'total_backlog')                   AS total_backlog,
    MAX(metric_value) FILTER (WHERE metric_name = 'automated_confidence')            AS automated_confidence,
    MAX(metric_value) FILTER (WHERE metric_name = 'uncapped_slam_forecast')          AS uncapped_slam_forecast,
    MAX(metric_value) FILTER (WHERE metric_name = 'atrops_soft_cap')                 AS atrops_soft_cap,
    MAX(metric_value) FILTER (WHERE metric_name = 'confidence_anomaly')              AS confidence_anomaly,
    MAX(metric_value) FILTER (WHERE metric_name = 'net_volume_adjustments')          AS net_volume_adjustments,
    MAX(metric_value) FILTER (WHERE metric_name = 'adjusted_uncapped_slam_forecast') AS adjusted_uncapped_slam_forecast,
    MAX(metric_value) FILTER (WHERE metric_name = 'cap_target_buffer')               AS cap_target_buffer,
    MAX(metric_value) FILTER (WHERE metric_name = 'earlies_expected')                AS earlies_expected,
    MAX(metric_value) FILTER (WHERE metric_name = 'returns')                         AS returns,
    MAX(metric_value) FILTER (WHERE metric_name = 'sideline_in')                     AS sideline_in,
    MAX(metric_value) FILTER (WHERE metric_name = 'vovi_uncapped_slam_forecast')     AS vovi_uncapped_slam_forecast,
    MAX(metric_value) FILTER (WHERE metric_name = 'in_station_backlog')              AS in_station_backlog,
    MAX(metric_value) FILTER (WHERE metric_name = 'mnr_expected')                    AS mnr_expected,
    MAX(metric_value) FILTER (WHERE metric_name = 'mnr_received')                    AS mnr_received,
    MAX(metric_value) FILTER (WHERE metric_name = 'current_schedule')                AS current_schedule,
    MAX(metric_value) FILTER (WHERE metric_name = 'vovi_adjustment')                 AS vovi_adjustment,
    MAX(metric_value) FILTER (WHERE metric_name = 'forecast_type')                   AS forecast_type,
    MAX(metric_value) FILTER (WHERE metric_name = 'earlies_received')                AS earlies_received,
    MAX(metric_value) FILTER (WHERE metric_name = 'latest_deployed_cap')             AS latest_deployed_cap,
    MAX(metric_value) FILTER (WHERE metric_name = 'atrops_hard_cap')                 AS atrops_hard_cap,
    MAX(metric_value) FILTER (WHERE metric_name = 'capped_slam_forecast')            AS capped_slam_forecast
FROM vp_raw
WHERE CAST(plan_start_date AS DATE) = CAST(ofd_dates AS DATE)
GROUP BY
    node,
    plan_start_date,
    ofd_dates,
    demand_types,
    cpts
ORDER BY
    node,
    cpts

In [None]:
-- name: joined | type: sql
-- Full outer join of site_list and vp on station = node, so stations present
-- on only one side are kept. available_inputs records which side(s) supplied
-- the row:
--   'vp_list' -> station found in both site_list and vp
--   'vp'      -> node found in vp only
--   'list'    -> station found in site_list only
CREATE OR REPLACE TABLE joined AS
SELECT
    COALESCE(s.station, v.node) AS station,
    s.cycle,
    s.business_org,
    CASE
        WHEN v.node IS NULL THEN 'list'
        WHEN s.station IS NULL THEN 'vp'
        ELSE 'vp_list'
    END AS available_inputs,
    v.plan_start_date,
    v.ofd_dates,
    v.demand_types,
    v.cpts,
    v.forecast_type,
    v.automated_confidence,
    v.vovi_uncapped_slam_forecast,
    v.uncapped_slam_forecast,
    v.adjusted_uncapped_slam_forecast,
    v.capped_slam_forecast,
    v.atrops_soft_cap,
    v.atrops_hard_cap,
    v.latest_deployed_cap,
    v.cap_target_buffer,
    v.current_slam,
    v.current_schedule,
    v.total_volume_available,
    v.total_backlog,
    v.in_station_backlog,
    v.post_cutoff_adjustment,
    v.net_volume_adjustments,
    v.vovi_adjustment,
    v.confidence_anomaly,
    v.automated_uncapped_slam_forecast,
    v.weekly_uncapped_slam_forecast,
    v.earlies_expected,
    v.earlies_received,
    v.returns,
    v.sideline_in,
    v.mnr_expected,
    v.mnr_received
FROM site_list AS s
FULL OUTER JOIN vp AS v
    ON s.station = v.node
ORDER BY
    station,
    cpts

In [None]:
# name: summary | type: python
# Print a per-available_inputs breakdown of the joined table (distinct
# stations and row counts), then the overall totals, and publish both in
# `result`.
summary = conn.execute("""
    SELECT available_inputs, COUNT(DISTINCT station) AS stations, COUNT(*) AS rows
    FROM joined
    GROUP BY available_inputs
    ORDER BY available_inputs
""").fetchdf()
print(summary.to_string(index=False))

# Each query returns a single one-column row; unpack the scalar directly.
(total,) = conn.execute("SELECT COUNT(*) FROM joined").fetchone()
(distinct,) = conn.execute("SELECT COUNT(DISTINCT station) FROM joined").fetchone()
print(f'\nTotal: {distinct} stations, {total} rows')

result = dict(
    status='success',
    total_rows=total,
    distinct_stations=distinct,
    breakdown=summary.to_dict(orient='records'),
)