# 07 - WP Orders Change Analysis (Exact Delta Apply Match)

This notebook uses the **exact same hash function** as `delta_apply_influx.py` (`compute_order_src_hash`, copied from `influx_source.py`):
- Same 18 columns used for hash computation (see `HASH_COLUMNS` below)
- Same JSON serialization method
- Same SHA256 hashing

Results should match the delta apply report exactly.

## Setup

In [None]:
import pandas as pd
import numpy as np
import hashlib
import json
from pathlib import Path
from datetime import datetime
from typing import List, Any
import warnings
# Silence every Python warning for the rest of the session to keep cell output short.
# NOTE(review): this also hides pandas/Excel-reader warnings — confirm that is intended.
warnings.filterwarnings('ignore')

# Display limits for DataFrames: show up to 50 columns, truncate cell text at 50 characters.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 50)
print("Libraries loaded!")

## 1. Hash Function (Exact Copy from influx_source.py)

In [None]:
# EXACT hash function from src/wpr_agent/router/tools/influx_source.py
# Lines 188-218

# Columns that feed the per-order hash. compute_order_src_hash appends one
# {column: [values...]} entry per item, in this list order, so both the
# membership and the order of this list affect the resulting digest.
HASH_COLUMNS = [
    "WP Order ID",
    "WP Order Status",
    "WP ID",
    "WP Name",
    "WP Quantity",
    "Employee Name",
    "STD",
    "WP Requested Delivery Date",
    "WP Readiness Date",
    "PO StartDate",
    "PO EndDate",
    "Approved Date",
    "Submitted Date",
    "Cancelled Date",
    "Project Name",
    "Product",
    "Domain",
    "Customer",
]

def compute_order_src_hash(product: str, sub: pd.DataFrame) -> str:
    """Return a SHA-256 hex digest over the hashed columns of one order's rows.

    Described in this notebook as an exact copy of compute_order_src_hash
    from influx_source.py, so the code is intentionally left untouched.

    Args:
        product: Product label for the order; a falsy value is hashed as "".
        sub: The rows that belong to a single order.

    Returns:
        Hex digest of the compact, key-sorted JSON encoding of a list holding
        the product string followed by one ``{column: [values...]}`` entry
        for each item of HASH_COLUMNS.
    """
    parts: List[Any] = [str(product or "")]  # first element is always the product string
    for c in HASH_COLUMNS:
        try:
            vals = list(sub[c])
        except Exception:
            # Any failure to read the column (e.g. it is absent) hashes as an empty list.
            vals = []
        # None/NaN become "" and everything else is stringified, so the digest
        # depends on the str() form of every cell and on the row order of `sub`.
        parts.append({c: ["" if v is None or pd.isna(v) else str(v) for v in vals]})
    # Compact separators and sorted keys give one canonical string for the same content.
    s = json.dumps(parts, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(s.encode("utf-8")).hexdigest()

# List the hash inputs so they can be eyeballed against the production column list.
column_total = len(HASH_COLUMNS)
print(f"Hash function uses {column_total} columns:")
for column_name in HASH_COLUMNS:
    print(f"  â€¢ {column_name}")

## 2. Load Data

In [None]:
# Both snapshot workbooks sit one directory above the notebook.
BASE_DIR = Path('..')
NOV_FILE = BASE_DIR.joinpath('11.25.WP Orders_25-11-2025_v01.xlsx')
DEC_FILE = BASE_DIR.joinpath('12.04.WP Orders_04-12-2025_v01.xlsx')

# Read the same report sheet from each snapshot (November first, then December).
df_nov, df_dec = (
    pd.read_excel(workbook, sheet_name='WP_Overall_Order_Report')
    for workbook in (NOV_FILE, DEC_FILE)
)

# Strip stray whitespace from the header names so column lookups are reliable.
for snapshot in (df_nov, df_dec):
    snapshot.columns = snapshot.columns.str.strip()

nov_row_total = len(df_nov)
dec_row_total = len(df_dec)
print(f"November: {nov_row_total:,} orders")
print(f"December: {dec_row_total:,} orders")

In [None]:
# Key columns
# Header names used throughout the notebook: the order-ID column groups rows
# into orders, the status column is compared between snapshots, and the
# product column drives the target-product filter.
ORDER_ID_COL = 'WP Order ID'
STATUS_COL = 'WP Order Status'
PRODUCT_COL = 'Product'

# Lower-case substrings that mark a product as in scope.
# Per the original note these mirror config/product_project_registry.json
# (the delta apply registry).
TARGET_PRODUCT_PATTERNS = ['flowone', 'flow one', 'niam', 'session border controller']


def is_target_product(product):
    """Return True when the product name contains any target pattern.

    Matching is a case-insensitive substring test on ``str(product)``;
    missing values (None/NaN) are never a match.
    """
    if pd.isna(product):
        return False
    lowered = str(product).lower()
    for pattern in TARGET_PRODUCT_PATTERNS:
        if pattern in lowered:
            return True
    return False

# Keep only in-scope products (the delta apply scope). Copies are taken so
# later work on the filtered frames never touches the full snapshots.
nov_in_scope = df_nov[PRODUCT_COL].apply(is_target_product)
df_nov_target = df_nov[nov_in_scope].copy()
dec_in_scope = df_dec[PRODUCT_COL].apply(is_target_product)
df_dec_target = df_dec[dec_in_scope].copy()

nov_target_total = len(df_nov_target)
dec_target_total = len(df_dec_target)
print(f"\nðŸŽ¯ TARGET PRODUCTS:")
print(f"November: {nov_target_total:,} orders")
print(f"December: {dec_target_total:,} orders")

In [None]:
# Row counts per product in the filtered December snapshot.
print("\nðŸ“¦ December Product Breakdown:")
dec_product_breakdown = df_dec_target[PRODUCT_COL].value_counts()
print(dec_product_breakdown)

## 3. Compute Hashes Per Order (Exact Delta Apply Method)

In [None]:
# One hash per order: rows are grouped by order ID before hashing.
def compute_hashes_per_order(df):
    """Hash every order in ``df`` and return the results keyed by order ID.

    Rows are grouped on ORDER_ID_COL. Each key is the stripped string form of
    the order ID; groups whose key is empty are left out. Each value holds
    the order's hash, the product taken from the group's first row ("" when
    the product column is absent) and the number of rows in the group.
    """
    results = {}
    for raw_id, rows in df.groupby(ORDER_ID_COL):
        key = str(raw_id or "").strip()
        if key:
            if PRODUCT_COL in rows.columns:
                first_product = rows[PRODUCT_COL].iloc[0]
            else:
                first_product = ""
            digest = compute_order_src_hash(str(first_product or ""), rows)
            results[key] = dict(hash=digest, product=first_product, rows=len(rows))
    return results

# Hash every in-scope order of the November snapshot.
print("Computing hashes for November orders...")
nov_hashes = compute_hashes_per_order(df_nov_target)
nov_hash_total = len(nov_hashes)
print(f"  Computed {nov_hash_total:,} order hashes")

# Same for the December snapshot.
print("\nComputing hashes for December orders...")
dec_hashes = compute_hashes_per_order(df_dec_target)
dec_hash_total = len(dec_hashes)
print(f"  Computed {dec_hash_total:,} order hashes")

## 4. Compare Hashes (Delta Apply Method)

In [None]:
# Order IDs present in each snapshot.
nov_order_ids = set(nov_hashes)
dec_order_ids = set(dec_hashes)

# New = only in December; common = present in both snapshots.
new_orders = dec_order_ids.difference(nov_order_ids)
common_orders = nov_order_ids.intersection(dec_order_ids)

# A common order counts as changed when its two hashes differ.
changed_orders = {
    oid for oid in common_orders
    if nov_hashes[oid]['hash'] != dec_hashes[oid]['hash']
}
unchanged_orders = {
    oid for oid in common_orders
    if nov_hashes[oid]['hash'] == dec_hashes[oid]['hash']
}

print("ðŸ“Š CHANGE DETECTION RESULTS")
print("=" * 50)
print(f"November orders: {len(nov_order_ids):,}")
print(f"December orders: {len(dec_order_ids):,}")
print()
print(f"Common orders:   {len(common_orders):,}")
print(f"  â†’ Changed:     {len(changed_orders):,}")
print(f"  â†’ Unchanged:   {len(unchanged_orders):,}")
print()
print(f"New orders:      {len(new_orders):,}")

## 5. Delta Apply Compatible Summary

In [None]:
# Headline counts, laid out like the delta apply report.
total_dec_orders, changed_count, new_count = map(
    len, (dec_order_ids, changed_orders, new_orders)
)

summary_rule = "=" * 60
print(summary_rule)
print("        DELTA APPLY COMPATIBLE RESULTS")
print(summary_rule)
print()
print(f"   orders:   {total_dec_orders:>6,}  (unique order IDs in Dec)")
print(f"   changed:  {changed_count:>6,}  (hash differs Novâ†’Dec)")
print(f"   new:      {new_count:>6,}  (in Dec but not Nov)")
print()
print(summary_rule)

In [None]:
# Tally changed orders per product (product as recorded for December).
print("\nðŸ“¦ CHANGED ORDERS BY PRODUCT:")
product_counts = {}
for changed_id in changed_orders:
    label = dec_hashes[changed_id]['product']
    product_counts.setdefault(label, 0)
    product_counts[label] += 1

# Largest tally first; ties keep their insertion order.
ranked_changed = sorted(product_counts.items(), key=lambda pair: pair[1], reverse=True)
for label, tally in ranked_changed:
    print(f"  {label}: {tally}")

In [None]:
# Tally new orders per product (product as recorded for December).
print("\nðŸ†• NEW ORDERS BY PRODUCT:")
new_product_counts = {}
for new_id in new_orders:
    label = dec_hashes[new_id]['product']
    if label in new_product_counts:
        new_product_counts[label] += 1
    else:
        new_product_counts[label] = 1

# Largest tally first; ties keep their insertion order.
ranked_new = sorted(new_product_counts.items(), key=lambda pair: pair[1], reverse=True)
for label, tally in ranked_new:
    print(f"  {label}: {tally}")

## 6. Status Changes Detail

In [None]:
# Status comparison for changed orders.
#
# Only the FIRST row of each order is compared, so a status change that lives
# in a later row of a multi-row order is not reported here.


def _first_row_per_order(df):
    """Return the first row of each order, indexed by its normalised order ID.

    The index uses the same ``str(id).strip()`` form as the keys of the hash
    dictionaries. Indexing by the raw cell values would make every lookup
    fail for numeric or whitespace-padded IDs.
    """
    # Rows without an order ID are dropped: groupby skips them when hashing.
    rows = df.dropna(subset=[ORDER_ID_COL]).copy()
    rows.index = pd.Index(
        [str(raw_id or "").strip() for raw_id in rows[ORDER_ID_COL]],
        name=ORDER_ID_COL,
    )
    # Mirror set_index(): the key column moves into the index.
    rows = rows.drop(columns=[ORDER_ID_COL])
    # Blank IDs are skipped by the hashing step as well.
    rows = rows[rows.index != ""]
    return rows[~rows.index.duplicated(keep='first')]


def _as_hashed(value):
    """Normalise a cell the way the order hash does: missing -> "", else str()."""
    return "" if value is None or pd.isna(value) else str(value)


df_nov_indexed = _first_row_per_order(df_nov_target)
df_dec_indexed = _first_row_per_order(df_dec_target)

status_changes = []
skipped_orders = []
# Sorted so the resulting table has the same row order on every run.
for oid in sorted(changed_orders):
    try:
        nov_status = df_nov_indexed.loc[oid, STATUS_COL]
        dec_status = df_dec_indexed.loc[oid, STATUS_COL]
        product = df_dec_indexed.loc[oid, PRODUCT_COL]
    except KeyError:
        # Best effort: remember the order and report it instead of hiding it.
        skipped_orders.append(oid)
        continue

    # Compare normalised values so two missing statuses (NaN != NaN) are not
    # reported as a transition.
    if _as_hashed(nov_status) != _as_hashed(dec_status):
        status_changes.append({
            'Order ID': oid,
            'Product': product,
            'Nov_Status': nov_status,
            'Dec_Status': dec_status,
            'Transition': f"{nov_status} â†’ {dec_status}"
        })

# Explicit columns keep the frame's schema stable when nothing changed.
df_status_changes = pd.DataFrame(
    status_changes,
    columns=['Order ID', 'Product', 'Nov_Status', 'Dec_Status', 'Transition'],
)

print(f"ðŸ”„ STATUS CHANGES (within {len(changed_orders)} changed orders)")
print(f"="*50)
print(f"Orders with status change: {len(df_status_changes):,}")
if skipped_orders:
    print(f"WARNING: {len(skipped_orders):,} changed orders could not be looked up and were skipped")

In [None]:
# Frequency of each "old -> new" status pair; skipped when nothing changed.
if len(df_status_changes):
    transition_counts = df_status_changes['Transition'].value_counts()
    print("\nðŸ“Š Status Transitions:")
    print(transition_counts)

## 7. Column-Level Change Analysis

In [None]:
# Count, per hash column, how many changed orders differ in that column.
#
# Only the first row of each order is available in the indexed frames, so a
# change that lives in a later row of a multi-row order is not attributed to
# any column here even though it alters the order hash.


def _cell_text(value):
    """Normalise a cell the way the order hash does: missing -> "", else str()."""
    return "" if value is None or pd.isna(value) else str(value)


def _keyed_like_hashes(frame):
    """Re-index ``frame`` by ``str(id).strip()`` so lookups match the hash-dict keys."""
    rekeyed = frame[frame.index.notna()].copy()
    rekeyed.index = pd.Index(
        [str(raw_id or "").strip() for raw_id in rekeyed.index],
        name=frame.index.name,
    )
    return rekeyed[~rekeyed.index.duplicated(keep='first')]


nov_rows = _keyed_like_hashes(df_nov_indexed)
dec_rows = _keyed_like_hashes(df_dec_indexed)

# Columns missing from either snapshot cannot be compared and stay at zero.
comparable_columns = [
    col for col in HASH_COLUMNS
    if col in nov_rows.columns and col in dec_rows.columns
]

column_changes = {col: 0 for col in HASH_COLUMNS}

for oid in changed_orders:
    in_nov = oid in nov_rows.index
    in_dec = oid in dec_rows.index
    for col in comparable_columns:
        nov_val = _cell_text(nov_rows.loc[oid, col]) if in_nov else ''
        dec_val = _cell_text(dec_rows.loc[oid, col]) if in_dec else ''
        if nov_val != dec_val:
            column_changes[col] += 1

# Sort by count
sorted_changes = sorted(column_changes.items(), key=lambda x: -x[1])

print(f"ðŸ“Š COLUMN CHANGES (within {len(changed_orders)} changed orders)")
print(f"="*60)
for col, count in sorted_changes:
    if count > 0:
        # count > 0 implies changed_orders is non-empty, so the division is safe.
        pct = count / len(changed_orders) * 100
        is_status = 'Status' in col
        is_date = 'Date' in col
        marker = "ðŸ”„" if is_status else ("ðŸ“…" if is_date else "  ")
        print(f"{marker} {col:<35} {count:>5,} ({pct:.1f}%)")

## 8. Comparison Report

In [None]:
# Closing report: headline metrics followed by a per-product split.
final_rule = "=" * 70
print(final_rule)
print("                        FINAL SUMMARY")
print(final_rule)
print()
print(f"Files: {NOV_FILE.name} â†’ {DEC_FILE.name}")
print()
print("ðŸ“Š METRICS (using exact delta_apply hash function):")
print(f"   Total orders (Dec):    {total_dec_orders:,}")
print(f"   Changed orders:        {changed_count:,}")
print(f"   New orders:            {new_count:,}")
print(f"   Status changes:        {len(df_status_changes):,}")
print()
print("ðŸ“¦ BY PRODUCT:")
# Every product that has at least one changed or new order, alphabetically.
reported_products = sorted(product_counts.keys() | new_product_counts.keys())
for product_name in reported_products:
    changed_for_product = product_counts.get(product_name, 0)
    new_for_product = new_product_counts.get(product_name, 0)
    print(f"   {product_name}: {changed_for_product} changed, {new_for_product} new")
print()
print(final_rule)

## 9. Export

In [None]:
# Write the result lists to timestamped workbooks in the current directory.
# The timestamp has minute resolution, so a re-run within the same minute
# overwrites the previous files.
timestamp = datetime.now().strftime('%Y%m%d_%H%M')

# Explicit columns keep the header row even when a list is empty.
export_columns = ['Order ID', 'Product']

# Export changed orders list (sorted: set iteration order differs between runs)
changed_list = [
    {'Order ID': oid, 'Product': dec_hashes[oid]['product']}
    for oid in sorted(changed_orders)
]
pd.DataFrame(changed_list, columns=export_columns).to_excel(f'changed_orders_{timestamp}.xlsx', index=False)
print(f"âœ… changed_orders_{timestamp}.xlsx")

# Export new orders list (sorted for the same reason)
new_list = [
    {'Order ID': oid, 'Product': dec_hashes[oid]['product']}
    for oid in sorted(new_orders)
]
pd.DataFrame(new_list, columns=export_columns).to_excel(f'new_orders_{timestamp}.xlsx', index=False)
print(f"âœ… new_orders_{timestamp}.xlsx")

# Export status changes (skipped when there are none)
if len(df_status_changes) > 0:
    df_status_changes.to_excel(f'status_changes_{timestamp}.xlsx', index=False)
    print(f"âœ… status_changes_{timestamp}.xlsx")

print(f"\nâœ… Analysis complete!")