In [2]:
import yaml
import os
import pandas as pd
from IPython.display import display, Markdown

# --- 1. CONFIGURATION ---
# Both YAML inputs live under data/raw. Which relative prefix works depends on
# the working directory: "../" when the notebook runs from the notebooks
# folder, no prefix when it runs from the repository root.
#
# The unit-ops file is the single probe: if it is reachable through the parent
# directory both paths use the "../" form, otherwise both use the
# root-relative form. The pricing path is never probed on its own.
UNIT_OPS_PATH, PRICING_PATH = (
    ("../data/raw/unitops.yaml", "../data/raw/pricing.yaml")
    if os.path.exists("../data/raw/unitops.yaml")
    else ("data/raw/unitops.yaml", "data/raw/pricing.yaml")
)

# --- 2. INSPECTION (DEBUGGING) ---
# Load the unit-ops YAML and print its first entry so the structure the parser
# has to deal with is visible in the notebook output.
print(f"üìÇ Loading {UNIT_OPS_PATH}...")
# Explicit UTF-8 so parsing does not depend on the platform's default encoding.
with open(UNIT_OPS_PATH, 'r', encoding='utf-8') as f:
    ops_data = yaml.safe_load(f) or {}  # an empty file parses to None

if not isinstance(ops_data, dict):
    raise TypeError(
        f"Expected a mapping at the top level of {UNIT_OPS_PATH}, "
        f"got {type(ops_data).__name__}"
    )

# Handle 'unit_ops' root key or flat file. A bare "unit_ops:" key parses to
# None, which is treated as "no ops" rather than crashing below.
unit_ops = ops_data.get('unit_ops', ops_data) or {}
if not isinstance(unit_ops, dict):
    raise TypeError(
        f"Expected 'unit_ops' in {UNIT_OPS_PATH} to be a mapping, "
        f"got {type(unit_ops).__name__}"
    )

# Show the structure of the first item to debug why it wasn't picking up.
# Guarded: with no entries there is no "first" key to show.
if unit_ops:
    first_key = next(iter(unit_ops))
    print(f"\nüßê DIAGNOSTIC: Inspecting first Op '{first_key}':")
    print(yaml.dump(unit_ops[first_key], default_flow_style=False))
else:
    print(f"\nDIAGNOSTIC: {UNIT_OPS_PATH} contains no unit ops; nothing to inspect.")

# --- 3. PARSING LOGIC (ROBUST) ---
# Collect every item key referenced by any unit op into `required_items`.
# Three layouts are recognised per op:
#   A) 'items' as a mapping          {item_name: {...}}
#   B) 'items' as a list of mappings [{item: name, ...}]
#   C) 'consumables' as a list of mappings (old format), checked in addition
#      to whichever 'items' layout is present.
# Anything that does not match one of these shapes is skipped instead of
# raising, so one malformed op cannot abort the whole scan.
required_items = set()

for op_name, details in unit_ops.items():
    # An op with an empty body parses to None; scalars carry no items either.
    if not isinstance(details, dict):
        continue

    op_items = details.get('items')
    if isinstance(op_items, dict):
        # Case A: the mapping keys are the item names.
        required_items.update(op_items)
    elif isinstance(op_items, list):
        # Case B: only mapping entries can carry an 'item' field. The
        # isinstance check avoids a substring test on plain-string entries.
        required_items.update(
            entry['item']
            for entry in op_items
            if isinstance(entry, dict) and 'item' in entry
        )

    # Case C: 'consumables' list (old format).
    consumables = details.get('consumables')
    if isinstance(consumables, list):
        required_items.update(
            entry['item']
            for entry in consumables
            if isinstance(entry, dict) and 'item' in entry
        )

print(f"‚úÖ Found {len(required_items)} unique items required across all protocols.")

# --- 4. SYNC CHECK ---
# Load the pricing file (if there is one) and work out which required items
# have no top-level entry in it.
try:
    # Explicit UTF-8 so parsing does not depend on the platform's default
    # encoding. Opening directly, rather than checking existence first, avoids
    # a race between the check and the open.
    with open(PRICING_PATH, 'r', encoding='utf-8') as f:
        pricing_data = yaml.safe_load(f) or {}  # an empty file parses to None
except FileNotFoundError:
    print("‚ö†Ô∏è Pricing file not found. Creating new one.")
    pricing_data = {}

# `required_items` is a set, whose iteration order for strings can differ
# between interpreter runs. Sorting makes the displayed table and the order of
# any appended placeholders reproducible. key=str keeps the sort well-defined
# even if YAML produced non-string keys.
missing_items = sorted(
    (item for item in required_items if item not in pricing_data),
    key=str,
)

# --- 5. DISPLAY RESULTS ---
# If anything is missing: show a summary table, then append one placeholder
# entry per missing item to the pricing file. Otherwise report success.
if missing_items:
    display(Markdown(f"### üõë Missing {len(missing_items)} Items"))

    # Heuristics for category: an item whose key mentions one of these words
    # is guessed to be a reagent; everything else is a consumable. Matching is
    # case-insensitive and tolerates non-string keys.
    reagent_hints = ('media', 'pbs', 'dmem', 'kit', 'reagent')
    placeholders = {}
    for item in missing_items:
        label = str(item)
        lowered = label.lower()
        cat = "reagent" if any(hint in lowered for hint in reagent_hints) else "consumable"
        placeholders[item] = {
            "name": f"{label.replace('_', ' ').title()} (TODO)",
            "category": cat,
            "unit_price_usd": 0.0,
            "logical_unit": "unit",
        }

    # Show table of what's missing, using the same category guess that is
    # written to the file so the table and the file agree.
    df_missing = pd.DataFrame({
        "Missing Item": missing_items,
        "Category Guess": [placeholders[item]["category"].title() for item in missing_items],
    })
    display(df_missing.head(10))  # Show first 10

    # --- 6. AUTO-FIX (WRITE TO FILE) ---
    # Append to the file (mode 'a' also creates it if it does not exist yet).
    # Entries are serialised with yaml.safe_dump instead of string templates so
    # that keys or names containing ':', '#', quotes, etc. are quoted correctly
    # and cannot corrupt the pricing file.
    with open(PRICING_PATH, 'a', encoding='utf-8') as f:
        f.write("\n\n# --- AUTO-GENERATED FROM NOTEBOOK ---\n")
        for item, entry in placeholders.items():
            f.write("\n")  # blank line between entries for readability
            yaml.safe_dump(
                {item: entry},
                f,
                default_flow_style=False,
                sort_keys=False,  # keep name/category/price/unit field order
                allow_unicode=True,
            )

    print(f"\nüíæ SUCCESS: Appended {len(missing_items)} placeholders to {PRICING_PATH}")
    print("üëâ Go open that file and fill in the real prices!")

else:
    display(Markdown("### ‚úÖ All Systems Go"))
    print("Every item in your protocols exists in your pricing file.")

üìÇ Loading ../data/raw/unitops.yaml...

üßê DIAGNOSTIC: Inspecting first Op 'revertaid_rt':
catalog_number: EP0452
category: enzyme
logical_unit: unit
name: RevertAid Reverse Transcriptase
pack_price_usd: 180.0
pack_size: 10000
pack_unit: unit
unit_price_usd: 0.018
vendor: Thermo

‚úÖ Found 0 unique items required across all protocols.


### ‚úÖ All Systems Go

Every item in your protocols exists in your pricing file.
