In [29]:
"""
CELL 1: SETUP (WITH TIMING)
Description: Load configuration and all VAANI functions with timing
"""

import time

print("="*60)
print("üéÆ VAANI PLAYGROUND - LOADING")
print("="*60)

# Time Config Loading
config_start = time.time()
%run vyapar_config.ipynb
config_time = time.time() - config_start
print(f"‚úÖ Config loaded: {config_time*1000:.0f} ms")

# Time Prompts Loading
prompts_start = time.time()
%run vaani_prompts.ipynb
prompts_time = time.time() - prompts_start
print(f"‚úÖ Prompts loaded: {prompts_time*1000:.0f} ms")

# Time Functions Loading
functions_start = time.time()
%run vaani_functions.ipynb
functions_time = time.time() - functions_start
print(f"‚úÖ Functions loaded: {functions_time*1000:.0f} ms")

# Summary
total_time = config_time + prompts_time + functions_time
print("\n" + "="*60)
print(f"‚è±Ô∏è TOTAL LOAD TIME: {total_time*1000:.0f} ms ({total_time:.2f}s)")
print("="*60)
print(f"   Config:    {config_time*1000:>6.0f} ms ({config_time/total_time*100:>4.1f}%)")
print(f"   Prompts:   {prompts_time*1000:>6.0f} ms ({prompts_time/total_time*100:>4.1f}%)")
print(f"   Functions: {functions_time*1000:>6.0f} ms ({functions_time/total_time*100:>4.1f}%)")
print("="*60)
print("‚úÖ Ready to test!")

üéÆ VAANI PLAYGROUND - LOADING
‚úÖ Libraries imported successfully
‚úÖ Master Registry Link configured
üìç Link: https://docs.google.com/spreadsheets/d/e/2PACX-1vQdOVYDNLuMG...

üîÑ Testing registry load...
‚úÖ Registry loaded successfully: 11 items found

üìã Available items in registry:
  1. default_model
  2. openai_api_key
  3. anthropic_api_key
  4. master_registry_link
  5. usage_data
  6. dropoff_analysis
  7. user_research
  8. excel_db_1000_items
  9. claude_haiku
  10. vaani_test_cases
  11. consolidated_categories

üß™ Testing get_item() function:

1. Testing with 'default_model':
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Retrieved 'default_model'
   Result: {'model': 'claude-sonnet-4-20250514', 'temperature': 0.3, 'max_tokens': 1000, 'provider': 'anthropic'}

2. Testing with non-existent item:
‚úÖ Registry loaded successfully: 11 items found
‚ùå Item 'this_does_not_exist' not found in registry
üí° Available items: default_model, openai_api_key, anthropic_ap

In [18]:
"""
CELL 2: QUICK SINGLE TEST
"""

timer = CellTimer()

# ============ MODIFY THESE ============
test_input = "chai samosa 140 rupees"
transaction_type = None  # Optional: 'expense', 'sale', etc.
# ======================================

print("="*60)
print(f"üìù Testing: '{test_input}'")
print("="*60)

result = route_with_intent(test_input, transaction_type=transaction_type)

print(f"\nüìä RESULT:")
print(f"Status: {result.get('status')}")

if result.get('status') == 'success':
    print(f"Transaction Type: {result.get('transaction_type')}")
    
    print(f"\n‚úÖ Extracted Data:")
    extraction = result.get('extraction', {})
    for key, value in extraction.items():
        if key not in ['raw_response', 'transaction_type']:
            print(f"   {key}: {value}")
    
    if 'time_taken' in extraction:
        print(f"\n‚è±Ô∏è API Time: {extraction['time_taken']*1000:.0f} ms")

elif result.get('status') == 'not_relevant':
    print(f"‚ö†Ô∏è {result.get('message')}")
else:
    print(f"‚ùå Error: {result.get('error')}")

timer.print_summary("TOTAL")

üìù Testing: 'chai samosa 140 rupees'
üîç Running intent detection...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized

üìä RESULT:
Status: success
Transaction Type: expense

‚úÖ Extracted Data:
   amount: 140
   item: chai and samosa
   category: Food & Beverages
   model_used: claude-sonnet-4-20250514
   time_taken: 4.82

‚è±Ô∏è API Time: 4820 ms

‚è±Ô∏è TOTAL TIME: 14994 ms (14.99s)


In [25]:
"""
CELL 2B: DETAILED TIMING BREAKDOWN
Description: Test with complex input and show timing for each AI agent
"""

import time

# ============ MODIFY THIS ============
test_input = "Sharma ji se vegetables 450 rupees liye aur unhe 50 rupees tip diya delivery ke liye total 500 rupees cash mein"
transaction_type = None  # Set to skip intent detection
# =====================================

print("="*60)
print(f"üî¨ DETAILED TIMING BREAKDOWN")
print("="*60)
print(f"Input: '{test_input}'")
print("="*60)

# Initialize timing variables
total_start = time.time()
intent_time = 0
extraction_time = 0
detected_type = None
skip_extraction = False

result = {
    'input': test_input,
    'intent_detection_skipped': transaction_type is not None
}

# Step 1: Intent Detection
if transaction_type is None:
    print("\n‚è±Ô∏è STEP 1: Intent Detection")
    print("-" * 60)
    
    intent_start = time.time()
    intent_result = agent_intent_detector(test_input)
    intent_time = time.time() - intent_start
    
    print(f"‚úÖ Completed in {intent_time*1000:.0f} ms")
    
    if 'error' in intent_result:
        print(f"‚ùå Error: {intent_result['error']}")
        skip_extraction = True
    else:
        print(f"   Relevant: {intent_result.get('is_relevant')}")
        print(f"   Type: {intent_result.get('transaction_type')}")
        print(f"   Confidence: {intent_result.get('confidence')}")
        
        result['intent'] = intent_result
        detected_type = intent_result.get('transaction_type')
        
        # Handle special cases
        if detected_type == 'greeting':
            result['status'] = 'greeting'
            print(f"\nüí¨ Greeting detected - showing friendly response")
            skip_extraction = True
        
        elif detected_type == 'not_relevant' or not intent_result.get('is_relevant'):
            result['status'] = 'not_relevant'
            print(f"\n‚ö†Ô∏è Not relevant for business transactions")
            skip_extraction = True
else:
    print(f"\n‚è≠Ô∏è Skipping intent detection (type: {transaction_type})")
    detected_type = transaction_type

# Step 2: Transaction Extraction (only if not greeting/not_relevant)
if not skip_extraction and detected_type:
    print(f"\n‚è±Ô∏è STEP 2: Transaction Extraction ({detected_type})")
    print("-" * 60)
    
    extraction_start = time.time()
    extraction_result = agent_transaction_extractor(test_input, detected_type)
    extraction_time = time.time() - extraction_start
    
    print(f"‚úÖ Completed in {extraction_time*1000:.0f} ms")
    
    if 'error' in extraction_result:
        print(f"‚ùå Error: {extraction_result['error']}")
    else:
        print(f"\nüìä Extracted Data:")
        for key, value in extraction_result.items():
            if key not in ['raw_response', 'transaction_type']:
                print(f"   {key}: {value}")
    
    result['extraction'] = extraction_result
    result['status'] = 'success'
    result['transaction_type'] = detected_type

# Calculate total time
total_time = time.time() - total_start
overhead_time = total_time - intent_time - extraction_time

# Detailed Breakdown
print("\n" + "="*60)
print("‚è±Ô∏è DETAILED TIMING BREAKDOWN")
print("="*60)

if intent_time > 0:
    print(f"Intent Detection:  {intent_time*1000:>7.0f} ms ({intent_time/total_time*100:>5.1f}%)")

if extraction_time > 0:
    print(f"Extraction:        {extraction_time*1000:>7.0f} ms ({extraction_time/total_time*100:>5.1f}%)")

print(f"Overhead:          {overhead_time*1000:>7.0f} ms ({overhead_time/total_time*100:>5.1f}%)")
print("-" * 60)
print(f"TOTAL TIME:        {total_time*1000:>7.0f} ms (100.0%)")
print("="*60)

# API Time Breakdown
print(f"\nüîç API Processing Time:")
if intent_time > 0:
    print(f"   Intent API:     {intent_time*1000:.0f} ms")
if extraction_time > 0:
    print(f"   Extraction API: {extraction_time*1000:.0f} ms")
    print(f"   Combined API:   {(intent_time + extraction_time)*1000:.0f} ms")

print("\nüí° Note: Overhead includes Python execution, JSON parsing, etc.")

# Show result status
if result.get('status') == 'greeting':
    print("\nüí¨ This was a greeting - no extraction needed")
elif result.get('status') == 'not_relevant':
    print("\n‚ö†Ô∏è This was not relevant - no extraction needed")
elif result.get('status') == 'success':
    print(f"\n‚úÖ Successfully extracted {detected_type} data")

üî¨ DETAILED TIMING BREAKDOWN
Input: 'Sharma ji se vegetables 450 rupees liye aur unhe 50 rupees tip diya delivery ke liye total 500 rupees cash mein'

‚è±Ô∏è STEP 1: Intent Detection
------------------------------------------------------------
‚úÖ Registry loaded successfully: 10 items found
‚úÖ Registry loaded successfully: 10 items found
‚úÖ Anthropic client initialized
‚úÖ Completed in 9083 ms
   Relevant: True
   Type: expense
   Confidence: 0.9

‚è±Ô∏è STEP 2: Transaction Extraction (expense)
------------------------------------------------------------
‚úÖ Registry loaded successfully: 10 items found
‚úÖ Registry loaded successfully: 10 items found
‚úÖ Anthropic client initialized
‚úÖ Completed in 8774 ms

üìä Extracted Data:
   amount: 500
   item: Vegetables with delivery tip
   category: Food & Beverages
   model_used: claude-sonnet-4-20250514
   time_taken: 6.32

‚è±Ô∏è DETAILED TIMING BREAKDOWN
Intent Detection:     9083 ms ( 50.9%)
Extraction:           8774 ms ( 49.1%)
O

In [30]:
"""
CELL 2C: VIEW CONSOLIDATED CATEGORIES
Description: Load and display categories from registry
"""

print("="*60)
print("üìä CONSOLIDATED EXPENSE CATEGORIES")
print("="*60)

# Load categories
df_categories = load_categories_data()

if df_categories is not None:
    print(f"\n‚úÖ Loaded {len(df_categories)} categories from registry\n")
    
    # Display top 20
    print("TOP 20 MOST USED CATEGORIES:")
    print("-"*60)
    
    for idx, row in df_categories.head(20).iterrows():
        rank = row['#']
        category = row['Consolidated Category']
        count = row['Total Count']
        notes = row.get('Notes', '')
        
        print(f"{rank:2}. {category:25} ({count:>12} uses)")
        if notes:
            print(f"    ‚Üí {notes}")
    
    print("\n" + "="*60)
    
    # Show distribution
    print("\nüìà CATEGORY DISTRIBUTION:")
    print("-"*60)
    
    total_count = df_categories['Total Count'].apply(lambda x: int(str(x).replace(',', ''))).sum()
    
    print(f"Top 10 categories: {df_categories.head(10)['Total Count'].apply(lambda x: int(str(x).replace(',', ''))).sum() / total_count * 100:.1f}% of all expenses")
    print(f"Top 20 categories: {df_categories.head(20)['Total Count'].apply(lambda x: int(str(x).replace(',', ''))).sum() / total_count * 100:.1f}% of all expenses")
    print(f"Top 30 categories: {df_categories.head(30)['Total Count'].apply(lambda x: int(str(x).replace(',', ''))).sum() / total_count * 100:.1f}% of all expenses")
    
    print("\n" + "="*60)
    
    # Show which categories handle common items
    print("\nüîç CATEGORY MAPPING EXAMPLES:")
    print("-"*60)
    
    examples = {
        "chai 50 rupees": "Tea Coffee",
        "petrol 500": "Fuel",
        "taxi 200": "Transport",
        "salary 15000": "Salary",
        "electricity bill 2500": "Electricity",
        "internet 1000": "Internet",
        "office supplies 300": "Office Expenses",
        "courier charges 150": "Freight Courier"
    }
    
    for example, expected_category in examples.items():
        print(f"'{example:30}' ‚Üí {expected_category}")
    
    print("\n" + "="*60)
else:
    print("‚ùå Could not load categories")

üìä CONSOLIDATED EXPENSE CATEGORIES
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Loaded 50 categories

‚úÖ Loaded 50 categories from registry

TOP 20 MOST USED CATEGORIES:
------------------------------------------------------------
 1. Indirect Expenses         (     1388006 uses)
    ‚Üí Most used category
 2. Petrol                    (      330413 uses)
    ‚Üí Merged: Petrol (211,957) + Fuil Exp A/c (118,508) + Diesel
 3. Direct Expenses           (      299989 uses)
    ‚Üí Second most common expense type
 4. Salary                    (      239200 uses)
    ‚Üí Includes wages and salary variations
 5. Transport                 (      192905 uses)
    ‚Üí Transportation and travel related
 6. Tea Coffee                (      125072 uses)
    ‚Üí Merged tea, coffee, chai variations
 7. Bank Charges              (      104635 uses)
    ‚Üí BANK CHARGES (55,800) + Bank Charges (48,835)
 8. Rent                      (       78571 uses)
    ‚Üí Property rental expenses
 9. El

In [31]:
"""
CELL 2D: TEST CATEGORY ACCURACY
Description: Test if expense extraction uses correct categories
"""

# Test cases with expected categories
test_cases = [
    {"input": "chai 50 rupees", "expected": "Tea Coffee"},
    {"input": "Diesel fuel 500", "expected": "Petrol"},
    {"input": "taxi ke liye 200 diye", "expected": "Transport"},
    {"input": "electricity bill 2500 rupees", "expected": "Electricity"},
    {"input": "internet ka bill 1000", "expected": "Internet"},
    {"input": "courier charges 150", "expected": "Freight Courier"},
    {"input": "office stationery 300", "expected": "Stationery"},
    {"input": "printing 500 rupees", "expected": "Printing"},
    {"input": "mobile recharge 200", "expected": "Phone Mobile"},
    {"input": "GST payment 5000", "expected": "GST"},
]

print("="*60)
print("üß™ TESTING CATEGORY ACCURACY")
print("="*60)
print(f"Testing {len(test_cases)} cases...\n")

results = []

for test in test_cases:
    test_input = test['input']
    expected_cat = test['expected']
    
    # Extract
    result = route_with_intent(test_input, transaction_type='expense')
    
    if result.get('status') == 'success':
        extracted = result['extraction']
        detected_cat = extracted.get('category', 'N/A')
        
        match = (detected_cat == expected_cat)
        icon = "‚úÖ" if match else "‚ùå"
        
        print(f"{icon} '{test_input:35}' ‚Üí {detected_cat:20} (expected: {expected_cat})")
        
        results.append({
            'input': test_input,
            'expected': expected_cat,
            'detected': detected_cat,
            'correct': match
        })
    else:
        print(f"‚ùå '{test_input:35}' ‚Üí ERROR")

# Summary
print("\n" + "="*60)
correct = sum(1 for r in results if r['correct'])
print(f"‚úÖ Accuracy: {correct}/{len(results)} ({correct/len(results)*100:.1f}%)")
print("="*60)

üß™ TESTING CATEGORY ACCURACY
Testing 10 cases...

‚è≠Ô∏è  Skipping intent detection, using: expense
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Anthropic client initialized
‚úÖ 'chai 50 rupees                     ' ‚Üí Tea Coffee           (expected: Tea Coffee)
‚è≠Ô∏è  Skipping intent detection, using: expense
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Anthropic client initialized
‚úÖ 'Diesel fuel 500                    ' ‚Üí Petrol               (expected: Petrol)
‚è≠Ô∏è  Skipping intent detection, using: expense
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Registry loaded successfully: 11 items found
‚úÖ Anthropic client initialized
‚úÖ 'taxi ke liye 200 diye              ' ‚Üí Transport            (expected: Transport)
‚è≠Ô∏è  Skipping intent detection, using: ex

In [19]:
"""
CELL 3: ENHANCED BATCH TEST
Description: Test multiple inputs across Sonnet and Haiku
"""

timer = CellTimer()

# ============ TEST CONFIGURATION ============
test_cases = [
    # Expenses
    {"input": "chai samosa 140 rupees", "type": "expense"},
    {"input": "petrol 500 rupees", "type": "expense"},
    {"input": "taxi ke liye 200 diye", "type": "expense"},
    {"input": "delivery charges 50", "type": "expense"},
    
    # Sales
    {"input": "Sharma ji bought 5kg rice for 250 rupees", "type": "sale"},
    {"input": "sold vegetables to Ramesh 300 rupees", "type": "sale"},
    
    # Payments In
    {"input": "Mishra aunty ne 500 ka udhar chukaya", "type": "payment_in"},
    {"input": "received 2000 from Kumar", "type": "payment_in"},
    
    # Payments Out
    {"input": "paid 1000 rupees to supplier", "type": "payment_out"},
    {"input": "Gupta ji ko 5000 diye", "type": "payment_out"},
    
    # Not Relevant
    {"input": "what's the weather today?", "type": None},
    {"input": "hello how are you", "type": None},
]

models = [
    ('claude-sonnet-4-5-20250929', 'Sonnet'),
    ('claude-haiku-4-5-20251001', 'Haiku'),
]

skip_intent_modes = [
    (False, 'Auto-detect'),
    (True, 'Skip Intent'),
]
# ==========================================

print("="*60)
print(f"üß™ COMPREHENSIVE BATCH TEST")
print(f"Test Cases: {len(test_cases)}")
print(f"Models: {len(models)}")
print(f"Modes: {len(skip_intent_modes)}")
print(f"Total Tests: {len(test_cases) * len(models) * len(skip_intent_modes)}")
print("="*60)

all_results = []

for case_idx, test_case in enumerate(test_cases, 1):
    test_input = test_case['input']
    expected_type = test_case['type']
    
    print(f"\n[{case_idx}/{len(test_cases)}] '{test_input[:45]}...'")
    
    for model_id, model_name in models:
        for skip_intent, mode_name in skip_intent_modes:
            
            if skip_intent and expected_type:
                tx_type_param = expected_type
            else:
                tx_type_param = None
            
            result = route_with_intent(test_input, transaction_type=tx_type_param)
            
            row = {
                'input': test_input[:40],
                'expected_type': expected_type or 'not_relevant',
                'model': model_name,
                'mode': mode_name,
                'status': result.get('status'),
                'detected_type': result.get('transaction_type', 'N/A'),
                'time_ms': 'N/A',
                'amount': 'N/A',
            }
            
            if result.get('status') == 'success':
                extraction = result.get('extraction', {})
                row['time_ms'] = int(extraction.get('time_taken', 0) * 1000)
                row['amount'] = extraction.get('amount', 'N/A')
                status_icon = '‚úÖ'
            elif result.get('status') == 'not_relevant':
                status_icon = '‚ö†Ô∏è'
            else:
                status_icon = '‚ùå'
                row['amount'] = 'ERROR'
            
            all_results.append(row)
            print(f"   {status_icon} {model_name:6} | {mode_name:12} | {row['time_ms']:>6} ms")

df = pd.DataFrame(all_results)

# Summary by Model
print("\n" + "="*60)
print("üìä SUMMARY BY MODEL")
print("="*60)

for model_name in ['Sonnet', 'Haiku']:
    model_df = df[df['model'] == model_name]
    success_count = len(model_df[model_df['status'] == 'success'])
    
    print(f"\nüîπ {model_name}:")
    print(f"   Success: {success_count}/{len(model_df)} ({success_count/len(model_df)*100:.1f}%)")
    
    times = pd.to_numeric(model_df[model_df['time_ms'] != 'N/A']['time_ms'], errors='coerce')
    if len(times) > 0:
        print(f"   Avg Time: {times.mean():.0f} ms")
        print(f"   Min Time: {times.min():.0f} ms")
        print(f"   Max Time: {times.max():.0f} ms")

# Summary by Mode
print("\n" + "="*60)
print("üìä SUMMARY BY MODE")
print("="*60)

for mode_name in ['Auto-detect', 'Skip Intent']:
    mode_df = df[df['mode'] == mode_name]
    success_count = len(mode_df[mode_df['status'] == 'success'])
    
    print(f"\nüîπ {mode_name}:")
    print(f"   Success: {success_count}/{len(mode_df)} ({success_count/len(mode_df)*100:.1f}%)")
    
    times = pd.to_numeric(mode_df[mode_df['time_ms'] != 'N/A']['time_ms'], errors='coerce')
    if len(times) > 0:
        print(f"   Avg Time: {times.mean():.0f} ms")

# Overall accuracy
print("\n" + "="*60)
print("üìà OVERALL ACCURACY")
print("="*60)

correct = len(df[(df['expected_type'] == df['detected_type']) | 
                  ((df['expected_type'] == 'not_relevant') & (df['status'] == 'not_relevant'))])
total = len(df)
print(f"Correct Type Detection: {correct}/{total} ({correct/total*100:.1f}%)")

timer.print_summary("TOTAL BATCH TEST")
print(f"‚è±Ô∏è Avg per test: {timer.elapsed()/len(all_results)*1000:.0f} ms")
print("="*60)

üß™ COMPREHENSIVE BATCH TEST
Test Cases: 12
Models: 2
Modes: 2
Total Tests: 48

[1/12] 'chai samosa 140 rupees...'
üîç Running intent detection...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized
   ‚úÖ Sonnet | Auto-detect  |   6370 ms
‚è≠Ô∏è  Skipping intent detection, using: expense
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized
   ‚úÖ Sonnet | Skip Intent  |   5740 ms
üîç Running intent detection...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Anthropic client initialized
üìä Extracting expense data...
‚úÖ Registry loaded successfully: 9 items found
‚úÖ Registry loaded succes

In [20]:
"""
CELL 4: VIEW PROMPTS & FIELDS
"""

# ============ MODIFY THIS ============
what_to_view = 'fields'  # Options: 'fields', 'prompts', 'expense', 'sale', 'intent', etc.
# =====================================

print("\n")

if what_to_view == 'fields':
    view_transaction_fields()
elif what_to_view == 'prompts':
    view_all_prompts()
else:
    view_prompt(what_to_view)



üìä TRANSACTION FIELD REQUIREMENTS

üîπ EXPENSE
   Description: Recording business expenses
   ‚úÖ Necessary: amount, item
   üìù Additional: category, date, payment_type, notes

üîπ SALE
   Description: Recording sales/invoices
   ‚úÖ Necessary: customer_name, amount, items
   üìù Additional: payment_type, date, notes, invoice_number

üîπ PURCHASE
   Description: Recording purchases from suppliers
   ‚úÖ Necessary: supplier_name, amount, items
   üìù Additional: payment_type, date, notes, inventory_update

üîπ PAYMENT_IN
   Description: Money received from customers
   ‚úÖ Necessary: payer_name, amount
   üìù Additional: payment_type, date, notes, invoice_reference

üîπ PAYMENT_OUT
   Description: Money paid to vendors/suppliers
   ‚úÖ Necessary: payee_name, amount
   üìù Additional: payment_type, date, notes, invoice_reference



In [21]:
"""
CELL 5: CUSTOM TEST AREA
Description: Free space for experiments
"""

timer = CellTimer()

# Your custom code here
# Example:
# result = route_with_intent("your test input")
# print(result)

print("üí° Use this cell for custom experiments")

timer.print_summary("CUSTOM TEST")

üí° Use this cell for custom experiments

‚è±Ô∏è CUSTOM TEST TIME: 0 ms (0.00s)


In [32]:
"""
CELL: PRODUCTION CODE COMPARISON & ANALYSIS
Description: Understand production implementation and compare with notebook development
"""

import json
import pandas as pd

print("="*70)
print("üè≠ VAANI PRODUCTION CODE ANALYSIS")
print("="*70)

# ============================================================================
# PART 1: Architecture Overview
# ============================================================================

print("\nüìê ARCHITECTURE COMPARISON")
print("-"*70)

architecture = {
    "Component": ["Entry Point", "Orchestration", "LLM Layer", "Prompt System", "Output Format"],
    "Notebook": ["playground cell", "route_with_intent()", "OpenAI/Anthropic SDK", "Python strings", "JSON parsing"],
    "Production": ["scheduler.py", "talk2bill_pipeline.py", "LangChain+Gemini", "prompts.py constants", "Pydantic models"]
}

df_arch = pd.DataFrame(architecture)
print(df_arch.to_string(index=False))

# ============================================================================
# PART 2: Detailed Feature Comparison
# ============================================================================

print("\n\nüìä FEATURE-BY-FEATURE COMPARISON")
print("-"*70)

# Column-oriented table with 15 rows: the i-th entry of every list describes
# the same feature. When adding or removing a feature, edit all four lists at
# the same position - pd.DataFrame() raises if their lengths differ.
features = {
    # Row labels.
    "Feature": [
        "Primary LLM",
        "Backup LLM",
        "Temperature",
        "Max Tokens",
        "Retry Logic",
        "Config Source",
        "Caching",
        "Categories",
        "Intent Types",
        "Greeting Handling",
        "Multi-turn Conversation",
        "Structured Output",
        "Error Handling",
        "Performance Monitoring",
        "Test Coverage"
    ],
    # State of each feature in the notebook prototype.
    "Notebooks": [
        "Claude Sonnet 4.5",
        "Claude Haiku 4.5",
        "0.2 (intent), 0.3 (extraction)",
        "200-1000",
        "None",
        "Google Sheets Registry",
        "5-min pickle cache",
        "50 consolidated",
        "7 types",
        "‚úÖ Yes",
        "‚ùå No (single-turn)",
        "JSON parsing",
        "Basic try-catch",
        "Detailed timing",
        "90 test cases"
    ],
    # State of each feature in the production code.
    "Production": [
        "Gemini 2.0 Flash",
        "None",
        "0.0 (all)",
        "Pydantic-defined",
        "3 attempts, 1s delay",
        "Environment variables",
        "None",
        "~10 generic",
        "2 types (expense, other)",
        "‚ùå No",
        "‚úÖ Yes (with history)",
        "Pydantic models",
        "Retry + logging",
        "Basic logging",
        "Limited"
    ],
    # Presumably the recommended action for the production side - TODO confirm.
    "Priority": [
        "Keep Gemini",
        "Not needed",
        "Keep 0.0",
        "Keep Pydantic",
        "‚úÖ Keep",
        "üü¢ Optional",
        "üü¢ Optional",
        "üî¥ Update",
        "üü° Expand",
        "üü° Add",
        "‚úÖ Keep",
        "‚úÖ Keep",
        "üü° Enhance",
        "üü° Add",
        "üî¥ Add"
    ]
}

df_features = pd.DataFrame(features)
# Plain-text rendering without the row index.
print(df_features.to_string(index=False))

# ============================================================================
# PART 3: Migration Priorities
# ============================================================================

print("\n\nüéØ MIGRATION PRIORITIES BREAKDOWN")
print("-"*70)

# Maps a priority heading to its list of work items. The headings are printed
# in the order the keys are written here (dicts preserve insertion order).
priorities = {
    "üî¥ HIGH PRIORITY": [
        "Update prompts.py with 50 consolidated categories",
        "Add greeting intent to INTENT_CLASSIFICATION_PROMPT_VYP",
        "Create test suite with 90 test cases from vaani_test_cases.csv",
        "Test category accuracy with Gemini",
        "Update EXPENSE_EXTRACTION_PROMPT_V1 with categorization rules"
    ],
    "üü° MEDIUM PRIORITY": [
        "Add greeting response handler in talk2bill_pipeline.py",
        "Implement performance monitoring (timing instrumentation)",
        "Enhance JSON parsing robustness in llm_service.py",
        "Add greeting examples to prompts.py",
        "Create benchmark comparison: Gemini vs Claude"
    ],
    "üü¢ LOW PRIORITY": [
        "Add optional configuration registry (vyapar_config.py)",
        "Implement caching for repeated queries",
        "Add more detailed logging",
        "Documentation updates",
        "Code refactoring for maintainability"
    ]
}

# One heading per priority level, followed by its items numbered from 1.
for priority, items in priorities.items():
    print(f"\n{priority}:")
    for i, item in enumerate(items, 1):
        print(f"  {i}. {item}")

# ============================================================================
# PART 4: Files That Need Updates
# ============================================================================

print("\n\nüìÅ FILES REQUIRING UPDATES")
print("-"*70)

# Column-oriented table with 7 rows, one per file. The i-th entry of each list
# belongs to the i-th file; keep all four lists the same length.
file_updates = {
    "File": [
        "prompts.py",
        "talk2bill_pipeline.py",
        "llm_service.py",
        "prompt_builder.py",
        "scheduler.py",
        "tests/ (new)",
        "vyapar_config.py (new)"
    ],
    "Priority": [
        "üî¥ HIGH",
        "üü° MEDIUM",
        "üü° MEDIUM",
        "üü¢ LOW",
        "‚úÖ No changes",
        "üî¥ HIGH",
        "üü¢ LOW (optional)"
    ],
    "Changes": [
        "Add 50 categories, greeting rules/examples",
        "Add greeting response handler",
        "Add timing, improve error handling",
        "Update to use new prompts",
        "No changes needed",
        "Create comprehensive test suite",
        "Optional config centralization"
    ],
    # Hand-written estimates; nothing in this cell computes them.
    "Lines Changed": [
        "~150 lines",
        "~50 lines",
        "~30 lines",
        "~10 lines",
        "0 lines",
        "~500 lines (new)",
        "~100 lines (new)"
    ]
}

df_updates = pd.DataFrame(file_updates)
# Plain-text rendering without the row index.
print(df_updates.to_string(index=False))

# ============================================================================
# PART 5: Integration Points
# ============================================================================

print("\n\nüîå KEY INTEGRATION POINTS")
print("-"*70)

# Static text block printed verbatim; each numbered entry lists the current
# state, the proposed action and how to test it.
print("""
1. LLM SERVICE (llm_service.py):
   - Currently: Uses Gemini 2.0 Flash with LangChain
   - Action: Keep Gemini, just update prompts
   - Testing: Compare Gemini vs Claude accuracy

2. PROMPT TEMPLATES (prompts.py):
   - Currently: ~10 generic categories, no greeting handling
   - Action: Add 50 categories, greeting intent
   - Testing: Validate with 90 test cases

3. PIPELINE ORCHESTRATION (talk2bill_pipeline.py):
   - Currently: 2 intents (expense, other)
   - Action: Add greeting intent handling
   - Testing: Integration tests for new flow

4. CONVERSATION HISTORY:
   - Currently: Multi-turn with 10 message history
   - Action: Keep existing logic (better than notebooks)
   - Testing: Verify greeting doesn't break conversation flow

5. STRUCTURED OUTPUT:
   - Currently: Pydantic models with validation
   - Action: Keep existing approach (better than notebooks)
   - Testing: Ensure new prompts work with existing models
""")

# ============================================================================
# PART 6: Risk Assessment
# ============================================================================

print("\nüö® RISK ASSESSMENT")
print("-"*70)

# Column-oriented table with 5 rows, one per risk. The i-th entry of each list
# belongs to the i-th risk; keep all four lists the same length.
risks = {
    "Risk": [
        "Intent accuracy degradation",
        "Category confusion",
        "Greeting loop",
        "Performance regression",
        "Breaking existing flows"
    ],
    # Impact and Probability are hand-assigned HIGH / MEDIUM / LOW ratings.
    "Impact": [
        "HIGH",
        "MEDIUM",
        "LOW",
        "MEDIUM",
        "HIGH"
    ],
    "Probability": [
        "MEDIUM",
        "MEDIUM",
        "LOW",
        "LOW",
        "LOW"
    ],
    "Mitigation": [
        "Extensive testing with 90 cases, shadow deployment",
        "Category accuracy monitoring, user feedback",
        "Simple flow, easy rollback",
        "Performance benchmarking, monitoring",
        "Pydantic validation, comprehensive tests"
    ]
}

df_risks = pd.DataFrame(risks)
# Plain-text rendering without the row index.
print(df_risks.to_string(index=False))

# ============================================================================
# PART 7: Success Metrics
# ============================================================================

print("\n\n‚úÖ SUCCESS CRITERIA")
print("-"*70)

# Column-oriented table with 8 rows, one per metric. The i-th entry of each
# list belongs to the i-th metric; keep all three lists the same length.
metrics = {
    "Metric": [
        "Intent classification accuracy",
        "Category classification accuracy",
        "Greeting detection accuracy",
        "P95 latency",
        "Error rate",
        "User satisfaction",
        "Expense recording frequency",
        "Voice adoption rate"
    ],
    # Targets are stored as display strings, not numbers; nothing in this
    # cell compares them against measured values.
    "Target": [
        "‚â• 90%",
        "‚â• 85%",
        "‚â• 95%",
        "‚â§ 3000ms",
        "‚â§ 5%",
        "‚â• 4.0/5.0",
        "+20%",
        "+15%"
    ],
    "Measurement": [
        "Test suite accuracy",
        "Sampled transactions",
        "Greeting test cases",
        "Latency monitoring",
        "Error logs",
        "User surveys",
        "Transaction analytics",
        "Feature usage data"
    ]
}

df_metrics = pd.DataFrame(metrics)
# Plain-text rendering without the row index.
print(df_metrics.to_string(index=False))

# ============================================================================
# PART 8: Timeline Summary
# ============================================================================

print("\n\nüìÖ MIGRATION TIMELINE")
print("-"*70)

# Static text block printed verbatim. The column alignment is done with
# literal spaces inside the string, so keep the indentation when editing.
# NOTE(review): "Week 3-4: Medium Priority Changes" lists the configuration
# registry and documentation updates, which the priorities dict in PART 3
# places under low priority - confirm which is intended.
timeline = """
Week 1-2:  High Priority Changes
           - Update prompts.py with 50 categories
           - Add greeting intent
           - Create test suite
           - Test category accuracy

Week 3-4:  Medium Priority Changes
           - Add greeting response handler
           - Implement performance monitoring
           - (Optional) Add configuration registry
           - Documentation updates

Week 5:    Testing & Validation
           - Run full test suite
           - Compare Gemini vs. Claude
           - Performance benchmarking
           - Code review

Week 6:    Shadow Deployment
           - Deploy alongside existing
           - Compare outputs
           - Log discrepancies

Week 7:    Canary Release
           - 5% traffic to new version
           - Monitor metrics
           - Collect feedback

Week 8-10: Gradual Rollout
           - 25% ‚Üí 50% ‚Üí 75% ‚Üí 100%
           - Monitor at each stage
           - Final deployment
"""

print(timeline)

# ============================================================================
# PART 9: Action Items
# ============================================================================

print("\nüéØ IMMEDIATE ACTION ITEMS")
print("-"*70)

# Static checklist printed verbatim, grouped by owner (one heading per group,
# five unchecked items each).
actions = """
FOR AKHIL:
‚ñ° Review migration guide with engineering team
‚ñ° Prioritize features based on business impact
‚ñ° Set up testing environment
‚ñ° Create feature branch for migration work
‚ñ° Schedule weekly sync meetings

FOR ENGINEERING:
‚ñ° Read VAANI_Production_Migration_Guide.md
‚ñ° Set up local development environment
‚ñ° Run existing test suite to establish baseline
‚ñ° Create test data files from notebooks
‚ñ° Begin prompt template updates

FOR DATA SCIENCE:
‚ñ° Export 90 test cases CSV
‚ñ° Document category mapping logic
‚ñ° Prepare benchmarking scripts
‚ñ° Set up monitoring dashboards
‚ñ° Create performance baseline report
"""

print(actions)

# Closing banner: only names the companion document; the file is not opened
# or checked for existence here.
print("\n" + "="*70)
print("üìÑ Full documentation: VAANI_Production_Migration_Guide.md")
print("="*70)

üè≠ VAANI PRODUCTION CODE ANALYSIS

üìê ARCHITECTURE COMPARISON
----------------------------------------------------------------------
    Component             Notebook            Production
  Entry Point      playground cell          scheduler.py
Orchestration  route_with_intent() talk2bill_pipeline.py
    LLM Layer OpenAI/Anthropic SDK      LangChain+Gemini
Prompt System       Python strings  prompts.py constants
Output Format         JSON parsing       Pydantic models


üìä FEATURE-BY-FEATURE COMPARISON
----------------------------------------------------------------------
                Feature                      Notebooks               Production      Priority
            Primary LLM              Claude Sonnet 4.5         Gemini 2.0 Flash   Keep Gemini
             Backup LLM               Claude Haiku 4.5                     None    Not needed
            Temperature 0.2 (intent), 0.3 (extraction)                0.0 (all)      Keep 0.0
             Max Tokens              