# Excel Differ - Testing Snippets

Quick testing and inspection notebook for Excel Differ V4.

**Usage:**
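- Run the cells in order; the Setup cell defines the paths and registry that every test cell relies on
- Inspect the printed output of each cell
- Modify the paths in the Setup cell as needed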

## Setup: Define Variables and Paths

In [None]:
import os
from pathlib import Path
from src.registry import register_all_components, registry
from src.utils.logging_setup import setup_logging
from src.differ.differ import Differ
from src.differ.formatters.json_formatter import JSONFormatter

# Register all components with the shared registry
register_all_components()

# Logging: the level is read from EXCEL_DIFFER_LOG_LEVEL and defaults to INFO
log_level = os.getenv('EXCEL_DIFFER_LOG_LEVEL', 'INFO').upper()
logger = setup_logging(
    log_level=log_level,
    log_dir='./logs',
    component='excel-differ-notebook',
)

# Locations used by the test cells below
SAMPLE_FILE = Path('./data/sample.xlsx')  # Adjust if you have a different sample
SAMPLE_FILE_2 = Path('./data/sample2.xlsx')  # For diff testing (use different file if available)
OUTPUT_DIR = Path('./tmp/snippets-output')
WORKFLOW_FILE = Path('workflow_definitions/default.yaml')

# Make sure the output directory exists before any cell writes into it
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Summary banner: one indented "label: value" line per configured path
print("\n✓ Setup complete")
for label, value in (
    ("Sample file", SAMPLE_FILE),
    ("Output dir", OUTPUT_DIR),
    ("Workflow", WORKFLOW_FILE),
):
    print(f"  {label}: {value}")

## Test 1: Flattener

In [None]:
# Flattener settings; flat output is written under OUTPUT_DIR/flats
flattener_config = dict(
    output_dir=str(OUTPUT_DIR / 'flats'),
    include_computed=False,
    include_literal=True,
    include_formats=True,
    timeout=900,
)

# Build the 'openpyxl' flattener through the component registry
flattener = registry.create_flattener('openpyxl', flattener_config)

# Handle the missing-input case first, then flatten and report the outcome
if not SAMPLE_FILE.exists():
    print(f"✗ Sample file not found: {SAMPLE_FILE}")
    print("  Create a sample Excel file or adjust the SAMPLE_FILE path")
else:
    result = flattener.flatten(SAMPLE_FILE, origin='SAMPLE_RUN_NOTEBOOK')

    if not result.success:
        # Failure: list every error reported on the result
        print("✗ Flattening failed:")
        for error in result.errors:
            print(f"  - {error}")
    else:
        print("✓ Flattening successful!")
        print(f"  Input: {result.input_path}")
        print(f"  Output: {result.flat_root}")
        print(f"  Manifest: {result.manifest_path}")

        # Count the regular files under the flat root (directories excluded)
        if result.flat_root and result.flat_root.exists():
            files = list(result.flat_root.rglob('*'))
            print(f"\n  Files created: {sum(1 for f in files if f.is_file())}")

## Test 2: Workflow

In [None]:
# Build an orchestrator from the workflow definition file and run it
from src.orchestrator.factory import create_orchestrator_from_config

if not WORKFLOW_FILE.exists():
    print(f"✗ Workflow file not found: {WORKFLOW_FILE}")
else:
    orchestrator, workflow = create_orchestrator_from_config(WORKFLOW_FILE)

    print(f"✓ Workflow loaded: {WORKFLOW_FILE}")
    print("\n  Configuration:")
    # One line per workflow component; each attribute is looked up just
    # before it is printed
    for label, attr in (
        ("Source", "source"),
        ("Destination", "destination"),
        ("Converter", "converter"),
        ("Flattener", "flattener"),
    ):
        print(f"    {label}: {getattr(workflow, attr).implementation}")

    # Dump the workflow object itself for inspection
    print(workflow)

    # Execute the workflow and summarise the counters on the result
    print("\n  Running workflow...")
    result = orchestrator.run()

    print("\n  ✓ Workflow complete!")
    print(f"    Files processed: {result.files_processed}")
    print(f"    Files succeeded: {result.files_succeeded}")
    print(f"    Files failed: {result.files_failed}")

    # List only the inputs whose processing result is not successful
    if result.files_failed > 0:
        print("\n  Failed files:")
        for proc_result in result.processing_results:
            if proc_result.success:
                continue
            print(f"    - {proc_result.input_file}")

## Test 3: Differ

In [None]:
# Flattener settings for the differ; flat output is written under
# OUTPUT_DIR/diff-flats
differ_flattener_config = {
    'output_dir': str(OUTPUT_DIR / 'diff-flats'),
    'include_computed': False,
    'include_literal': True,
    'include_formats': True,
}

# Build the differ on top of an 'openpyxl' flattener, plus a JSON formatter
# used to save the result
differ_flattener = registry.create_flattener('openpyxl', differ_flattener_config)
differ = Differ(differ_flattener)
formatter = JSONFormatter()

# Diff the two sample files; both must exist, otherwise report which is missing
if SAMPLE_FILE.exists() and SAMPLE_FILE_2.exists():
    diff_result = differ.diff_files(SAMPLE_FILE, SAMPLE_FILE_2)

    if diff_result['success']:
        print("✓ Diff complete!")
        print("\n  Summary:")
        print(f"    File 1: {diff_result['file1']}")
        print(f"    File 2: {diff_result['file2']}")
        print(f"    Files compared: {diff_result['files_compared']}")
        print(f"    Files different: {diff_result['files_different']}")

        # Only mention one-sided files when there are any
        if diff_result['files_only_in_file1']:
            print(f"    Only in file1: {len(diff_result['files_only_in_file1'])}")
        if diff_result['files_only_in_file2']:
            print(f"    Only in file2: {len(diff_result['files_only_in_file2'])}")

        # Save to file (the directory name was previously misspelled
        # 'diff-resuls')
        diff_results_dir = OUTPUT_DIR / 'diff-results'
        diff_results_dir.mkdir(parents=True, exist_ok=True)
        diff_output = diff_results_dir / 'diff-result.json'
        formatter.save(diff_result, str(diff_output))
        print(f"\n  ✓ Diff saved to: {diff_output}")

        # Show the first difference only, as a quick sanity check
        if diff_result['differences']:
            print("\n  First difference:")
            first_diff = diff_result['differences'][0]
            print(f"    File: {first_diff['filename']}")
            # Check each line-count key on its own so that an entry with
            # 'lines_added' but no 'lines_removed' cannot raise KeyError
            for label, key in (
                ("Lines added", 'lines_added'),
                ("Lines removed", 'lines_removed'),
            ):
                if key in first_diff:
                    print(f"    {label}: {first_diff[key]}")
    else:
        print("✗ Diff failed:")
        for error in diff_result.get('errors', []):
            print(f"  - {error}")
else:
    print("✗ Sample files not found")
    print(f"  File 1: {SAMPLE_FILE} (exists: {SAMPLE_FILE.exists()})")
    print(f"  File 2: {SAMPLE_FILE_2} (exists: {SAMPLE_FILE_2.exists()})")
    # No diff is run in this branch, so the note only explains how to fix
    # the inputs instead of claiming a diff happened
    print("\n  Note: No diff was run because at least one input file is missing")
    print("        Point SAMPLE_FILE and SAMPLE_FILE_2 at two existing Excel files")
    print("        (using the same file for both will show no changes)")