# Receipt Processing Notebook

This notebook replicates the CLI functionality for processing receipts step-by-step.

### Imports
Import all necessary modules for receipt processing

In [None]:
import sys
from pathlib import Path
import pandas as pd

# Add src to path for imports.
# NOTE(review): `project_root` is simply the parent of the current working
# directory, so it only points at the intended source root when the kernel is
# started from the notebook's own folder — confirm, or anchor the path on
# something that does not depend on the working directory.
project_root = Path.cwd().parent
# Insert at index 0 so this directory takes precedence over later sys.path
# entries; the membership test keeps re-runs of this cell from adding duplicates.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from adapters.secondary.file_system_adapter import FileSystemAdapter
from adapters.secondary.anthropic_adapter import AnthropicAdapter
from adapters.secondary.claude_agent_sdk_adapter import ClaudeAgentSdkAdapter
from adapters.secondary.csv_adapter import CSVAdapter
from adapters.secondary.duplicate_detection_adapter import DuplicateDetectionAdapter
from adapters.secondary.env_config_adapter import EnvConfigAdapter
from core.use_cases.process_receipt import ProcessReceiptUseCase
from core.use_cases.view_staging import ViewStagingUseCase
from core.domain.configuration import AppConfig

### Configuration
Load configuration from environment variables (.env file)

In [None]:
# Build the application configuration through the environment adapter
config_adapter = EnvConfigAdapter()
config = config_adapter.load_config()

# (label, configured path) pairs, in the order they should be displayed
configured_paths = [
    ("Incoming Folder", config.incoming_folder),
    ("Scanned Folder", config.scanned_folder),
    ("Imported Folder", config.imported_folder),
    ("Failed Folder", config.failed_folder),
    ("CSV Staging File", config.csv_staging_file),
    ("XLSX Output File", config.xlsx_output_file),
]

# Show every path resolved to its absolute form
config_df = pd.DataFrame(
    [
        {"Setting": label, "Value": str(path.resolve())}
        for label, path in configured_paths
    ]
)
display(config_df)

### Initialize Adapters and Use Cases
Create all adapters and use cases following hexagonal architecture

In [None]:
# Secondary adapters. Names are reused by later cells, so keep them stable.
file_system = FileSystemAdapter()
ai_extraction = ClaudeAgentSdkAdapter()  # alternative backend: AnthropicAdapter()
csv = CSVAdapter()
duplicate_detection = DuplicateDetectionAdapter(file_system)

# Use cases, wired with the adapters created above
process_receipt_use_case = ProcessReceiptUseCase(
    file_system,
    ai_extraction,
    csv,
    duplicate_detection,
)
view_staging_use_case = ViewStagingUseCase(
    file_system,
    csv,
)

# Create folders if they don't exist
file_system.create_folders(config)

print("✓ Adapters and use cases initialized")

### View System Status
Display file counts in each folder and staging information

In [None]:
# Tally receipt files per workflow folder
incoming_count = file_system.count_receipt_files(config.incoming_folder)
scanned_count = file_system.count_receipt_files(config.scanned_folder)
imported_count = file_system.count_receipt_files(config.imported_folder)
failed_count = file_system.count_receipt_files(config.failed_folder)

# Summarise the staging CSV; fall back to a placeholder when nothing is reported
staging_info = view_staging_use_case.execute(config)
if staging_info:
    staging_str = str(staging_info)
else:
    staging_str = "No staging data"

# One row per folder, followed by the staging summary as the last row
status_rows = [
    {"Folder": label, "File Count": count}
    for label, count in (
        ("Incoming", incoming_count),
        ("Scanned", scanned_count),
        ("Imported", imported_count),
        ("Failed", failed_count),
    )
]
status_rows.append({"Folder": "Staging CSV", "File Count": staging_str})

status_df = pd.DataFrame(status_rows)
display(status_df)

### List Incoming Files
Display all supported files in the incoming folder

In [None]:
# Supported receipt files currently waiting in the incoming folder
incoming_files = file_system.get_supported_files(config.incoming_folder)

if not incoming_files:
    print(f"No files in {config.incoming_folder}")
else:
    # One table row per file; size is reported in KB (bytes / 1024)
    file_rows = [
        {
            "Filename": path.name,
            "Extension": path.suffix,
            "Size (KB)": path.stat().st_size / 1024,
            "Full Path": str(path),
        }
        for path in incoming_files
    ]
    files_df = pd.DataFrame(file_rows)
    display(files_df)

### Process All Receipts
Run full receipt processing workflow with duplicate detection

In [None]:
# Execute the full processing use case
# NOTE(review): presumably this works through the files in the incoming folder
# and calls the AI extraction adapter for them, so it may be slow and may move
# files on disk — confirm before re-running this cell.
process_receipt_use_case.execute(config)

### View Staging Data
Display the contents of receipts.csv as a DataFrame

In [None]:
# Fetch the complete staging table from the use case
staging_data = view_staging_use_case.get_full_table(config)

# Report the configured staging file name instead of a hard-coded
# "receipts.csv", so the messages stay accurate if the file is renamed in the
# configuration.
staging_name = config.csv_staging_file.name

if not staging_data.exists:
    print(f"{staging_name} does not exist")
elif staging_data.is_empty:
    print(f"{staging_name} is empty")
else:
    # Convert to DataFrame for better display
    receipts_df = pd.DataFrame([
        {
            "Amount": r.amount,
            "Tax": r.tax,
            "Tax %": r.tax_percentage,
            "Description": r.description,
            "Currency": r.currency,
            "Date": r.date,
            "Confidence": r.confidence,
            # Abbreviate long hashes to their first 8 characters for readability
            "Hash": (r.hash[:8] + "...") if len(r.hash) > 8 else r.hash,
            "Filename": r.done_filename
        }
        for r in staging_data.receipts
    ])

    print(f"Total entries: {len(staging_data.receipts)}")
    # Newest first.
    # NOTE(review): this orders by the raw "Date" values; if they are strings
    # the order is lexicographic — confirm the stored date format sorts correctly.
    receipts_df = receipts_df.sort_values(by="Date", ascending=False).reset_index(drop=True)
    display(receipts_df)

### View Failed Receipts
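Display failed receipt files and their error logs.

> **TODO:** No code cell exists for this section yet; failed files are currently only counted in the *View System Status* table.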

### Import to XLSX
Import the scanned receipts from receipts.csv into the XLSX file

In [None]:
# XLSX-specific pieces are imported here, next to the only section that uses them
from adapters.secondary.xlsx_adapter import XLSXAdapter
from core.use_cases.import_to_xlsx import ImportToXLSXUseCase

# Spreadsheet adapter plus the import use case, reusing the adapters created earlier
xlsx_adapter = XLSXAdapter()
import_use_case = ImportToXLSXUseCase(
    file_system=file_system,
    csv=csv,
    xlsx=xlsx_adapter,
    duplicate_detection=duplicate_detection,
)

print("✓ XLSX adapter and import use case initialized")

In [None]:
# Execute the import
# NOTE(review): presumably this writes the staged receipts into the XLSX output
# file and changes files on disk — confirm; re-running the cell may not be
# idempotent.
import_use_case.execute(config)

### Verify Import Results
Check the imported folder and verify that a backup was created.

In [None]:
# Check imported folder
imported_files = file_system.get_supported_files(config.imported_folder)

if imported_files:
    files_df = pd.DataFrame([
        {
            "Filename": f.name,
            "Extension": f.suffix,
            "Size (KB)": f.stat().st_size / 1024
        }
        for f in imported_files
    ])
    print(f"Total imported files: {len(imported_files)}")
    display(files_df.tail(10))  # Show last 10 files
else:
    print("No files in imported folder")

# Check if backup was created.
# Looks next to the XLSX output file for names of the form
# "<stem>.<anything><suffix>".
# NOTE(review): assumes that is how the import step names its backups — confirm.
xlsx_path = config.xlsx_output_file
# sorted() gives a stable listing; glob() makes no ordering promise.
backup_files = sorted(xlsx_path.parent.glob(f"{xlsx_path.stem}.*{xlsx_path.suffix}"))
if backup_files:
    print(f"\n✅ Found {len(backup_files)} backup(s):")
    for backup in backup_files:
        print(f"  • {backup.name}")
else:
    # Previously a missing backup produced no output at all, which made the
    # "verify" step indistinguishable from a cell that was never checked.
    print(f"\n⚠️ No backup of {xlsx_path.name} found in {xlsx_path.parent}")

### Process Single Receipt (For Testing)
Process one receipt file to test AI extraction and see detailed output

In [None]:
# FileHash is only needed for the type annotation in this cell
from core.domain.receipt import FileHash

# Configuration: point this at the receipt you want to test,
# e.g. Path("../../data/incoming/receipt.pdf")
test_file_path = Path("../../data/incoming/github-himmelreich-it-receipt-2025-09-20.pdf")

if not test_file_path or not test_file_path.exists():
    print("Set test_file_path to a valid file path to process a single receipt")
else:
    print(f"Processing: {test_file_path.name}")

    # Hashes of files that were already handled: imported first, then scanned
    existing_hashes: list[FileHash] = [
        *file_system.get_file_hashes_from_folder(config.imported_folder),
        *file_system.get_file_hashes_from_folder(config.scanned_folder),
    ]

    duplicate_result = duplicate_detection.check_duplicate(test_file_path, existing_hashes)

    if not duplicate_result.is_duplicate:
        print("🔍 Analyzing with AI...")
        receipt_data = ai_extraction.extract_receipt_data(str(test_file_path))

        # Single-row table of the extracted fields
        result_df = pd.DataFrame([receipt_data])
        display(result_df)
    else:
        print(f"⏭️  Duplicate detected in {duplicate_result.location_name} folder")

### Test Claude Agent SDK Adapter
Test the new Claude Agent SDK adapter with a sample receipt

In [None]:
# Enable logging to see what the Agent SDK adapter is doing
import logging

# Install a root handler with a compact format, but keep the root level at
# WARNING: level=DEBUG on the root logger would switch on debug output for
# every library running in the kernel, not just the adapter.
# force=True replaces handlers that are already installed; without it
# basicConfig() does nothing when the root logger is already configured (for
# example when this cell is re-run), and the format below would be ignored.
logging.basicConfig(
    level=logging.WARNING,
    format='%(name)s - %(levelname)s - %(message)s',
    force=True,
)

# Only the adapter's own logger is opened up to DEBUG; its records propagate to
# the root handler configured above. Lower the root level instead if output
# from other libraries is wanted as well.
logger = logging.getLogger('adapters.secondary.claude_agent_sdk_adapter')
logger.setLevel(logging.DEBUG)

print("Logging enabled for Agent SDK adapter")

In [None]:
import traceback

# Dedicated adapter instance for this smoke test
agent_sdk_adapter = ClaudeAgentSdkAdapter()

# Sample receipt taken from the incoming folder
test_receipt = Path("../../data/incoming/2025_10_24 12_13 Office Lens.jpg")

if not test_receipt.exists():
    print(f"Test file not found: {test_receipt}")
else:
    print(f"Testing Claude Agent SDK adapter with: {test_receipt.name}")
    print("This may take a moment as it processes the receipt...\n")

    try:
        # Run the extraction and show the result as a single-row table
        result = agent_sdk_adapter.extract_receipt_data(str(test_receipt))
        result_df = pd.DataFrame([result])
        display(result_df)

        print("\n✅ Agent SDK adapter test successful!")
    except Exception as e:
        # Best-effort demo cell: report the failure with a full traceback
        # instead of aborting the notebook run.
        print(f"❌ Error testing adapter: {e}")
        traceback.print_exc()