# Helpers

> Helper functions for common workflow patterns.

In [None]:
#| default_exp core.helpers

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from typing import Dict, Any, Optional, Callable, Tuple

from cjm_fasthtml_workflows.core.job_session import JobSessionManager
from cjm_fasthtml_workflows.core.deduplication import DeduplicationTracker

## Helper Functions

These helper functions provide convenient patterns for common workflow operations.

### get_job_session_info

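Retrieve a job's file and plugin display information from the session. When the session holds no metadata for the job, fall back to the job's own attributes, extended or overridden by any caller-supplied `fallback_fields`.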

In [None]:
#| export
def get_job_session_info(
    job_id:str, # Unique job identifier
    job:Any, # Job object (should have attributes like file_name, file_path, plugin_id)
    sess:Any, # FastHTML session object
    fallback_fields:Optional[Dict[str, Any]]=None # Optional additional fallback fields
) -> Tuple[Dict[str, Any], Dict[str, Any]]: # (file_info, plugin_info) dictionaries
    """Return a job's file and plugin display info, preferring session metadata over job attributes."""
    extras = {} if fallback_fields is None else fallback_fields
    stored = JobSessionManager(sess).get_job_metadata(job_id, {})

    # Keep the stored file info when it is non-empty; otherwise build it from
    # the job's attributes, with caller-supplied extras merged in last.
    file_info = stored.get("file_info", {}) or {
        "name": getattr(job, "file_name", "unknown"),
        "path": getattr(job, "file_path", ""),
        **extras.get("file_info", {}),
    }

    # Same approach for plugin info; the job's plugin_id serves as both
    # the title and the id in the fallback.
    plugin_info = stored.get("plugin_info", {}) or {
        "title": getattr(job, "plugin_id", "unknown"),
        "id": getattr(job, "plugin_id", "unknown"),
        **extras.get("plugin_info", {}),
    }

    return file_info, plugin_info

#### Example: get_job_session_info

In [None]:
from cjm_fasthtml_workflows.core.job_session import JobSessionManager

# Create mock job object
class MockJob:
    """Bare-bones stand-in for a job: an id plus file and plugin attributes."""
    def __init__(self, job_id, file_name, file_path, plugin_id):
        self.id, self.file_name = job_id, file_name
        self.file_path, self.plugin_id = file_path, plugin_id

# Seed a fresh session with metadata for job-123
session = {}
job_sess = JobSessionManager(session)
stored_metadata = {
    "file_info": {"name": "audio.mp3", "path": "/media/audio.mp3", "size": 5000},
    "plugin_info": {"title": "Whisper Large", "id": "whisper_large", "version": "3.0"},
}
job_sess.store_job_metadata("job-123", stored_metadata)

# The job's own attributes deliberately differ from the stored metadata,
# so the printed values show which source was used
job = MockJob("job-123", "fallback.mp3", "/fallback/path", "fallback_plugin")

# Look up the info (should use session data, not the job attributes)
file_info, plugin_info = get_job_session_info("job-123", job, session)
for label, info in (("File info:", file_info), ("Plugin info:", plugin_info)):
    print(label, info)

File info: {'name': 'audio.mp3', 'path': '/media/audio.mp3', 'size': 5000}
Plugin info: {'title': 'Whisper Large', 'id': 'whisper_large', 'version': '3.0'}


In [None]:
# An empty session has no stored metadata, so the job's attributes are used instead
empty_session = {}
job2 = MockJob(
    job_id="job-456",
    file_name="meeting.mp3",
    file_path="/media/meeting.mp3",
    plugin_id="whisper_base",
)

file_info2, plugin_info2 = get_job_session_info(
    job_id="job-456",
    job=job2,
    sess=empty_session,
)
print("\nFile info (from job attrs):", file_info2)
print("Plugin info (from job attrs):", plugin_info2)


File info (from job attrs): {'name': 'meeting.mp3', 'path': '/media/meeting.mp3'}
Plugin info (from job attrs): {'title': 'whisper_base', 'id': 'whisper_base'}


In [None]:
# Extra fallback fields are merged into the attribute-derived fallbacks
extra_fallbacks = {
    "file_info": {"format": "mp3", "duration": 120},
    "plugin_info": {"provider": "OpenAI"},
}
file_info3, plugin_info3 = get_job_session_info(
    "job-789", job2, empty_session, fallback_fields=extra_fallbacks
)
print("\nFile info (with extra fallbacks):", file_info3)
print("Plugin info (with extra fallbacks):", plugin_info3)


File info (with extra fallbacks): {'name': 'meeting.mp3', 'path': '/media/meeting.mp3', 'format': 'mp3', 'duration': 120}
Plugin info (with extra fallbacks): {'title': 'whisper_base', 'id': 'whisper_base', 'provider': 'OpenAI'}


### once_per_job

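Execute an operation once per job using automatic deduplication. Repeat calls for the same job and `tracker_id` are skipped and return `None`; if the operation raises, the job is not marked as processed, so the operation can be retried.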

In [None]:
#| export
def once_per_job(
    sess:Any, # FastHTML session object
    job_id:str, # Unique job identifier
    operation:Callable[[], Any], # Callable to execute (should take no arguments)
    tracker_id:str="default_operation" # Unique identifier for this operation type
) -> Optional[Any]: # Result of operation if executed, None if already processed
    """Execute an operation once per job using automatic deduplication.

    Returns `None` without calling `operation` when `job_id` is already marked
    as processed for `tracker_id`. Because a skipped call also returns `None`,
    callers cannot tell a skip apart from an operation that itself returns `None`.
    Any exception raised by `operation` propagates unchanged to the caller.
    """
    tracker = DeduplicationTracker(sess, tracker_id)

    if tracker.is_processed(job_id):
        return None

    # No try/except is needed here: if operation() raises, the exception
    # propagates and mark_processed() is never reached, so the job stays
    # unmarked and the operation can be retried later.
    result = operation()
    tracker.mark_processed(job_id)
    return result

#### Example: once_per_job

In [None]:
# Fresh, empty session for the deduplication examples
session = dict()

# Tally of how many times each operation actually ran
execution_count = dict(save=0, notify=0)

def save_to_disk(job_id, data):
    """Pretend to save `data` for a job, counting the call and reporting it."""
    execution_count['save'] = execution_count['save'] + 1
    print(f"  [SAVE] Saving {job_id}: {data}")
    outcome = f"Saved {job_id}"
    return outcome

def send_notification(job_id):
    """Pretend to send a notification for a job, counting the call and reporting it."""
    execution_count['notify'] = execution_count['notify'] + 1
    print(f"  [NOTIFY] Sending email for {job_id}")
    outcome = f"Notified for {job_id}"
    return outcome

# Attempt 1: job-abc has not been saved yet, so the operation should run
print("Call 1:")
result1 = once_per_job(
    sess=session,
    job_id="job-abc",
    operation=lambda: save_to_disk("job-abc", {"text": "Hello"}),
    tracker_id="save_operation",
)
print(f"  Result: {result1}")

# Attempt 2: same job and tracker id, so the operation should be skipped
print("\nCall 2 (duplicate):")
result2 = once_per_job(
    sess=session,
    job_id="job-abc",
    operation=lambda: save_to_disk("job-abc", {"text": "Hello"}),
    tracker_id="save_operation",
)
print(f"  Result: {result2}")

total_saves = execution_count['save']
print(f"\nTotal save operations: {total_saves}")

Call 1:
  [SAVE] Saving job-abc: {'text': 'Hello'}
  Result: Saved job-abc

Call 2 (duplicate):
  Result: None

Total save operations: 1


In [None]:
# Several operation types tracked separately for the same job
print("\nMultiple operations for job-abc:")

# Save operation (already done for job-abc, will skip)
once_per_job(
    sess=session,
    job_id="job-abc",
    operation=lambda: save_to_disk("job-abc", {"text": "Hello"}),
    tracker_id="save_operation",
)

# Notification: the first attempt executes, the second is a duplicate and is skipped
for attempt in range(2):
    once_per_job(
        sess=session,
        job_id="job-abc",
        operation=lambda: send_notification("job-abc"),
        tracker_id="notification_operation",
    )

save_total, notify_total = execution_count['save'], execution_count['notify']
print(f"\nTotal operations:")
print(f"  Saves: {save_total}")
print(f"  Notifications: {notify_total}")
print(f"\nSession state:")
# Keep the bare expression last so the notebook displays the session contents
session


Multiple operations for job-abc:
  [NOTIFY] Sending email for job-abc

Total operations:
  Saves: 1
  Notifications: 1

Session state:


{'dedup_save_operation': ['job-abc'],
 'dedup_notification_operation': ['job-abc']}

In [None]:
# Error handling - operation fails, not marked as processed
def failing_operation():
    """Always fail: print an error marker, then raise ValueError."""
    print("  [ERROR] Operation failed!")
    error = ValueError("Simulated error")
    raise error

print("\nTesting error handling:")
try:
    once_per_job(
        sess=session,
        job_id="job-xyz",
        operation=failing_operation,
        tracker_id="save_operation",
    )
except ValueError as err:
    print(f"  Caught error: {err}")

# The failed job should not have been marked as processed
tracker = DeduplicationTracker(session, "save_operation")
was_marked = tracker.is_processed('job-xyz')
print(f"\nJob-xyz marked as processed: {was_marked}")
print("(False means we can retry the operation)")


Testing error handling:
  [ERROR] Operation failed!
  Caught error: Simulated error

Job-xyz marked as processed: False
(False means we can retry the operation)


In [None]:
#| hide
# Run nbdev's export step for this notebook
import nbdev
nbdev.nbdev_export()