Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -1266,4 +1266,6 @@ When updating documentation:
3. Add `doc_example_output` calls to generate examples
4. Replace hardcoded blocks with `<<<` imports
5. Add `@include` directives for example outputs
6. Run tests and verify documentation builds correctly
6. Run tests and verify documentation builds correctly
- When adding new parameters, ensure the prompt and merge params method in @lib/active_agent/base.rb allows them to be passed through
- Use vscode regions for snippets of examples in docs
8 changes: 4 additions & 4 deletions docs/docs/generation-providers/open-router-provider.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,11 @@ Extract structured data from receipts and documents using OpenRouter's structure

#### Test Implementation

<<< @/../test/agents/open_router_integration_test.rb#89-145{ruby:line-numbers}
<<< @/../test/agents/open_router_integration_test.rb#receipt_extraction_test{ruby:line-numbers}

#### Receipt Schema Definition

<<< @/../test/dummy/app/agents/open_router_integration_agent.rb#188-234{ruby:line-numbers}
<<< @/../test/dummy/app/agents/open_router_integration_agent.rb#receipt_schema{ruby:line-numbers}

The receipt schema ensures consistent extraction of:
- Merchant name and address
Expand All @@ -121,7 +121,7 @@ This example uses structured output to ensure the receipt data is returned in a

OpenRouter supports PDF processing with various engines:

<<< @/../test/agents/open_router_integration_test.rb#209-234{ruby:line-numbers}
<<< @/../test/agents/open_router_integration_test.rb#pdf_processing_local{ruby:line-numbers}

::: details PDF Processing Example
<!-- @include: @/parts/examples/open-router-integration-test.rb-test-processes-PDF-document-from-local-file.md -->
Expand All @@ -137,7 +137,7 @@ OpenRouter offers multiple PDF processing engines:

Example with OCR engine:

<<< @/../test/agents/open_router_integration_test.rb#316-338{ruby:line-numbers}
<<< @/../test/agents/open_router_integration_test.rb#pdf_native_support{ruby:line-numbers}

::: details OCR Processing Example
<!-- @include: @/parts/examples/open-router-integration-test.rb-test-processes-scanned-PDF-with-OCR-engine.md -->
Expand Down
10 changes: 7 additions & 3 deletions lib/active_agent/action_prompt/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def observer_class_for(value) # :nodoc:
# Define how the agent should generate content
def generate_with(provider, **options)
self.generation_provider = provider

if options.has_key?(:instructions) || (self.options || {}).empty?
# Either instructions explicitly provided, or no inherited options exist
self.options = (self.options || {}).merge(options)
Expand Down Expand Up @@ -245,11 +244,13 @@ def perform_action(action)
current_context = context.clone
# Merge action params with original params to preserve context
original_params = current_context.params || {}

if action.params.is_a?(Hash)
self.params = original_params.merge(action.params)
else
self.params = original_params
end

process(action.name)
context.message.role = :tool
context.message.action_id = action.id
Expand Down Expand Up @@ -373,6 +374,10 @@ def prepare_message(headers)
if headers[:message].present? && headers[:message].is_a?(ActiveAgent::ActionPrompt::Message)
headers[:body] = headers[:message].content
headers[:role] = headers[:message].role
elsif headers[:message].present? && headers[:message].is_a?(Array)
# Handle array of multipart content like [{type: "text", text: "..."}, {type: "file", file: {...}}]
headers[:body] = headers[:message]
headers[:role] = :user
elsif headers[:message].present? && headers[:message].is_a?(String)
headers[:body] = headers[:message]
headers[:role] = :user
Expand All @@ -394,7 +399,6 @@ def load_input_data(headers)
ActiveAgent::ActionPrompt::Message.new(content: headers[:body], content_type: "input_text")
]
end

headers
end

Expand All @@ -421,7 +425,7 @@ def merge_options(prompt_options)
# Extract runtime options from prompt_options (exclude instructions as it has special template logic)
runtime_options = prompt_options.slice(
:model, :temperature, :max_tokens, :stream, :top_p, :frequency_penalty,
:presence_penalty, :response_format, :seed, :stop, :tools_choice, :data_collection
:presence_penalty, :response_format, :seed, :stop, :tools_choice, :data_collection, :plugins
)
# Handle explicit options parameter
explicit_options = prompt_options[:options] || {}
Expand Down
18 changes: 17 additions & 1 deletion lib/active_agent/action_prompt/message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def initialize(attributes = {})
@metadata = attributes[:metadata] || {}
@charset = attributes[:charset] || "UTF-8"
@content = attributes[:content] || ""
@content_type = attributes[:content_type] || "text/plain"
@content_type = detect_content_type(attributes)
@role = attributes[:role] || :user
@raw_actions = attributes[:raw_actions]
@requested_actions = attributes[:requested_actions] || []
Expand Down Expand Up @@ -85,6 +85,22 @@ def inspect

private

# Picks the content type for a message from its construction attributes.
#
# attributes - Hash of message attributes; reads :content_type and :content.
#
# Returns, in order of precedence:
#   * the explicit attributes[:content_type] when it is truthy
#   * "text/plain" when the content is not an Array
#   * "multipart/mixed" when any Array element is a Hash carrying a
#     :type / "type" entry (e.g. text, image_url, file parts)
#   * "array" for any other Array (including an empty one)
def detect_content_type(attributes)
  explicit_type = attributes[:content_type]
  return explicit_type if explicit_type

  content = attributes[:content]
  return "text/plain" unless content.is_a?(Array)

  # A part counts as multimodal when it is a Hash tagged with a type,
  # under either a symbol or a string key.
  typed_parts = content.any? do |part|
    part.is_a?(Hash) && (part[:type] || part["type"])
  end

  typed_parts ? "multipart/mixed" : "array"
end

def validate_role
unless VALID_ROLES.include?(role.to_s)
raise ArgumentError, "Invalid role: #{role}. Valid roles are: #{VALID_ROLES.join(", ")}"
Expand Down
2 changes: 1 addition & 1 deletion lib/active_agent/action_prompt/prompt.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def initialize(attributes = {})
end

def multimodal?
@multimodal ||= @message&.content.is_a?(Array) || @messages.any? { |m| m.content.is_a?(Array) }
@multimodal ||= @message&.content.is_a?(Array) || @messages.any? { |m| m&.content.is_a?(Array) }
end

def messages=(messages)
Expand Down
1 change: 1 addition & 0 deletions lib/active_agent/generation_provider/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class Base
include ParameterBuilder

class GenerationProviderError < StandardError; end

attr_reader :client, :config, :prompt, :response, :access_token, :model_name

def initialize(config)
Expand Down
5 changes: 3 additions & 2 deletions lib/active_agent/generation_provider/error_handling.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def retry_delay(attempt)

def handle_generation_error(error)
error_message = format_error_message(error)

# Create new error with original backtrace preserved
new_error = ActiveAgent::GenerationProvider::Base::GenerationProviderError.new(error_message)
new_error.set_backtrace(error.backtrace) if error.respond_to?(:backtrace)
Expand All @@ -61,7 +60,9 @@ def handle_generation_error(error)
end

def format_error_message(error)
message = if error.respond_to?(:message)
message = if error.respond_to?(:response)
error.response[:body]
elsif error.respond_to?(:message)
error.message
elsif error.respond_to?(:to_s)
error.to_s
Expand Down
8 changes: 7 additions & 1 deletion lib/active_agent/generation_provider/open_ai_provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ def initialize(config)
@access_token ||= config["api_key"] || config["access_token"] || OpenAI.configuration.access_token || ENV["OPENAI_ACCESS_TOKEN"]
@organization_id = config["organization_id"] || OpenAI.configuration.organization_id || ENV["OPENAI_ORGANIZATION_ID"]
@admin_token = config["admin_token"] || OpenAI.configuration.admin_token || ENV["OPENAI_ADMIN_TOKEN"]
@client = OpenAI::Client.new(access_token: @access_token, uri_base: @host, organization_id: @organization_id)
@client = OpenAI::Client.new(
access_token: @access_token,
uri_base: @host,
organization_id: @organization_id,
admin_token: @admin_token,
log_errors: Rails.env.development?
)

@model_name = config["model"] || "gpt-4o-mini"
end
Expand Down
70 changes: 32 additions & 38 deletions lib/active_agent/generation_provider/open_router_provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,6 @@
module ActiveAgent
module GenerationProvider
class OpenRouterProvider < OpenAIProvider
# Vision-capable models on OpenRouter
VISION_MODELS = [
"openai/gpt-4-vision-preview",
"openai/gpt-4o",
"openai/gpt-4o-mini",
"anthropic/claude-3-5-sonnet",
"anthropic/claude-3-opus",
"anthropic/claude-3-sonnet",
"anthropic/claude-3-haiku",
"google/gemini-pro-1.5",
"google/gemini-pro-vision"
].freeze

# Models that support structured output
STRUCTURED_OUTPUT_MODELS = [
"openai/gpt-4o",
"openai/gpt-4o-2024-08-06",
"openai/gpt-4o-mini",
"openai/gpt-4o-mini-2024-07-18",
"openai/gpt-4-turbo",
"openai/gpt-4-turbo-2024-04-09",
"openai/gpt-3.5-turbo-0125",
"openai/gpt-3.5-turbo-1106"
].freeze

def initialize(config)
@config = config
@access_token = config["api_key"] || config["access_token"] ||
Expand All @@ -52,7 +27,7 @@ def initialize(config)
@client = OpenAI::Client.new(
uri_base: "https://openrouter.ai/api/v1",
access_token: @access_token,
log_errors: true,
log_errors: Rails.env.development?,
default_headers: openrouter_headers
)
end
Expand All @@ -69,15 +44,6 @@ def generate(prompt)
handle_openrouter_error(e)
end

# Helper methods for checking model capabilities
def supports_vision?(model = @model_name)
VISION_MODELS.include?(model)
end

def supports_structured_output?(model = @model_name)
STRUCTURED_OUTPUT_MODELS.include?(model)
end

protected

def build_provider_parameters
Expand All @@ -88,6 +54,32 @@ def build_provider_parameters
add_openrouter_params(params)
end

# Formats one content part, with special handling for "file" parts.
#
# item - a single content part; Hash parts may use symbol or string keys.
#
# A Hash whose type is "file" and which carries file -> file_data is
# rewritten as an image_url part whose url is that file_data. A "file"
# Hash without file_data is returned unchanged. Everything else
# (non-Hash items and Hashes of any other type) is delegated to the
# inherited implementation via `super`.
def format_content_item(item)
  return super unless item.is_a?(Hash)

  part_type = item[:type] || item["type"]
  return super unless part_type == "file"

  # Look up the payload under symbol keys first, then string keys.
  payload = item.dig(:file, :file_data) || item.dig("file", "file_data")
  return item unless payload

  { type: "image_url", image_url: { url: payload } }
end

private

def default_app_name
Expand Down Expand Up @@ -158,6 +150,10 @@ def build_openrouter_parameters
parameters[:provider] = build_provider_preferences
end

# Add plugins (e.g., for PDF processing)

parameters[:plugins] = prompt.options[:plugins] if prompt.options[:plugins].present?
parameters[:models] = prompt.options[:fallback_models] if prompt.options[:enable_fallbacks] && prompt.options[:fallback_models].present?
parameters
end

Expand Down Expand Up @@ -208,7 +204,6 @@ def execute_with_fallback(parameters)
parameters[:stream] = provider_stream if prompt.options[:stream] || config["stream"]

response = @client.chat(parameters: parameters)

# Log if fallback was used
if response.respond_to?(:headers) && response.headers["x-model"] != @model_name
Rails.logger.info "[OpenRouter] Fallback model used: #{response.headers['x-model']}" if defined?(Rails)
Expand All @@ -229,7 +224,6 @@ def process_openrouter_response(response)
message = handle_message(message_json) if message_json

update_context(prompt: prompt, message: message, response: response) if message

# Create response with OpenRouter metadata
@response = ActiveAgent::GenerationProvider::Response.new(
prompt: prompt,
Expand Down Expand Up @@ -315,7 +309,7 @@ def handle_openrouter_error(error)
handle_timeout_error(error)
else
# Fall back to parent error handling
super(error) if defined?(super)
raise GenerationProviderError, error, error.backtrace
end
end

Expand Down
2 changes: 1 addition & 1 deletion lib/active_agent/generation_provider/parameter_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def extract_prompt_options
options = {}

# Common options that map directly
[ :stream, :top_p, :frequency_penalty, :presence_penalty, :seed, :stop, :user ].each do |key|
[ :stream, :top_p, :frequency_penalty, :presence_penalty, :seed, :stop, :user, :plugins ].each do |key|
options[key] = @prompt.options[key] if @prompt.options.key?(key)
end

Expand Down
14 changes: 14 additions & 0 deletions test/action_prompt/prompt_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ class PromptTest < ActiveSupport::TestCase
assert prompt.multimodal?
end

# Exercises Prompt#multimodal? when the single message is absent (nil)
# and/or the messages list is empty.
test "multimodal? handles nil messages gracefully" do
# No message given and an empty messages array: not multimodal
prompt = Prompt.new(messages: [])
assert_not prompt.multimodal?

# Explicit nil message, but one entry in messages has Array content: multimodal
prompt_with_nil = Prompt.new(message: nil, messages: [ Message.new(content: [ "image.png" ]) ])
assert prompt_with_nil.multimodal?

# Explicit nil message and empty messages array: not multimodal
prompt_all_nil = Prompt.new(message: nil, messages: [])
assert_not prompt_all_nil.multimodal?
end

test "from_messages initializes messages from an array of Message objects" do
prompt = Prompt.new(
messages: [
Expand Down
18 changes: 9 additions & 9 deletions test/agents/open_router_agent_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ class OpenRouterAgentTest < ActiveSupport::TestCase
enable_fallbacks: true
end

# Just verify the agent can be created with these options
agent = agent_class.new
assert_not_nil agent
# Just verify the prompt can be created with these options
prompt = agent_class.with(message: "test").prompt_context
assert_not_nil prompt
end

test "it can configure provider preferences" do
Expand All @@ -53,9 +53,9 @@ class OpenRouterAgentTest < ActiveSupport::TestCase
}
end

# Just verify the agent can be created with these options
agent = agent_class.new
assert_not_nil agent
# Just verify the prompt can be created with these options
prompt = agent_class.with(message: "test").prompt_context
assert_not_nil prompt
end

test "it can enable transforms" do
Expand All @@ -66,8 +66,8 @@ class OpenRouterAgentTest < ActiveSupport::TestCase
transforms: [ "middle-out" ]
end

# Just verify the agent can be created with these options
agent = agent_class.new
assert_not_nil agent
# Just verify the prompt can be created with these options
prompt = agent_class.with(message: "test").prompt_context
assert_not_nil prompt
end
end
Loading