Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions lib/active_agent/action_prompt/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -309,15 +309,17 @@ def prompt(headers = {}, &block)
context.options.merge!(merged_options)

content_type = headers[:content_type]

headers = apply_defaults(headers)
context.messages = headers[:messages] || []
context.context_id = headers[:context_id]
context.params = params

context.output_schema = load_schema(headers[:output_schema], set_prefixes(headers[:output_schema], lookup_context.prefixes))

context.charset = charset = headers[:charset]

headers = prepare_message(headers)

headers = prepare_message(headers)
# wrap_generation_behavior!(headers[:generation_method], headers[:generation_method_options])
# assign_headers_to_context(context, headers)
responses = collect_responses(headers, &block)
Expand All @@ -327,6 +329,7 @@ def prompt(headers = {}, &block)
create_parts_from_responses(context, responses)

context.content_type = set_content_type(context, content_type, headers[:content_type])

context.charset = charset
context.actions = headers[:actions] || action_schemas

Expand Down Expand Up @@ -367,11 +370,11 @@ def load_input_data(headers)
]
elsif headers[:file_data].present?
headers[:body] = [
ActiveAgent::ActionPrompt::Message.new(content: headers[:file_data], content_type: "file_data"),
ActiveAgent::ActionPrompt::Message.new(content: headers[:file_data], metadata: { filename: "resume.pdf" }, content_type: "file_data"),
ActiveAgent::ActionPrompt::Message.new(content: headers[:body], content_type: "input_text")
]
end

headers
end

Expand Down
2 changes: 1 addition & 1 deletion lib/active_agent/action_prompt/prompt.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def initialize(attributes = {})
end

# Reports whether this prompt carries multimodal content, i.e. whether the
# single +@message+ or any entry of +@messages+ has an Array as its content.
#
# A truthy answer is cached in +@multimodal+. A false answer is not retained
# by +||=+, so it is recomputed on the next call.
#
# @return [Boolean] true when any message content is an Array
def multimodal?
  @multimodal ||= @message&.content.is_a?(Array) || @messages.any? { |m| m.content.is_a?(Array) }
end

def messages=(messages)
Expand Down
1 change: 1 addition & 0 deletions lib/active_agent/base.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# frozen_string_literal: true

require "active_agent/action_prompt"
require "active_agent/prompt_helper"
require "active_agent/action_prompt/base"
Expand Down
5 changes: 3 additions & 2 deletions lib/active_agent/generation_provider/open_ai_provider.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
require "active_agent/action_prompt/action"
require_relative "base"
require_relative "response"
require_relative "responses_adapter"

module ActiveAgent
module GenerationProvider
Expand Down Expand Up @@ -127,7 +128,7 @@ def responses_response(response)
role: message_json["role"].intern,
action_requested: message_json["finish_reason"] == "tool_calls",
raw_actions: message_json["tool_calls"] || [],
requested_actions: handle_actions(message_json["tool_calls"])
content_type: prompt.output_schema.present? ? "application/json" : "text/plain",
)

@response = ActiveAgent::GenerationProvider::Response.new(prompt: prompt, message: message, raw_response: response)
Expand Down Expand Up @@ -172,7 +173,7 @@ def responses_prompt(parameters: responses_parameters)
def responses_parameters(model: @prompt.options[:model] || @model_name, messages: @prompt.messages, temperature: @prompt.options[:temperature] || @config["temperature"] || 0.7, tools: @prompt.actions, structured_output: @prompt.output_schema)
{
model: model,
input: ActiveAgent::GenerationProvider::OpenAIAdapters::ResponsesAdapter.new(@prompt).input,
input: ActiveAgent::GenerationProvider::ResponsesAdapter.new(@prompt).input,
tools: tools.presence,
text: structured_output
}.compact
Expand Down
72 changes: 35 additions & 37 deletions lib/active_agent/generation_provider/responses_adapter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,43 @@

module ActiveAgent
module GenerationProvider
module OpenAIAdapters
class ResponsesAdapter < BaseAdapter
# Builds an adapter around the given prompt.
#
# @param prompt [Object] the prompt whose messages this adapter converts;
#   it is forwarded to the parent initializer and also kept in +@prompt+
def initialize(prompt)
  super # bare super forwards +prompt+ unchanged
  @prompt = prompt
end
class ResponsesAdapter < BaseAdapter
# Builds an adapter around the given prompt.
#
# @param prompt [Object] the prompt whose messages this adapter converts;
#   it is forwarded to the parent initializer and also kept in +@prompt+
def initialize(prompt)
  super # bare super forwards +prompt+ unchanged
  @prompt = prompt
end

# Converts every entry of +messages+ into a hash with +:role+ and +:content+.
#
# Non-Array content is passed through untouched. Array content is converted
# element by element into typed parts:
# * a String becomes an "input_text" part;
# * an ActiveAgent::ActionPrompt::Message becomes an "input_text",
#   "input_image" or "input_file" part according to its +content_type+
#   ("input_text", "image_data" or "file_data").
#
# @return [Array<Hash>] one hash per message, in the original order
# @raise [ArgumentError] when an Array element is neither a String nor a
#   Message with one of the content types listed above
def input
  messages.map do |message|
    payload = message.content

    if payload.is_a?(Array)
      payload = payload.map do |part|
        case part
        when String
          { type: "input_text", text: part }
        when ActiveAgent::ActionPrompt::Message
          case part.content_type
          when "input_text"
            { type: "input_text", text: part.content }
          when "image_data"
            { type: "input_image", image_url: part.content }
          when "file_data"
            { type: "input_file", filename: part.metadata[:filename], file_data: part.content }
          else
            raise ArgumentError, "Unsupported content type in message"
          end
        else
          raise ArgumentError, "Unsupported content type in message"
        end
      end
    end

    { role: message.role, content: payload }
  end
end
# Converts every entry of +messages+ into a hash with +:role+ and +:content+.
#
# Non-Array content is passed through untouched. Array content is converted
# element by element into typed parts:
# * a String becomes an "input_text" part;
# * an ActiveAgent::ActionPrompt::Message becomes an "input_text",
#   "input_image" or "input_file" part according to its +content_type+
#   ("input_text", "image_data" or "file_data").
#
# @return [Array<Hash>] one hash per message, in the original order
# @raise [ArgumentError] when an Array element is neither a String nor a
#   Message with one of the content types listed above
def input
  messages.map do |message|
    payload = message.content

    if payload.is_a?(Array)
      payload = payload.map do |part|
        case part
        when String
          { type: "input_text", text: part }
        when ActiveAgent::ActionPrompt::Message
          case part.content_type
          when "input_text"
            { type: "input_text", text: part.content }
          when "image_data"
            { type: "input_image", image_url: part.content }
          when "file_data"
            { type: "input_file", filename: part.metadata[:filename], file_data: part.content }
          else
            raise ArgumentError, "Unsupported content type in message"
          end
        else
          raise ArgumentError, "Unsupported content type in message"
        end
      end
    end

    { role: message.role, content: payload }
  end
end

# Returns the message list of the wrapped prompt by delegating to
# +prompt.messages+.
# NOTE(review): +prompt+ is not defined in the visible class body; presumably
# it is a reader provided by BaseAdapter. Confirm.
def messages
prompt.messages
end
# Returns the message list of the wrapped prompt by delegating to
# +prompt.messages+.
# NOTE(review): +prompt+ is not defined in the visible class body; presumably
# it is a reader provided by BaseAdapter. Confirm.
def messages
prompt.messages
end
end
end
end
end
4 changes: 2 additions & 2 deletions test/action_prompt/prompt_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ class PromptTest < ActiveSupport::TestCase
end

# A lone +message+ whose content is an Array marks the prompt as multimodal.
test "multimodal? returns true if message content is an array" do
  prompt = Prompt.new(message: Message.new(content: [ "image1.png", "image2.png" ]))
  assert prompt.multimodal?
end

# One Array-content entry among +messages+ is enough to mark the prompt as
# multimodal, even when other entries carry plain text.
test "multimodal? returns true if any message content is an array" do
  prompt = Prompt.new(messages: [ Message.new(content: "text"), Message.new(content: [ "image1.png", "image2.png" ]) ])
  assert prompt.multimodal?
end

Expand Down
95 changes: 94 additions & 1 deletion test/agents/data_extraction_agent_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,97 @@ class DataExtractionAgentTest < ActiveSupport::TestCase
assert_equal response.message.content, "The cat in the image has a sleek, short coat that appears to be a grayish-brown color. Its eyes are large and striking, with a vivid green hue. The cat is sitting comfortably, being gently petted by a hand that is adorned with a bracelet. Overall, it has a calm and curious expression. The background features a dark, soft surface, adding to the cozy atmosphere of the scene."
end
end
end

# Builds a prompt from the sample resume PDF via +parse_content+, checks that
# it is a two-part multimodal prompt, then generates a response (replayed from
# a VCR cassette) and checks that it mentions the expected name and job title.
test "parse_resume creates a multimodal prompt with file data" do
  prompt = nil
  VCR.use_cassette("data_extraction_agent_parse_resume") do
    prompt = DataExtractionAgent.with(file_path: Rails.root.join("..", "..", "test", "fixtures", "files", "sample_resume.pdf")).parse_content

    assert_equal "multipart/mixed", prompt.content_type
    assert prompt.multimodal?
    assert_kind_of Array, prompt.message.content
    assert_equal 2, prompt.message.content.size
  end

  VCR.use_cassette("data_extraction_agent_parse_resume_generation_response") do
    response = prompt.generate_now

    # assert_includes reports the actual content when the expectation fails.
    assert_includes response.message.content, "John Doe"
    assert_includes response.message.content, "Software Engineer"
  end
end

# Same as the plain resume test but requests the +:resume_schema+ output
# schema, so the generated response is expected to be JSON with the given
# content type, schema name and field values.
test "parse_resume creates a multimodal prompt with file data with structured output schema" do
  prompt = nil
  VCR.use_cassette("data_extraction_agent_parse_resume_with_structured_output") do
    prompt = DataExtractionAgent.with(output_schema: :resume_schema, file_path: Rails.root.join("..", "..", "test", "fixtures", "files", "sample_resume.pdf")).parse_content

    assert_equal "multipart/mixed", prompt.content_type
    assert prompt.multimodal?, "Prompt should be multimodal with file data"
    assert_kind_of Array, prompt.message.content, "Prompt message content should be an array for multimodal support"
    assert_equal 2, prompt.message.content.size
  end

  VCR.use_cassette("data_extraction_agent_parse_resume_generation_response_with_structured_output") do
    response = prompt.generate_now
    json_response = JSON.parse(response.message.content)
    assert_equal "application/json", response.message.content_type

    assert_equal "resume_schema", response.prompt.output_schema["format"]["name"]
    # Expected value goes first so failure messages label the two sides correctly.
    assert_equal "John Doe", json_response["name"]
    assert_equal "john.doe@example.com", json_response["email"]
    expected_content = "{\"name\":\"John Doe\",\"email\":\"john.doe@example.com\",\"phone\":\"(555) 123-4567\",\"education\":[{\"degree\":\"BS Computer Science\",\"institution\":\"Stanford University\",\"year\":2020}],\"experience\":[{\"job_title\":\"Senior Software Engineer\",\"company\":\"TechCorp\",\"duration\":\"2020-2024\"}]}"
    assert_equal expected_content, response.message.content
    assert_includes response.message.content, "John Doe"
    assert_includes response.message.content, "Software Engineer"
  end
end

# Builds a prompt from the sales chart image via +parse_content+, checks that
# it is a two-part multimodal prompt, then compares the generated response
# (replayed from a VCR cassette) with the recorded description.
test "parse_chart content from image data" do
  prompt = nil
  VCR.use_cassette("data_extraction_agent_parse_chart") do
    prompt = DataExtractionAgent.with(image_path: Rails.root.join("..", "..", "test", "fixtures", "images", "sales_chart.png")).parse_content

    assert_equal "multipart/mixed", prompt.content_type
    assert prompt.multimodal?, "Prompt should be multimodal with image data"
    assert_kind_of Array, prompt.message.content
    assert_equal 2, prompt.message.content.size
  end

  VCR.use_cassette("data_extraction_agent_parse_chart_generation_response") do
    response = prompt.generate_now

    # Expected value goes first so failure messages label the two sides correctly.
    expected_content = "The graph titled \"Quarterly Sales Report\" displays sales revenue for four quarters in 2024. Key points include:\n\n- **Q1**: Blue bar represents the lowest sales revenue.\n- **Q2**: Green bar shows an increase in sales compared to Q1.\n- **Q3**: Yellow bar continues the upward trend with higher sales than Q2.\n- **Q4**: Red bar indicates the highest sales revenue of the year.\n\nOverall, there is a clear upward trend in sales revenue over the quarters, reaching a peak in Q4."
    assert_equal expected_content, response.message.content
  end
end

# Same as the plain chart test but requests the +:chart_schema+ output schema,
# so the generated response is expected to be JSON whose title and four data
# points match the recorded cassette.
test "parse_chart content from image data with structured output schema" do
  prompt = nil
  VCR.use_cassette("data_extraction_agent_parse_chart_with_structured_output") do
    prompt = DataExtractionAgent.with(output_schema: :chart_schema, image_path: Rails.root.join("..", "..", "test", "fixtures", "images", "sales_chart.png")).parse_content

    assert_equal "multipart/mixed", prompt.content_type
    assert prompt.multimodal?, "Prompt should be multimodal with image data"
    assert_kind_of Array, prompt.message.content
    assert_equal 2, prompt.message.content.size
  end

  VCR.use_cassette("data_extraction_agent_parse_chart_generation_response_with_structured_output") do
    response = prompt.generate_now
    json_response = JSON.parse(response.message.content)
    assert_equal "application/json", response.message.content_type

    assert_equal "chart_schema", response.prompt.output_schema["format"]["name"]

    # Expected value goes first so failure messages label the two sides correctly.
    assert_equal "Quarterly Sales Report", json_response["title"]
    assert_kind_of Array, json_response["data_points"], "Data points should be an array"
    assert_equal "Q1", json_response["data_points"].first["label"]
    assert_equal 25000, json_response["data_points"].first["value"]
    assert_equal "Q2", json_response["data_points"][1]["label"]
    assert_equal 50000, json_response["data_points"][1]["value"]
    assert_equal "Q3", json_response["data_points"][2]["label"]
    assert_equal 75000, json_response["data_points"][2]["value"]
    assert_equal "Q4", json_response["data_points"].last["label"]
    assert_equal 100000, json_response["data_points"].last["value"]
  end
end
end
17 changes: 16 additions & 1 deletion test/dummy/app/agents/data_extraction_agent.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
class DataExtractionAgent < ApplicationAgent
before_action :set_multimodal_content, only: [ :parse_content ]

# Builds a prompt asking for a description of a cat picture. The image payload
# is whatever +CatImageService.fetch_base64_image+ returns.
def describe_cat_image
  headers = {
    message: "Describe the cat in the image",
    image_data: CatImageService.fetch_base64_image
  }
  prompt(headers)
end
end

# Builds the parsing prompt from the +@image_data+ / +@file_data+ values
# and the optional +params[:output_schema]+. A +before_action+ registered
# on this class for +parse_content+ assigns the two instance variables.
def parse_content
  headers = {
    message: "Parse the resume",
    image_data: @image_data,
    file_data: @file_data,
    output_schema: params[:output_schema]
  }
  prompt(headers)
end

private
# Hook registered via +before_action+ for +parse_content+. Loads the file
# named by +params[:file_path]+ (sent as a PDF) or, when that is absent,
# +params[:image_path]+, and stores it as a base64 data URL in +@file_data+ or
# +@image_data+ respectively. Values already set are left untouched.
#
# The bytes are read in binary mode and encoded with +strict_encode64+, so the
# data URL never contains the line breaks that +encode64+ inserts. The image
# media type follows the file extension and falls back to image/jpeg for
# unrecognised extensions.
def set_multimodal_content
  if params[:file_path].present?
    @file_data ||= "data:application/pdf;base64,#{Base64.strict_encode64(File.binread(params[:file_path]))}"
  elsif params[:image_path].present?
    @image_data ||= begin
      media_type =
        case File.extname(params[:image_path]).downcase
        when ".png" then "image/png"
        when ".gif" then "image/gif"
        when ".webp" then "image/webp"
        else "image/jpeg"
        end
      "data:#{media_type};base64,#{Base64.strict_encode64(File.binread(params[:image_path]))}"
    end
  end
end
end
40 changes: 40 additions & 0 deletions test/dummy/app/views/data_extraction_agent/chart_schema.json.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"format": {
"type": "json_schema",
"name": "chart_schema",
"schema": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "The title of the chart."
},
"data_points": {
"type": "array",
"items": {
"$ref": "#/$defs/data_point"
}
}
},
"required": ["title", "data_points"],
"additionalProperties": false,
"$defs": {
"data_point": {
"type": "object",
"properties": {
"label": {
"type": "string",
"description": "The label for the data point."
},
"value": {
"type": "number",
"description": "The value of the data point."
}
},
"required": ["label", "value"],
"additionalProperties": false
}
}
}
}
}
Loading