Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

FIX: Malformed message in systemless + inline img scenario #771

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion lib/ai_helper/assistant.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ def generate_image_caption(upload, user)
upload_ids: [upload.id],
},
],
- skip_validations: true,
)

DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model).generate(
Expand Down
17 changes: 11 additions & 6 deletions lib/completions/dialects/open_ai_compatible.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@ def translate

return translated unless llm_model.lookup_custom_param("disable_system_prompt")

- system_and_user_msgs = translated.shift(2)
- user_msg = system_and_user_msgs.last
- user_msg[:content] = [system_and_user_msgs.first[:content], user_msg[:content]].join("\n")
+ system_msg, user_msg = translated.shift(2)
+
+ if user_msg[:content].is_a?(Array) # Has inline images.
+ user_msg[:content].first[:text] = [
+ system_msg[:content],
+ user_msg[:content].first[:text],
+ ].join("\n")
+ else
+ user_msg[:content] = [system_msg[:content], user_msg[:content]].join("\n")
+ end

translated.unshift(user_msg)
end
Expand Down Expand Up @@ -79,16 +86,14 @@ def inline_images(content, message)
return content if encoded_uploads.blank?

content_w_imgs =
- encoded_uploads.reduce([]) do |memo, details|
+ encoded_uploads.reduce([{ type: "text", text: message[:content] }]) do |memo, details|
memo << {
type: "image_url",
image_url: {
url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
},
}
end
-
- content_w_imgs << { type: "text", text: message[:content] }
end
end
end
Expand Down
4 changes: 0 additions & 4 deletions lib/completions/prompt.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def initialize(
system_message_text = nil,
messages: [],
tools: [],
- skip_validations: false,
topic_id: nil,
post_id: nil,
max_pixels: nil
Expand All @@ -26,7 +25,6 @@ def initialize(
@post_id = post_id

@messages = []
- @skip_validations = skip_validations

if system_message_text
system_message = { type: :system, content: system_message_text }
Expand Down Expand Up @@ -68,7 +66,6 @@ def encoded_uploads(message)
private

def validate_message(message)
- return if @skip_validations
valid_types = %i[system user model tool tool_call]
if !valid_types.include?(message[:type])
raise ArgumentError, "message type must be one of #{valid_types}"
Expand All @@ -91,7 +88,6 @@ def validate_message(message)
end

def validate_turn(last_turn, new_turn)
- return if @skip_validations
valid_types = %i[tool tool_call model user]
raise INVALID_TURN if !valid_types.include?(new_turn[:type])

Expand Down
49 changes: 47 additions & 2 deletions spec/lib/completions/dialects/open_ai_compatible_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

RSpec.describe DiscourseAi::Completions::Dialects::OpenAiCompatible do
context "when system prompts are disabled" do
+ fab!(:model) do
+ Fabricate(:vllm_model, vision_enabled: true, provider_params: { disable_system_prompt: true })
+ end
+
it "merges the system prompt into the first message" do
system_msg = "This is a system message"
user_msg = "user message"
Expand All @@ -11,15 +15,56 @@
messages: [{ type: :user, content: user_msg }],
)

- model = Fabricate(:vllm_model, provider_params: { disable_system_prompt: true })
-
translated_messages = described_class.new(prompt, model).translate

expect(translated_messages.length).to eq(1)
expect(translated_messages).to contain_exactly(
{ role: "user", content: [system_msg, user_msg].join("\n") },
)
end
+
+ context "when the prompt has inline images" do
+ let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }
+
+ it "produces a valid message" do
+ upload = UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)
+ prompt =
+ DiscourseAi::Completions::Prompt.new(
+ "You are a bot specializing in image captioning.",
+ messages: [
+ {
+ type: :user,
+ content: "Describe this image in a single sentence.",
+ upload_ids: [upload.id],
+ },
+ ],
+ )
+ encoded_upload =
+ DiscourseAi::Completions::UploadEncoder.encode(
+ upload_ids: [upload.id],
+ max_pixels: prompt.max_pixels,
+ ).first
+
+ translated_messages = described_class.new(prompt, model).translate
+
+ expect(translated_messages.length).to eq(1)
+
+ expected_user_message = {
+ role: "user",
+ content: [
+ { type: "text", text: prompt.messages.map { |m| m[:content] }.join("\n") },
+ {
+ type: "image_url",
+ image_url: {
+ url: "data:#{encoded_upload[:mime_type]};base64,#{encoded_upload[:base64]}",
+ },
+ },
+ ],
+ }
+
+ expect(translated_messages).to contain_exactly(expected_user_message)
+ end
+ end
end

context "when system prompts are enabled" do
Expand Down