|
21 | 21 | import com.google.common.collect.ImmutableList; |
22 | 22 | import com.google.common.collect.ImmutableMap; |
23 | 23 | import com.google.common.collect.Iterables; |
| 24 | +import com.google.genai.types.Blob; |
24 | 25 | import com.google.genai.types.Candidate; |
25 | 26 | import com.google.genai.types.Content; |
26 | 27 | import com.google.genai.types.FinishReason; |
|
29 | 30 | import com.google.genai.types.GenerateContentResponseUsageMetadata; |
30 | 31 | import com.google.genai.types.Part; |
31 | 32 | import com.google.genai.types.PartialArg; |
| 33 | +import com.google.genai.types.ToolCall; |
| 34 | +import com.google.genai.types.ToolResponse; |
32 | 35 | import io.reactivex.rxjava3.core.Flowable; |
33 | 36 | import io.reactivex.rxjava3.functions.Predicate; |
34 | 37 | import io.reactivex.rxjava3.subscribers.TestSubscriber; |
@@ -368,6 +371,126 @@ public void processRawResponses_twoStreamingFunctionCalls_keepArgsSeparate() { |
368 | 371 | assertThat(second.args().get()).containsExactly("b", "2"); |
369 | 372 | } |
370 | 373 |
|
| 374 | + @Test |
| 375 | + public void processRawResponses_imageOnlyWithStop_emitsFinalImagePart() { |
| 376 | + Part imagePart = Part.fromBytes(new byte[] {1, 2, 3}, "image/png"); |
| 377 | + GenerateContentResponse imageWithStop = |
| 378 | + toResponse( |
| 379 | + Candidate.builder() |
| 380 | + .content(Content.builder().role("model").parts(imagePart).build()) |
| 381 | + .finishReason(new FinishReason(FinishReason.Known.STOP)) |
| 382 | + .build()); |
| 383 | + |
| 384 | + ImmutableList<LlmResponse> responses = |
| 385 | + ImmutableList.copyOf( |
| 386 | + Gemini.processRawResponses(Flowable.just(imageWithStop)).blockingIterable()); |
| 387 | + |
| 388 | + LlmResponse finalResponse = Iterables.getLast(responses); |
| 389 | + assertThat(finalResponse.content().get().parts().get()).hasSize(1); |
| 390 | + assertThat(finalResponse.content().get().parts().get().get(0).inlineData()).isPresent(); |
| 391 | + } |
| 392 | + |
| 393 | + // Image-generation models return image bytes as an inline-data part, often alongside text. |
| 394 | + // Regression test: the aggregated response must retain the image, not just the text. |
| 395 | + @Test |
| 396 | + public void processRawResponses_textThenImageWithStop_finalKeepsTextAndImage() { |
| 397 | + Part imagePart = Part.fromBytes(new byte[] {1, 2, 3}, "image/png"); |
| 398 | + GenerateContentResponse textChunk = toResponseWithText("Here is your image:"); |
| 399 | + GenerateContentResponse imageWithStop = |
| 400 | + toResponse( |
| 401 | + Candidate.builder() |
| 402 | + .content(Content.builder().role("model").parts(imagePart).build()) |
| 403 | + .finishReason(new FinishReason(FinishReason.Known.STOP)) |
| 404 | + .build()); |
| 405 | + |
| 406 | + ImmutableList<LlmResponse> responses = |
| 407 | + ImmutableList.copyOf( |
| 408 | + Gemini.processRawResponses(Flowable.just(textChunk, imageWithStop)).blockingIterable()); |
| 409 | + |
| 410 | + LlmResponse finalResponse = Iterables.getLast(responses); |
| 411 | + assertThat(finalResponse.content().get().parts().get()).hasSize(2); |
| 412 | + assertThat(finalResponse.content().get().parts().get().get(0).text()) |
| 413 | + .hasValue("Here is your image:"); |
| 414 | + assertThat(finalResponse.content().get().parts().get().get(1).inlineData()).isPresent(); |
| 415 | + } |
| 416 | + |
| 417 | + // The aggregator must pass through any non-text, non-function-call part, not just an allowlist. |
| 418 | + // These guard the part types that were being silently dropped: server-side tool calls/responses |
| 419 | + // and function responses. Each uses a text-then-part sequence so the part must survive the final |
| 420 | + // aggregation (a lone part would otherwise slip through via the empty-sequence fallback). |
| 421 | + @Test |
| 422 | + public void processRawResponses_textThenToolCall_finalKeepsBoth() { |
| 423 | + Part toolCallPart = |
| 424 | + Part.builder() |
| 425 | + .toolCall(ToolCall.builder().id("tc-1").args(ImmutableMap.of("q", "weather")).build()) |
| 426 | + .build(); |
| 427 | + |
| 428 | + LlmResponse finalResponse = aggregateTextThenPart(toolCallPart); |
| 429 | + |
| 430 | + assertThat(finalResponse.content().get().parts().get()).hasSize(2); |
| 431 | + assertThat(finalResponse.content().get().parts().get().get(1).toolCall()).isPresent(); |
| 432 | + } |
| 433 | + |
| 434 | + @Test |
| 435 | + public void processRawResponses_textThenToolResponse_finalKeepsBoth() { |
| 436 | + Part toolResponsePart = |
| 437 | + Part.builder() |
| 438 | + .toolResponse( |
| 439 | + ToolResponse.builder().id("tc-1").response(ImmutableMap.of("ok", true)).build()) |
| 440 | + .build(); |
| 441 | + |
| 442 | + LlmResponse finalResponse = aggregateTextThenPart(toolResponsePart); |
| 443 | + |
| 444 | + assertThat(finalResponse.content().get().parts().get()).hasSize(2); |
| 445 | + assertThat(finalResponse.content().get().parts().get().get(1).toolResponse()).isPresent(); |
| 446 | + } |
| 447 | + |
| 448 | + @Test |
| 449 | + public void processRawResponses_textThenFunctionResponse_finalKeepsBoth() { |
| 450 | + Part functionResponsePart = Part.fromFunctionResponse("my_tool", ImmutableMap.of("result", 42)); |
| 451 | + |
| 452 | + LlmResponse finalResponse = aggregateTextThenPart(functionResponsePart); |
| 453 | + |
| 454 | + assertThat(finalResponse.content().get().parts().get()).hasSize(2); |
| 455 | + assertThat(finalResponse.content().get().parts().get().get(1).functionResponse()).isPresent(); |
| 456 | + } |
| 457 | + |
| 458 | + // Per the Gemini docs, a data part (e.g. inlineData) can carry a thoughtSignature with the |
| 459 | + // thought |
| 460 | + // flag unset (multi-turn image editing). The part must be kept verbatim, and its signature must |
| 461 | + // not leak onto the preceding text part (the docs forbid putting a signature on a part that did |
| 462 | + // not originally carry one). |
| 463 | + @Test |
| 464 | + public void processRawResponses_textThenDataPartWithSignature_keepsSignatureOnDataPartOnly() { |
| 465 | + Part imageWithSignature = |
| 466 | + Part.builder() |
| 467 | + .inlineData(Blob.builder().mimeType("image/png").data(new byte[] {1, 2, 3}).build()) |
| 468 | + .thoughtSignature("sig".getBytes(UTF_8)) |
| 469 | + .build(); |
| 470 | + |
| 471 | + LlmResponse finalResponse = aggregateTextThenPart(imageWithSignature); |
| 472 | + |
| 473 | + assertThat(finalResponse.content().get().parts().get()).hasSize(2); |
| 474 | + assertThat(finalResponse.content().get().parts().get().get(0).text()) |
| 475 | + .hasValue("Working on it:"); |
| 476 | + assertThat(finalResponse.content().get().parts().get().get(0).thoughtSignature()).isEmpty(); |
| 477 | + assertThat(finalResponse.content().get().parts().get().get(1).inlineData()).isPresent(); |
| 478 | + assertThat(finalResponse.content().get().parts().get().get(1).thoughtSignature()).isPresent(); |
| 479 | + } |
| 480 | + |
| 481 | + private LlmResponse aggregateTextThenPart(Part part) { |
| 482 | + GenerateContentResponse textChunk = toResponseWithText("Working on it:"); |
| 483 | + GenerateContentResponse partWithStop = |
| 484 | + toResponse( |
| 485 | + Candidate.builder() |
| 486 | + .content(Content.builder().role("model").parts(part).build()) |
| 487 | + .finishReason(new FinishReason(FinishReason.Known.STOP)) |
| 488 | + .build()); |
| 489 | + return Iterables.getLast( |
| 490 | + ImmutableList.copyOf( |
| 491 | + Gemini.processRawResponses(Flowable.just(textChunk, partWithStop)).blockingIterable())); |
| 492 | + } |
| 493 | + |
371 | 494 | @Test |
372 | 495 | public void processRawResponses_textAndStopReason_emitsPartialThenFinalText() { |
373 | 496 | Flowable<GenerateContentResponse> rawResponses = |
|
0 commit comments