diff --git a/src/main/java/com/williamcallahan/javachat/service/OpenAIStreamingService.java b/src/main/java/com/williamcallahan/javachat/service/OpenAIStreamingService.java index 3c93394..217d039 100644 --- a/src/main/java/com/williamcallahan/javachat/service/OpenAIStreamingService.java +++ b/src/main/java/com/williamcallahan/javachat/service/OpenAIStreamingService.java @@ -168,7 +168,6 @@ public Mono streamResponse(StructuredPrompt structuredPrompt, d * @return completion text from the first successful provider attempt */ public Mono complete(String prompt, double temperature) { - String truncatedPrompt = requestFactory.truncatePromptForCompletion(prompt); return Mono.defer(() -> { List availableProviders = providerRoutingService.selectAvailableProviderCandidates(clientPrimary, clientSecondary); @@ -185,7 +184,7 @@ public Mono complete(String prompt, double temperature) { RateLimitService.ApiProvider activeProvider = providerCandidate.provider(); ResponseCreateParams requestParameters = - requestFactory.buildCompletionRequest(truncatedPrompt, temperature, activeProvider); + requestFactory.buildCompletionRequest(prompt, temperature, activeProvider); try { log.info("[LLM] Complete started (providerId={})", activeProvider.ordinal()); RequestOptions requestOptions = RequestOptions.builder() diff --git a/src/main/java/com/williamcallahan/javachat/service/OpenAiRequestFactory.java b/src/main/java/com/williamcallahan/javachat/service/OpenAiRequestFactory.java index f331a24..aa353e1 100644 --- a/src/main/java/com/williamcallahan/javachat/service/OpenAiRequestFactory.java +++ b/src/main/java/com/williamcallahan/javachat/service/OpenAiRequestFactory.java @@ -114,7 +114,8 @@ public ResponseCreateParams buildCompletionRequest( String prompt, double temperature, RateLimitService.ApiProvider provider) { boolean useGitHubModels = provider == RateLimitService.ApiProvider.GITHUB_MODELS; String modelId = normalizedModelId(useGitHubModels); - return buildResponseParams(prompt, temperature, modelId); + String truncatedPrompt = truncatePromptForCompletion(prompt, modelId); + return buildResponseParams(truncatedPrompt, temperature, modelId); } /** @@ -124,16 +125,29 @@ public ResponseCreateParams buildCompletionRequest( * @return original prompt when no truncation is required, otherwise a notice-prefixed prompt */ public String truncatePromptForCompletion(String prompt) { + return truncatePromptForCompletion(prompt, RateLimitService.ApiProvider.OPENAI); + } + + /** + * Truncates completion prompts to token limits for the selected provider's model. + * + * @param prompt full completion prompt + * @param provider provider chosen for this request attempt + * @return original prompt when no truncation is required, otherwise a notice-prefixed prompt + */ + public String truncatePromptForCompletion(String prompt, RateLimitService.ApiProvider provider) { + boolean useGitHubModels = provider == RateLimitService.ApiProvider.GITHUB_MODELS; + String modelId = normalizedModelId(useGitHubModels); + return truncatePromptForCompletion(prompt, modelId); + } + + private String truncatePromptForCompletion(String prompt, String modelId) { if (prompt == null || prompt.isEmpty()) { return prompt; } - String openaiModelId = normalizedModelId(false); - String githubModelId = normalizedModelId(true); - boolean gpt5Family = isGpt5Family(openaiModelId) || isGpt5Family(githubModelId); - boolean reasoningModel = gpt5Family - || canonicalModelName(openaiModelId).startsWith("o") - || canonicalModelName(githubModelId).startsWith("o"); + boolean gpt5Family = isGpt5Family(modelId); + boolean reasoningModel = gpt5Family || canonicalModelName(modelId).startsWith("o"); int tokenLimit = reasoningModel ? MAX_TOKENS_GPT5_INPUT : MAX_TOKENS_DEFAULT_INPUT; String truncatedPrompt = chunker.keepLastTokens(prompt, tokenLimit); diff --git a/src/test/java/com/williamcallahan/javachat/service/OpenAiRequestFactoryTest.java b/src/test/java/com/williamcallahan/javachat/service/OpenAiRequestFactoryTest.java index f1032af..643d6ec 100644 --- a/src/test/java/com/williamcallahan/javachat/service/OpenAiRequestFactoryTest.java +++ b/src/test/java/com/williamcallahan/javachat/service/OpenAiRequestFactoryTest.java @@ -48,4 +48,29 @@ void buildCompletionRequestRetainsQualifiedGitHubModelIdentifier() { assertTrue(responseCreateParams.maxOutputTokens().isEmpty()); assertEquals(0.25, responseCreateParams.temperature().orElseThrow(), 0.000_001); } + + @Test + void truncatePromptForCompletionUsesSelectedOpenAiModelLimit() { + OpenAiRequestFactory requestFactory = + new OpenAiRequestFactory(new Chunker(), new PromptTruncator(), "gpt-4o", "openai/gpt-5", ""); + String prompt = "context ".repeat(8_000); + + String truncatedPrompt = + requestFactory.truncatePromptForCompletion(prompt, RateLimitService.ApiProvider.OPENAI); + + assertEquals(prompt, truncatedPrompt); + } + + @Test + void truncatePromptForCompletionUsesSelectedGitHubModelsLimit() { + OpenAiRequestFactory requestFactory = + new OpenAiRequestFactory(new Chunker(), new PromptTruncator(), "gpt-4o", "gpt-5", ""); + String prompt = "context ".repeat(8_000); + + String truncatedPrompt = + requestFactory.truncatePromptForCompletion(prompt, RateLimitService.ApiProvider.GITHUB_MODELS); + + assertTrue(truncatedPrompt.startsWith("[Context truncated due to GPT-5 8K input limit]")); + assertTrue(truncatedPrompt.length() < prompt.length()); + } }