From eb11014672f8f2bcbf722733ea07ca2688d4b4f8 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 20 Mar 2026 16:09:21 -0700 Subject: [PATCH 1/2] fix: handle reasoning model output in OpenAI Responses API Reasoning models (e.g. gpt-oss-120b) return a "reasoning" output block before the "message" output, both using "output_text" content type. extractResponsesOutputText now skips reasoning outputs and prefers the message output. As defense-in-depth, both parseSectionClassifiedResponse and parseCriticResponse fall back to extracting the outermost JSON array when the response contains chain-of-thought preamble. Co-Authored-By: Claude Opus 4.6 --- internal/integrations/llm/llm.go | 43 ++++++++++++- internal/integrations/llm/llm_test.go | 91 +++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/internal/integrations/llm/llm.go b/internal/integrations/llm/llm.go index a9656f6..bab4a3d 100644 --- a/internal/integrations/llm/llm.go +++ b/internal/integrations/llm/llm.go @@ -352,6 +352,20 @@ func loadTemplateGuidance(path string) string { return text } +// extractJSONArray finds the outermost JSON array in text that may contain +// surrounding chain-of-thought reasoning from models that ignore json_schema. +func extractJSONArray(text string) (string, bool) { + start := strings.Index(text, "[") + if start == -1 { + return "", false + } + end := strings.LastIndex(text, "]") + if end == -1 || end <= start { + return "", false + } + return text[start : end+1], true +} + func parseSectionClassifiedResponse(responseText string) (map[int64]LLMSectionDecision, error) { responseText = strings.TrimSpace(responseText) responseText = strings.TrimPrefix(responseText, "```json") @@ -361,8 +375,17 @@ func parseSectionClassifiedResponse(responseText string) (map[int64]LLMSectionDe var classified []sectionClassifiedItem if err := json.Unmarshal([]byte(responseText), &classified); err != nil { + // Some OpenAI-compatible endpoints ignore json_schema and return + // chain-of-thought reasoning around the JSON array. Try to extract it. + if extracted, ok := extractJSONArray(responseText); ok { + if err2 := json.Unmarshal([]byte(extracted), &classified); err2 == nil { + log.Printf("llm warning: response contained non-JSON preamble, extracted JSON array (len=%d)", len(extracted)) + goto parsed + } + } return nil, fmt.Errorf("parsing LLM section response: %w (response: %s)", err, responseText) } +parsed: decisions := make(map[int64]LLMSectionDecision) for _, c := range classified { @@ -739,17 +762,29 @@ func doOpenAIResponsesRequest(apiKey, baseURL string, reqBody openAIResponsesReq } func extractResponsesOutputText(resp openAIResponsesResponse) (string, error) { + // Reasoning models return a "reasoning" output followed by a "message" + // output. Prefer the "message" output; fall back to any output_text. + var fallback string for _, output := range resp.Output { + isReasoning := output.Type == "reasoning" for _, content := range output.Content { if strings.TrimSpace(content.Text) == "" { continue } switch content.Type { case "output_text", "text": - return content.Text, nil + if !isReasoning { + return content.Text, nil + } + if fallback == "" { + fallback = content.Text + } } } } + if fallback != "" { + return fallback, nil + } return "", fmt.Errorf("no structured text content in OpenAI Responses payload") } @@ -836,6 +871,12 @@ func parseCriticResponse(responseText string) ([]criticFlagged, error) { var flagged []criticFlagged if err := json.Unmarshal([]byte(responseText), &flagged); err != nil { + if extracted, ok := extractJSONArray(responseText); ok { + if err2 := json.Unmarshal([]byte(extracted), &flagged); err2 == nil { + log.Printf("llm warning: critic response contained non-JSON preamble, extracted JSON array (len=%d)", len(extracted)) + return flagged, nil + } + } truncated := responseText if len(truncated) > 512 { truncated = truncated[:512] + fmt.Sprintf("... [truncated, total_length=%d]", len(responseText)) diff --git a/internal/integrations/llm/llm_test.go b/internal/integrations/llm/llm_test.go index 3714202..1a38fa8 100644 --- a/internal/integrations/llm/llm_test.go +++ b/internal/integrations/llm/llm_test.go @@ -210,6 +210,50 @@ func TestExtractResponsesOutputText(t *testing.T) { } } +func TestExtractResponsesOutputText_ReasoningWithOutputText(t *testing.T) { + // Real-world case: reasoning output also uses "output_text" content type. + // The extractor must skip reasoning and return the message output. + resp := openAIResponsesResponse{ + Output: []struct { + Type string `json:"type"` + Role string `json:"role,omitempty"` + Content []struct { + Type string `json:"type"` + Text string `json:"text"` + } `json:"content,omitempty"` + }{ + { + Type: "reasoning", + Role: "assistant", + Content: []struct { + Type string `json:"type"` + Text string `json:"text"` + }{ + {Type: "output_text", Text: "Let me think about classifying these items..."}, + }, + }, + { + Type: "message", + Role: "assistant", + Content: []struct { + Type string `json:"type"` + Text string `json:"text"` + }{ + {Type: "output_text", Text: `[{"id":1,"section_id":"S7_0","normalized_status":"done","ticket_ids":[],"duplicate_of":""}]`}, + }, + }, + }, + } + + got, err := extractResponsesOutputText(resp) + if err != nil { + t.Fatalf("extractResponsesOutputText error: %v", err) + } + if !strings.HasPrefix(got, "[") { + t.Fatalf("expected JSON array, got reasoning text: %q", got[:50]) + } +} + func TestParseCriticResponse(t *testing.T) { response := `[ {"id": 42, "reason": "This is a database task not infra", "suggested_section_id": "S1_0"}, @@ -240,3 +284,50 @@ func TestParseCriticResponse_Empty(t *testing.T) { t.Fatalf("expected 0 flagged items, got %d", len(flagged)) } } + +func TestExtractJSONArray(t *testing.T) { + tests := []struct { + name string + input string + want string + ok bool + }{ + {"pure array", `[{"id":1}]`, `[{"id":1}]`, true}, + {"reasoning prefix", `Let me think about this.\n[{"id":1}]`, `[{"id":1}]`, true}, + {"reasoning both sides", `Some reasoning\n[{"id":1}]\nDone.`, `[{"id":1}]`, true}, + {"no array", `No JSON here`, "", false}, + {"only open bracket", `text [ but no close`, "", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := extractJSONArray(tt.input) + if ok != tt.ok { + t.Fatalf("extractJSONArray ok=%v, want %v", ok, tt.ok) + } + if got != tt.want { + t.Fatalf("extractJSONArray=%q, want %q", got, tt.want) + } + }) + } +} + +func TestParseSectionClassifiedResponse_WithReasoning(t *testing.T) { + // Simulate a model that dumps chain-of-thought before JSON + reasoning := `We need to classify these items. Let me think... +ID 228 goes to S7_0. ID 237 is UND. +[{"id":228,"section_id":"S7_0","normalized_status":"in progress","ticket_ids":[],"duplicate_of":""},{"id":237,"section_id":"UND","normalized_status":"done","ticket_ids":[],"duplicate_of":""}] +That should be correct.` + decisions, err := parseSectionClassifiedResponse(reasoning) + if err != nil { + t.Fatalf("parseSectionClassifiedResponse with reasoning: %v", err) + } + if len(decisions) != 2 { + t.Fatalf("expected 2 decisions, got %d", len(decisions)) + } + if decisions[228].SectionID != "S7_0" { + t.Errorf("expected S7_0, got %q", decisions[228].SectionID) + } + if decisions[237].SectionID != "UND" { + t.Errorf("expected UND, got %q", decisions[237].SectionID) + } +} From 98f5a465bf68d04d1643e8566e2585208ec2739a Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 20 Mar 2026 16:14:13 -0700 Subject: [PATCH 2/2] docs: add tls_skip_verify to README and CLAUDE.md Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 1 + README.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 2809827..48a8f22 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,6 +34,7 @@ Configuration is layered: `config.yaml` is loaded first, then environment variab - **Nudge**: `team_members` (list of Slack full names or user IDs; used by `/check` and scheduled nudge), `nudge_day` (Monday-Sunday), `nudge_time` (HH:MM 24h format) - **Auto-fetch**: `auto_fetch_schedule` (5-field cron expression, e.g. `"0 9 * * 1-5"` for weekdays at 9am; empty to disable) - **Report**: `report_private` (bool, when true `/generate-report` DMs the report to the caller instead of posting to the channel; default false) +- **Network**: `tls_skip_verify` (bool, skip TLS cert verification for internal/corporate CAs; default false) - **Team**: `team_name` (used in report header and filename) See `config.yaml` and `README.md` for full reference. diff --git a/README.md b/README.md index e3c9914..4b3cd95 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,7 @@ team_name: "Example Team" # Report channel (Slack channel ID for reminders) report_channel_id: "C01234567" external_http_timeout_seconds: 90 # optional: timeout for GitLab/GitHub/LLM HTTP calls +tls_skip_verify: false # optional: skip TLS cert verification for internal/corporate CAs ``` @@ -203,6 +204,7 @@ export LLM_CRITIC_ENABLED=true # Optional: enable generator-cri export MANAGER_SLACK_IDS="U01ABC123,U02DEF456" # Comma-separated Slack user IDs export REPORT_CHANNEL_ID=C01234567 export EXTERNAL_HTTP_TIMEOUT_SECONDS=90 # Optional: timeout for external API HTTP calls +export TLS_SKIP_VERIFY=true # Optional: skip TLS cert verification export AUTO_FETCH_SCHEDULE="0 9 * * 1-5" # Optional: cron schedule for auto-fetch export MONDAY_CUTOFF_TIME=12:00 export TIMEZONE=America/Los_Angeles @@ -224,6 +226,7 @@ Set `llm_glossary_path` / `LLM_GLOSSARY_PATH` to apply glossary memory rules (se Set `llm_critic_enabled` / `LLM_CRITIC_ENABLED` to enable a second LLM pass that reviews classifications for errors. Set `openai_base_url` / `OPENAI_BASE_URL` when `llm_provider=openai` and you want to use an OpenAI-compatible endpoint instead of `api.openai.com` (for example a lab-hosted `gpt-oss-120b` server). Set `external_http_timeout_seconds` / `EXTERNAL_HTTP_TIMEOUT_SECONDS` to tune timeout limits for GitLab/GitHub/LLM API requests. +Set `tls_skip_verify` / `TLS_SKIP_VERIFY` to skip TLS certificate verification when connecting to internal or corporate API servers with self-signed or internal CA certificates. Glossary example (`llm_glossary.yaml`):