From eb11014672f8f2bcbf722733ea07ca2688d4b4f8 Mon Sep 17 00:00:00 2001
From: Codex <codex@local>
Date: Fri, 20 Mar 2026 16:09:21 -0700
Subject: [PATCH 1/2] fix: handle reasoning model output in OpenAI Responses
 API

Reasoning models (e.g. gpt-oss-120b) return a "reasoning" output block
before the "message" output, both using "output_text" content type.
extractResponsesOutputText now prefers the message output and returns
reasoning text only when no other output carries any text. As
defense-in-depth, both parseSectionClassifiedResponse and
parseCriticResponse fall back to extracting the outermost JSON array
when the response contains chain-of-thought preamble.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 internal/integrations/llm/llm.go      | 43 ++++++++++++-
 internal/integrations/llm/llm_test.go | 91 +++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/internal/integrations/llm/llm.go b/internal/integrations/llm/llm.go
index a9656f6..bab4a3d 100644
--- a/internal/integrations/llm/llm.go
+++ b/internal/integrations/llm/llm.go
@@ -352,6 +352,20 @@ func loadTemplateGuidance(path string) string {
 	return text
 }
 
+// extractJSONArray returns text from its first "[" through its last "]", for replies that wrap the JSON array in chain-of-thought prose.
+// It reports false when either bracket is missing or they are out of order; the returned span is not checked for JSON validity.
+func extractJSONArray(text string) (string, bool) {
+	start := strings.Index(text, "[")
+	if start == -1 {
+		return "", false
+	}
+	end := strings.LastIndex(text, "]")
+	if end == -1 || end <= start {
+		return "", false
+	}
+	return text[start : end+1], true
+}
+
 func parseSectionClassifiedResponse(responseText string) (map[int64]LLMSectionDecision, error) {
 	responseText = strings.TrimSpace(responseText)
 	responseText = strings.TrimPrefix(responseText, "```json")
@@ -361,8 +375,17 @@ func parseSectionClassifiedResponse(responseText string) (map[int64]LLMSectionDe
 
 	var classified []sectionClassifiedItem
 	if err := json.Unmarshal([]byte(responseText), &classified); err != nil {
+		// Some OpenAI-compatible endpoints ignore json_schema and return
+		// chain-of-thought reasoning around the JSON array. Try to extract it.
+		if extracted, ok := extractJSONArray(responseText); ok {
+			if err2 := json.Unmarshal([]byte(extracted), &classified); err2 == nil {
+				log.Printf("llm warning: response contained non-JSON preamble, extracted JSON array (len=%d)", len(extracted))
+				goto parsed
+			}
+		}
 		return nil, fmt.Errorf("parsing LLM section response: %w (response: %s)", err, responseText)
 	}
+parsed:
 
 	decisions := make(map[int64]LLMSectionDecision)
 	for _, c := range classified {
@@ -739,17 +762,29 @@ func doOpenAIResponsesRequest(apiKey, baseURL string, reqBody openAIResponsesReq
 }
 
 func extractResponsesOutputText(resp openAIResponsesResponse) (string, error) {
+	// Reasoning models emit a "reasoning" output ahead of the "message" output, so return the first
+	// non-blank text from a non-reasoning output and use reasoning text only when nothing else has any.
+	var fallback string
 	for _, output := range resp.Output {
+		isReasoning := output.Type == "reasoning"
 		for _, content := range output.Content {
 			if strings.TrimSpace(content.Text) == "" {
 				continue
 			}
 			switch content.Type {
 			case "output_text", "text":
-				return content.Text, nil
+				if !isReasoning {
+					return content.Text, nil
+				}
+				if fallback == "" {
+					fallback = content.Text
+				}
 			}
 		}
 	}
+	if fallback != "" {
+		return fallback, nil
+	}
 	return "", fmt.Errorf("no structured text content in OpenAI Responses payload")
 }
 
@@ -836,6 +871,12 @@ func parseCriticResponse(responseText string) ([]criticFlagged, error) {
 
 	var flagged []criticFlagged
 	if err := json.Unmarshal([]byte(responseText), &flagged); err != nil {
+		if extracted, ok := extractJSONArray(responseText); ok {
+			if err2 := json.Unmarshal([]byte(extracted), &flagged); err2 == nil {
+				log.Printf("llm warning: critic response contained non-JSON preamble, extracted JSON array (len=%d)", len(extracted))
+				return flagged, nil
+			}
+		}
 		truncated := responseText
 		if len(truncated) > 512 {
 			truncated = truncated[:512] + fmt.Sprintf("... [truncated, total_length=%d]", len(responseText))
diff --git a/internal/integrations/llm/llm_test.go b/internal/integrations/llm/llm_test.go
index 3714202..1a38fa8 100644
--- a/internal/integrations/llm/llm_test.go
+++ b/internal/integrations/llm/llm_test.go
@@ -210,6 +210,50 @@ func TestExtractResponsesOutputText(t *testing.T) {
 	}
 }
 
+func TestExtractResponsesOutputText_ReasoningWithOutputText(t *testing.T) {
+	// Real-world case: reasoning output also uses "output_text" content type.
+	// The extractor must skip reasoning and return the message output.
+	resp := openAIResponsesResponse{
+		Output: []struct {
+			Type    string `json:"type"`
+			Role    string `json:"role,omitempty"`
+			Content []struct {
+				Type string `json:"type"`
+				Text string `json:"text"`
+			} `json:"content,omitempty"`
+		}{
+			{
+				Type: "reasoning",
+				Role: "assistant",
+				Content: []struct {
+					Type string `json:"type"`
+					Text string `json:"text"`
+				}{
+					{Type: "output_text", Text: "Let me think about classifying these items..."},
+				},
+			},
+			{
+				Type: "message",
+				Role: "assistant",
+				Content: []struct {
+					Type string `json:"type"`
+					Text string `json:"text"`
+				}{
+					{Type: "output_text", Text: `[{"id":1,"section_id":"S7_0","normalized_status":"done","ticket_ids":[],"duplicate_of":""}]`},
+				},
+			},
+		},
+	}
+
+	got, err := extractResponsesOutputText(resp)
+	if err != nil {
+		t.Fatalf("extractResponsesOutputText error: %v", err)
+	}
+	if !strings.HasPrefix(got, "[") {
+		t.Fatalf("expected JSON array, got reasoning text: %q", got) // no got[:50]: the reasoning text is shorter than 50 bytes and slicing would panic
+	}
+}
+
 func TestParseCriticResponse(t *testing.T) {
 	response := `[
 		{"id": 42, "reason": "This is a database task not infra", "suggested_section_id": "S1_0"},
@@ -240,3 +284,50 @@ func TestParseCriticResponse_Empty(t *testing.T) {
 		t.Fatalf("expected 0 flagged items, got %d", len(flagged))
 	}
 }
+
+func TestExtractJSONArray(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+		ok    bool
+	}{
+		{"pure array", `[{"id":1}]`, `[{"id":1}]`, true},
+		{"reasoning prefix", "Let me think about this.\n" + `[{"id":1}]`, `[{"id":1}]`, true}, // interpreted string: a real newline, not a literal backslash-n
+		{"reasoning both sides", "Some reasoning\n" + `[{"id":1}]` + "\nDone.", `[{"id":1}]`, true},
+		{"no array", `No JSON here`, "", false},
+		{"only open bracket", `text [ but no close`, "", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, ok := extractJSONArray(tt.input)
+			if ok != tt.ok {
+				t.Fatalf("extractJSONArray ok=%v, want %v", ok, tt.ok)
+			}
+			if got != tt.want {
+				t.Fatalf("extractJSONArray=%q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestParseSectionClassifiedResponse_WithReasoning(t *testing.T) {
+	// Simulate a model that wraps the JSON array in chain-of-thought prose, with text both before and after it.
+	reasoning := `We need to classify these items. Let me think...
+ID 228 goes to S7_0. ID 237 is UND.
+[{"id":228,"section_id":"S7_0","normalized_status":"in progress","ticket_ids":[],"duplicate_of":""},{"id":237,"section_id":"UND","normalized_status":"done","ticket_ids":[],"duplicate_of":""}]
+That should be correct.`
+	decisions, err := parseSectionClassifiedResponse(reasoning)
+	if err != nil {
+		t.Fatalf("parseSectionClassifiedResponse with reasoning: %v", err)
+	}
+	if len(decisions) != 2 {
+		t.Fatalf("expected 2 decisions, got %d", len(decisions))
+	}
+	if decisions[228].SectionID != "S7_0" {
+		t.Errorf("expected S7_0, got %q", decisions[228].SectionID)
+	}
+	if decisions[237].SectionID != "UND" {
+		t.Errorf("expected UND, got %q", decisions[237].SectionID)
+	}
+}

From 98f5a465bf68d04d1643e8566e2585208ec2739a Mon Sep 17 00:00:00 2001
From: Codex <codex@local>
Date: Fri, 20 Mar 2026 16:14:13 -0700
Subject: [PATCH 2/2] docs: add tls_skip_verify to README and CLAUDE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 1 +
 README.md | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index 2809827..48a8f22 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -34,6 +34,7 @@ Configuration is layered: `config.yaml` is loaded first, then environment variab
 - **Nudge**: `team_members` (list of Slack full names or user IDs; used by `/check` and scheduled nudge), `nudge_day` (Monday-Sunday), `nudge_time` (HH:MM 24h format)
 - **Auto-fetch**: `auto_fetch_schedule` (5-field cron expression, e.g. `"0 9 * * 1-5"` for weekdays at 9am; empty to disable)
 - **Report**: `report_private` (bool, when true `/generate-report` DMs the report to the caller instead of posting to the channel; default false)
+- **Network**: `tls_skip_verify` (bool, skip TLS cert verification for internal/corporate CAs; default false)
 - **Team**: `team_name` (used in report header and filename)
 
 See `config.yaml` and `README.md` for full reference.
diff --git a/README.md b/README.md
index e3c9914..4b3cd95 100644
--- a/README.md
+++ b/README.md
@@ -176,6 +176,7 @@ team_name: "Example Team"
 # Report channel (Slack channel ID for reminders)
 report_channel_id: "C01234567"
 external_http_timeout_seconds: 90  # optional: timeout for GitLab/GitHub/LLM HTTP calls
+tls_skip_verify: false             # optional: skip TLS cert verification for internal/corporate CAs
 
 ```
 
@@ -203,6 +204,7 @@ export LLM_CRITIC_ENABLED=true                  # Optional: enable generator-cri
 export MANAGER_SLACK_IDS="U01ABC123,U02DEF456"  # Comma-separated Slack user IDs
 export REPORT_CHANNEL_ID=C01234567
 export EXTERNAL_HTTP_TIMEOUT_SECONDS=90          # Optional: timeout for external API HTTP calls
+export TLS_SKIP_VERIFY=true                      # Optional: skip TLS cert verification
 export AUTO_FETCH_SCHEDULE="0 9 * * 1-5"        # Optional: cron schedule for auto-fetch
 export MONDAY_CUTOFF_TIME=12:00
 export TIMEZONE=America/Los_Angeles
@@ -224,6 +226,7 @@ Set `llm_glossary_path` / `LLM_GLOSSARY_PATH` to apply glossary memory rules (se
 Set `llm_critic_enabled` / `LLM_CRITIC_ENABLED` to enable a second LLM pass that reviews classifications for errors.
 Set `openai_base_url` / `OPENAI_BASE_URL` when `llm_provider=openai` and you want to use an OpenAI-compatible endpoint instead of `api.openai.com` (for example a lab-hosted `gpt-oss-120b` server).
 Set `external_http_timeout_seconds` / `EXTERNAL_HTTP_TIMEOUT_SECONDS` to tune timeout limits for GitLab/GitHub/LLM API requests.
+Set `tls_skip_verify` / `TLS_SKIP_VERIFY` to skip TLS certificate verification when connecting to internal or corporate API servers with self-signed or internal CA certificates.
 
 Glossary example (`llm_glossary.yaml`):