From c2498273f48afcc0d4e4434087c083f5f2dd794b Mon Sep 17 00:00:00 2001
From: Peli de Halleux <pelikhan@users.noreply.github.com>
Date: Wed, 13 Aug 2025 00:33:09 +0000
Subject: [PATCH] Enhance timestamp and token usage extraction for Codex format
 in logs

---
 pkg/cli/logs.go      |  5 ++-
 pkg/cli/logs_test.go | 91 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/pkg/cli/logs.go b/pkg/cli/logs.go
index 1ca1481c2d..99237b74d6 100644
--- a/pkg/cli/logs.go
+++ b/pkg/cli/logs.go
@@ -533,12 +533,14 @@ func extractTimestamp(line string) time.Time {
 	patterns := []string{
 		"2006-01-02T15:04:05Z",
 		"2006-01-02T15:04:05.000Z",
+		"2006-01-02T15:04:05", // Codex format without Z
 		"2006-01-02 15:04:05",
 		"Jan 02 15:04:05",
 	}
 
 	// First try to extract the timestamp string from the line
-	timestampRegex := regexp.MustCompile(`(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)`)
+	// Match timestamps with or without a trailing Z, bracketed or not; the optional Z is kept outside the capture group
+	timestampRegex := regexp.MustCompile(`(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})Z?`)
 	matches := timestampRegex.FindStringSubmatch(line)
 	if len(matches) > 1 {
 		timestampStr := matches[1]
@@ -561,6 +563,7 @@ func extractTokenUsage(line string) int {
 		`input[_\s]tokens[:\s]+(\d+)`,
 		`output[_\s]tokens[:\s]+(\d+)`,
 		`total[_\s]tokens[_\s]used[:\s]+(\d+)`,
+		`tokens\s+used[:\s]+(\d+)`, // Codex format: "tokens used: 13934"
 	}
 
 	for _, pattern := range patterns {
diff --git a/pkg/cli/logs_test.go b/pkg/cli/logs_test.go
index 93e3060183..06d8493d30 100644
--- a/pkg/cli/logs_test.go
+++ b/pkg/cli/logs_test.go
@@ -38,6 +38,8 @@ func TestExtractTokenUsage(t *testing.T) {
 		{"token_count: 567", 567},
 		{"input_tokens: 890", 890},
 		{"Total tokens used: 999", 999},
+		{"tokens used: 13934", 13934},                       // Codex format
+		{"[2025-08-13T00:24:50] tokens used: 13934", 13934}, // Codex format with timestamp
 		{"no token info here", 0},
 		{"tokens: invalid", 0},
 	}
@@ -480,3 +482,92 @@ Claude processing request...
 		t.Errorf("Expected duration %v, got %v", expectedDuration, metrics.Duration)
 	}
 }
+
+func TestParseLogFileWithCodexFormat(t *testing.T) {
+	// Write a Codex-style log into a per-test temporary directory (t.TempDir) and parse it back
+	tmpDir := t.TempDir()
+	logFile := filepath.Join(tmpDir, "test-codex.log")
+
+	// Codex output sample (per the originating issue): bracketed timestamps without Z and a multi-line message
+	logContent := `[2025-08-13T00:24:45] Starting Codex workflow execution
+[2025-08-13T00:24:50] codex
+
+I'm ready to generate a Codex PR summary, but I need the pull request number to fetch its details. Could you please share the PR number (and confirm the repo/owner if it isn't ` + "`githubnext/gh-aw`" + `)?
+[2025-08-13T00:24:50] tokens used: 13934
+[2025-08-13T00:24:55] Workflow completed successfully`
+
+	err := os.WriteFile(logFile, []byte(logContent), 0644)
+	if err != nil {
+		t.Fatalf("Failed to create test log file: %v", err)
+	}
+
+	metrics, err := parseLogFile(logFile, false)
+	if err != nil {
+		t.Fatalf("parseLogFile failed: %v", err)
+	}
+
+	// The "tokens used: 13934" line in the sample must be reported as the token usage
+	expectedTokens := 13934
+	if metrics.TokenUsage != expectedTokens {
+		t.Errorf("Expected token usage %d, got %d", expectedTokens, metrics.TokenUsage)
+	}
+
+	// Duration is expected to span the first (00:24:45) and last (00:24:55) timestamps: 10 seconds
+	expectedDuration := 10 * time.Second
+	if metrics.Duration != expectedDuration {
+		t.Errorf("Expected duration %v, got %v", expectedDuration, metrics.Duration)
+	}
+}
+
+func TestExtractTokenUsageCodexPatterns(t *testing.T) {
+	tests := []struct { // each case: a log line and the token count extractTokenUsage must return for it
+		name     string
+		line     string
+		expected int
+	}{
+		{
+			name:     "Codex basic format",
+			line:     "tokens used: 13934",
+			expected: 13934,
+		},
+		{
+			name:     "Codex format with timestamp",
+			line:     "[2025-08-13T00:24:50] tokens used: 13934",
+			expected: 13934,
+		},
+		{
+			name:     "Codex format with different timestamp",
+			line:     "[2024-12-01T15:30:45] tokens used: 5678",
+			expected: 5678,
+		},
+		{
+			name:     "Codex format mixed with other text",
+			line:     "Processing completed. tokens used: 999 - Summary generated", // phrase appears mid-line
+			expected: 999,
+		},
+		{
+			name:     "Standard format still works",
+			line:     "tokens: 1234",
+			expected: 1234,
+		},
+		{
+			name:     "Total tokens used format",
+			line:     "total tokens used: 4567",
+			expected: 4567,
+		},
+		{
+			name:     "No token info",
+			line:     "[2025-08-13T00:24:50] codex processing", // timestamp digits must not be read as a token count
+			expected: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) { // one named subtest per table entry
+			result := extractTokenUsage(tt.line)
+			if result != tt.expected {
+				t.Errorf("extractTokenUsage(%q) = %d, expected %d", tt.line, result, tt.expected)
+			}
+		})
+	}
+}