diff --git a/pkg/parser/ansi_strip.go b/pkg/parser/ansi_strip.go new file mode 100644 index 0000000000..0de3f2e0f6 --- /dev/null +++ b/pkg/parser/ansi_strip.go @@ -0,0 +1,108 @@ +package parser + +import ( + "strings" +) + +// StripANSI removes ANSI escape codes from a string +func StripANSI(s string) string { + if s == "" { + return s + } + + var result strings.Builder + result.Grow(len(s)) // Pre-allocate capacity for efficiency + + i := 0 + for i < len(s) { + if s[i] == '\x1b' { + if i+1 >= len(s) { + // ESC at end of string, skip it + i++ + continue + } + // Found ESC character, determine sequence type + switch s[i+1] { + case '[': + // CSI sequence: \x1b[...final_char + // Parameters are in range 0x30-0x3F (0-?), intermediate chars 0x20-0x2F (space-/) + // Final characters are in range 0x40-0x7E (@-~) + i += 2 // Skip ESC and [ + for i < len(s) { + if isFinalCSIChar(s[i]) { + i++ // Skip the final character + break + } else if isCSIParameterChar(s[i]) { + i++ // Skip parameter/intermediate character + } else { + // Invalid character in CSI sequence, stop processing this escape + break + } + } + case ']': + // OSC sequence: \x1b]...terminator + // Terminators: \x07 (BEL) or \x1b\\ (ST) + i += 2 // Skip ESC and ] + for i < len(s) { + if s[i] == '\x07' { + i++ // Skip BEL + break + } else if s[i] == '\x1b' && i+1 < len(s) && s[i+1] == '\\' { + i += 2 // Skip ESC and \ + break + } + i++ + } + case '(': + // G0 character set selection: \x1b(char + i += 2 // Skip ESC and ( + if i < len(s) { + i++ // Skip the character + } + case ')': + // G1 character set selection: \x1b)char + i += 2 // Skip ESC and ) + if i < len(s) { + i++ // Skip the character + } + case '=': + // Application keypad mode: \x1b= + i += 2 + case '>': + // Normal keypad mode: \x1b> + i += 2 + case 'c': + // Reset: \x1bc + i += 2 + default: + // Other escape sequences (2-character) + // Handle common ones like \x1b7, \x1b8, \x1bD, \x1bE, \x1bH, \x1bM + if i+1 < len(s) && (s[i+1] >= '0' && s[i+1] <= 
'~') { + i += 2 + } else { + // Invalid or incomplete escape sequence, just skip ESC + i++ + } + } + } else { + // Regular character, keep it + result.WriteByte(s[i]) + i++ + } + } + + return result.String() +} + +// isFinalCSIChar checks if a character is a valid CSI final character +// Final characters are in range 0x40-0x7E (@-~) +func isFinalCSIChar(b byte) bool { + return b >= 0x40 && b <= 0x7E +} + +// isCSIParameterChar checks if a character is a valid CSI parameter or intermediate character +// Parameter characters are in range 0x30-0x3F (0-?) +// Intermediate characters are in range 0x20-0x2F (space-/) +func isCSIParameterChar(b byte) bool { + return (b >= 0x20 && b <= 0x2F) || (b >= 0x30 && b <= 0x3F) +} diff --git a/pkg/parser/frontmatter.go b/pkg/parser/frontmatter.go index 7ab07a9d25..8640a0a997 100644 --- a/pkg/parser/frontmatter.go +++ b/pkg/parser/frontmatter.go @@ -6,12 +6,10 @@ import ( "encoding/json" "fmt" "os" - "os/exec" "path/filepath" "regexp" "strings" - "github.com/cli/go-gh/v2" "github.com/githubnext/gh-aw/pkg/console" "github.com/githubnext/gh-aw/pkg/logger" "github.com/goccy/go-yaml" @@ -80,15 +78,6 @@ func isMCPType(typeStr string) bool { } } -// FrontmatterResult holds parsed frontmatter and markdown content -type FrontmatterResult struct { - Frontmatter map[string]any - Markdown string - // Additional fields for error context - FrontmatterLines []string // Original frontmatter lines for error context - FrontmatterStart int // Line number where frontmatter starts (1-based) -} - // ImportsResult holds the result of processing imports from frontmatter type ImportsResult struct { MergedTools string // Merged tools configuration from all imports @@ -106,268 +95,6 @@ type ImportsResult struct { AgentFile string // Path to custom agent file (if imported) } -// ExtractFrontmatterFromContent parses YAML frontmatter from markdown content string -func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { - lines := 
strings.Split(content, "\n") - - // Check if file starts with frontmatter delimiter - if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { - // No frontmatter, return entire content as markdown - return &FrontmatterResult{ - Frontmatter: make(map[string]any), - Markdown: content, - FrontmatterLines: []string{}, - FrontmatterStart: 0, - }, nil - } - - // Find end of frontmatter - endIndex := -1 - for i := 1; i < len(lines); i++ { - if strings.TrimSpace(lines[i]) == "---" { - endIndex = i - break - } - } - - if endIndex == -1 { - return nil, fmt.Errorf("frontmatter not properly closed") - } - - // Extract frontmatter YAML - frontmatterLines := lines[1:endIndex] - frontmatterYAML := strings.Join(frontmatterLines, "\n") - - // Parse YAML - var frontmatter map[string]any - if err := yaml.Unmarshal([]byte(frontmatterYAML), &frontmatter); err != nil { - return nil, fmt.Errorf("failed to parse frontmatter: %w", err) - } - - // Ensure frontmatter is never nil (yaml.Unmarshal sets it to nil for empty YAML) - if frontmatter == nil { - frontmatter = make(map[string]any) - } - - // Extract markdown content (everything after the closing ---) - var markdownLines []string - if endIndex+1 < len(lines) { - markdownLines = lines[endIndex+1:] - } - markdown := strings.Join(markdownLines, "\n") - - return &FrontmatterResult{ - Frontmatter: frontmatter, - Markdown: strings.TrimSpace(markdown), - FrontmatterLines: frontmatterLines, - FrontmatterStart: 2, // Line 2 is where frontmatter content starts (after opening ---) - }, nil -} - -// ExtractMarkdownSection extracts a specific section from markdown content -// Supports H1-H3 headers and proper nesting (matches bash implementation) -func ExtractMarkdownSection(content, sectionName string) (string, error) { - scanner := bufio.NewScanner(strings.NewReader(content)) - var sectionContent bytes.Buffer - inSection := false - var sectionLevel int - - // Create regex pattern to match headers at any level (H1-H3) with flexible spacing - 
headerPattern := regexp.MustCompile(`^(#{1,3})[\s\t]+` + regexp.QuoteMeta(sectionName) + `[\s\t]*$`) - levelPattern := regexp.MustCompile(`^(#{1,3})[\s\t]+`) - - for scanner.Scan() { - line := scanner.Text() - - // Check if this line matches our target section - if matches := headerPattern.FindStringSubmatch(line); matches != nil { - inSection = true - sectionLevel = len(matches[1]) // Number of # characters - sectionContent.WriteString(line + "\n") - continue - } - - // If we're in the section, check if we've hit another header at same or higher level - if inSection { - if levelMatches := levelPattern.FindStringSubmatch(line); levelMatches != nil { - currentLevel := len(levelMatches[1]) - // Stop if we encounter same or higher level header - if currentLevel <= sectionLevel { - break - } - } - sectionContent.WriteString(line + "\n") - } - } - - if !inSection { - return "", fmt.Errorf("section '%s' not found", sectionName) - } - - return strings.TrimSpace(sectionContent.String()), nil -} - -// ExtractFrontmatterString extracts only the YAML frontmatter as a string -// This matches the bash extract_frontmatter function -func ExtractFrontmatterString(content string) (string, error) { - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", err - } - - // Convert frontmatter map back to YAML string - if len(result.Frontmatter) == 0 { - return "", nil - } - - yamlBytes, err := yaml.Marshal(result.Frontmatter) - if err != nil { - return "", fmt.Errorf("failed to marshal frontmatter: %w", err) - } - - // Post-process YAML to ensure cron expressions are quoted - // The YAML library may drop quotes from cron expressions like "0 14 * * 1-5" - // which causes validation errors since they start with numbers but contain spaces - yamlString := string(yamlBytes) - yamlString = QuoteCronExpressions(yamlString) - - return strings.TrimSpace(yamlString), nil -} - -// ExtractMarkdownContent extracts only the markdown content (excluding frontmatter) -// 
This matches the bash extract_markdown function -func ExtractMarkdownContent(content string) (string, error) { - result, err := ExtractFrontmatterFromContent(content) - if err != nil { - return "", err - } - - return result.Markdown, nil -} - -// ExtractYamlChunk extracts a specific YAML section with proper indentation handling -// This matches the bash extract_yaml_chunk function exactly -func ExtractYamlChunk(yamlContent, key string) (string, error) { - if yamlContent == "" || key == "" { - return "", nil - } - - scanner := bufio.NewScanner(strings.NewReader(yamlContent)) - var result bytes.Buffer - inSection := false - var keyLevel int - // Match both quoted and unquoted keys - keyPattern := regexp.MustCompile(`^(\s*)(?:"` + regexp.QuoteMeta(key) + `"|` + regexp.QuoteMeta(key) + `):\s*(.*)$`) - - for scanner.Scan() { - line := scanner.Text() - - // Skip empty lines when not in section - if !inSection && strings.TrimSpace(line) == "" { - continue - } - - // Check if this line starts our target key - if matches := keyPattern.FindStringSubmatch(line); matches != nil { - inSection = true - keyLevel = len(matches[1]) // Indentation level - result.WriteString(line + "\n") - - // If it's a single-line value, we're done - if strings.TrimSpace(matches[2]) != "" { - break - } - continue - } - - // If we're in the section, check indentation - if inSection { - // Skip empty lines - if strings.TrimSpace(line) == "" { - continue - } - - // Count leading spaces - spaces := 0 - for _, char := range line { - if char == ' ' { - spaces++ - } else { - break - } - } - - // If indentation is less than or equal to key level, we've left the section - if spaces <= keyLevel { - break - } - - result.WriteString(line + "\n") - } - } - - if !inSection { - return "", nil - } - - return strings.TrimRight(result.String(), "\n"), nil -} - -// ExtractWorkflowNameFromMarkdown extracts workflow name from first H1 header -// This matches the bash extract_workflow_name_from_markdown function exactly 
-func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { - // First extract markdown content (excluding frontmatter) - markdownContent, err := ExtractMarkdown(filePath) - if err != nil { - return "", err - } - - // Look for first H1 header (line starting with "# ") - scanner := bufio.NewScanner(strings.NewReader(markdownContent)) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(line, "# ") { - // Extract text after "# " - return strings.TrimSpace(line[2:]), nil - } - } - - // No H1 header found, generate default name from filename - return generateDefaultWorkflowName(filePath), nil -} - -// generateDefaultWorkflowName creates a default workflow name from filename -// This matches the bash implementation's fallback behavior -func generateDefaultWorkflowName(filePath string) string { - // Get base filename without extension - baseName := filepath.Base(filePath) - baseName = strings.TrimSuffix(baseName, filepath.Ext(baseName)) - - // Convert hyphens to spaces - baseName = strings.ReplaceAll(baseName, "-", " ") - - // Capitalize first letter of each word - words := strings.Fields(baseName) - for i, word := range words { - if len(word) > 0 { - words[i] = strings.ToUpper(word[:1]) + word[1:] - } - } - - return strings.Join(words, " ") -} - -// ExtractMarkdown extracts markdown content from a file (excluding frontmatter) -// This matches the bash extract_markdown function -func ExtractMarkdown(filePath string) (string, error) { - content, err := os.ReadFile(filePath) - if err != nil { - return "", fmt.Errorf("failed to read file %s: %w", filePath, err) - } - - return ExtractMarkdownContent(string(content)) -} - // ProcessImportsFromFrontmatter processes imports field from frontmatter // Returns merged tools and engines from imported files func ProcessImportsFromFrontmatter(frontmatter map[string]any, baseDir string) (mergedTools string, mergedEngines []string, err error) { @@ -772,250 +499,6 @@ func 
processIncludesWithVisited(content, baseDir string, extractTools bool, visi return result.String(), nil } -// isUnderWorkflowsDirectory checks if a file path is a top-level workflow file (not in shared subdirectory) -func isUnderWorkflowsDirectory(filePath string) bool { - // Normalize the path to use forward slashes - normalizedPath := filepath.ToSlash(filePath) - - // Check if the path contains .github/workflows/ - if !strings.Contains(normalizedPath, ".github/workflows/") { - return false - } - - // Extract the part after .github/workflows/ - parts := strings.Split(normalizedPath, ".github/workflows/") - if len(parts) < 2 { - return false - } - - afterWorkflows := parts[1] - - // Check if there are any slashes after .github/workflows/ (indicating subdirectory) - // If there are, it's in a subdirectory like "shared/" and should not be treated as a workflow file - return !strings.Contains(afterWorkflows, "/") -} - -// resolveIncludePath resolves include path based on workflowspec format or relative path -func resolveIncludePath(filePath, baseDir string, cache *ImportCache) (string, error) { - // Check if this is a workflowspec (contains owner/repo/path format) - // Format: owner/repo/path@ref or owner/repo/path@ref#section - if isWorkflowSpec(filePath) { - // Download from GitHub using workflowspec (with cache support) - return downloadIncludeFromWorkflowSpec(filePath, cache) - } - - // Regular path, resolve relative to base directory - fullPath := filepath.Join(baseDir, filePath) - if _, err := os.Stat(fullPath); os.IsNotExist(err) { - return "", fmt.Errorf("file not found: %s", fullPath) - } - return fullPath, nil -} - -// isWorkflowSpec checks if a path looks like a workflowspec (owner/repo/path[@ref]) -func isWorkflowSpec(path string) bool { - // Remove section reference if present - cleanPath := path - if idx := strings.Index(path, "#"); idx != -1 { - cleanPath = path[:idx] - } - - // Remove ref if present - if idx := strings.Index(cleanPath, "@"); idx != -1 
{ - cleanPath = cleanPath[:idx] - } - - // Check if it has at least 3 parts (owner/repo/path) - parts := strings.Split(cleanPath, "/") - if len(parts) < 3 { - return false - } - - // Reject paths that start with "." (local paths like .github/workflows/...) - if strings.HasPrefix(cleanPath, ".") { - return false - } - - // Reject paths that start with "shared/" (local shared files) - if strings.HasPrefix(cleanPath, "shared/") { - return false - } - - // Reject absolute paths - if strings.HasPrefix(cleanPath, "/") { - return false - } - - return true -} - -// downloadIncludeFromWorkflowSpec downloads an include file from GitHub using workflowspec -// It first checks the cache, and only downloads if not cached -func downloadIncludeFromWorkflowSpec(spec string, cache *ImportCache) (string, error) { - // Parse the workflowspec - // Format: owner/repo/path@ref or owner/repo/path@ref#section - - // Remove section reference if present - cleanSpec := spec - if idx := strings.Index(spec, "#"); idx != -1 { - cleanSpec = spec[:idx] - } - - // Split on @ to get path and ref - parts := strings.SplitN(cleanSpec, "@", 2) - pathPart := parts[0] - var ref string - if len(parts) == 2 { - ref = parts[1] - } else { - ref = "main" // default to main branch - } - - // Parse path: owner/repo/path/to/file.md - slashParts := strings.Split(pathPart, "/") - if len(slashParts) < 3 { - return "", fmt.Errorf("invalid workflowspec: must be owner/repo/path[@ref]") - } - - owner := slashParts[0] - repo := slashParts[1] - filePath := strings.Join(slashParts[2:], "/") - - // Resolve ref to SHA for cache lookup - var sha string - if cache != nil { - // Only resolve SHA if we're using the cache - resolvedSHA, err := resolveRefToSHA(owner, repo, ref) - if err != nil { - // If the error is an authentication error, propagate it immediately - lowerErr := strings.ToLower(err.Error()) - if strings.Contains(lowerErr, "auth") || strings.Contains(lowerErr, "unauthoriz") || strings.Contains(lowerErr, 
"forbidden") || strings.Contains(lowerErr, "token") || strings.Contains(lowerErr, "permission denied") { - return "", fmt.Errorf("failed to resolve ref to SHA due to authentication error: %w", err) - } - log.Printf("Failed to resolve ref to SHA, will skip cache: %v", err) - // Continue without caching if SHA resolution fails - } else { - sha = resolvedSHA - // Check cache using SHA - if cachedPath, found := cache.Get(owner, repo, filePath, sha); found { - log.Printf("Using cached import: %s/%s/%s@%s (SHA: %s)", owner, repo, filePath, ref, sha) - return cachedPath, nil - } - } - } - - // Download the file content from GitHub - content, err := downloadFileFromGitHub(owner, repo, filePath, ref) - if err != nil { - return "", fmt.Errorf("failed to download include from %s: %w", spec, err) - } - - // If cache is available and we have a SHA, store in cache - if cache != nil && sha != "" { - cachedPath, err := cache.Set(owner, repo, filePath, sha, content) - if err != nil { - log.Printf("Failed to cache import: %v", err) - // Don't fail the compilation, fall back to temp file - } else { - return cachedPath, nil - } - } - - // Fallback: Create a temporary file to store the downloaded content - tempFile, err := os.CreateTemp("", "gh-aw-include-*.md") - if err != nil { - return "", fmt.Errorf("failed to create temp file: %w", err) - } - - if _, err := tempFile.Write(content); err != nil { - tempFile.Close() - os.Remove(tempFile.Name()) - return "", fmt.Errorf("failed to write temp file: %w", err) - } - - if err := tempFile.Close(); err != nil { - os.Remove(tempFile.Name()) - return "", fmt.Errorf("failed to close temp file: %w", err) - } - - return tempFile.Name(), nil -} - -// resolveRefToSHA resolves a git ref (branch, tag, or SHA) to its commit SHA -func resolveRefToSHA(owner, repo, ref string) (string, error) { - // If ref is already a full SHA (40 hex characters), return it as-is - if len(ref) == 40 && isHexString(ref) { - return ref, nil - } - - // Use gh CLI to get 
the commit SHA for the ref - // This works for branches, tags, and short SHAs - cmd := exec.Command("gh", "api", fmt.Sprintf("/repos/%s/%s/commits/%s", owner, repo, ref), "--jq", ".sha") - - output, err := cmd.CombinedOutput() - if err != nil { - outputStr := string(output) - if strings.Contains(outputStr, "GH_TOKEN") || strings.Contains(outputStr, "authentication") || strings.Contains(outputStr, "not logged into") { - return "", fmt.Errorf("failed to resolve ref to SHA: GitHub authentication required. Please run 'gh auth login' or set GH_TOKEN/GITHUB_TOKEN environment variable: %w", err) - } - return "", fmt.Errorf("failed to resolve ref %s to SHA for %s/%s: %s: %w", ref, owner, repo, strings.TrimSpace(outputStr), err) - } - - sha := strings.TrimSpace(string(output)) - if sha == "" { - return "", fmt.Errorf("empty SHA returned for ref %s in %s/%s", ref, owner, repo) - } - - // Validate it's a valid SHA (40 hex characters) - if len(sha) != 40 || !isHexString(sha) { - return "", fmt.Errorf("invalid SHA format returned: %s", sha) - } - - return sha, nil -} - -// isHexString checks if a string contains only hexadecimal characters -func isHexString(s string) bool { - if len(s) == 0 { - return false - } - for _, c := range s { - if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { - return false - } - } - return true -} - -func downloadFileFromGitHub(owner, repo, path, ref string) ([]byte, error) { - // Use go-gh/v2 to download the file - stdout, stderr, err := gh.Exec("api", fmt.Sprintf("/repos/%s/%s/contents/%s?ref=%s", owner, repo, path, ref), "--jq", ".content") - - if err != nil { - // Check if this is an authentication error - stderrStr := stderr.String() - if strings.Contains(stderrStr, "GH_TOKEN") || strings.Contains(stderrStr, "authentication") || strings.Contains(stderrStr, "not logged into") { - return nil, fmt.Errorf("failed to fetch file content: GitHub authentication required. 
Please run 'gh auth login' or set GH_TOKEN/GITHUB_TOKEN environment variable: %w", err) - } - return nil, fmt.Errorf("failed to fetch file content from %s/%s/%s@%s: %s: %w", owner, repo, path, ref, strings.TrimSpace(stderrStr), err) - } - - // The content is base64 encoded, decode it - contentBase64 := strings.TrimSpace(stdout.String()) - if contentBase64 == "" { - return nil, fmt.Errorf("empty content returned from GitHub API for %s/%s/%s@%s", owner, repo, path, ref) - } - - decodeCmd := exec.Command("base64", "-d") - decodeCmd.Stdin = strings.NewReader(contentBase64) - content, err := decodeCmd.Output() - if err != nil { - return nil, fmt.Errorf("failed to decode base64 content: %w", err) - } - - return content, nil -} - // processIncludedFile processes a single included file, optionally extracting a section // processIncludedFileWithVisited processes a single included file with cycle detection for nested includes func processIncludedFileWithVisited(filePath, sectionName string, extractTools bool, visited map[string]bool) (string, error) { @@ -1680,228 +1163,3 @@ func areEqual(a, b any) bool { return string(aJSON) == string(bJSON) } - -// StripANSI removes all ANSI escape sequences from a string -// This handles: -// - CSI (Control Sequence Introducer) sequences: \x1b[... 
-// - OSC (Operating System Command) sequences: \x1b]...\x07 or \x1b]...\x1b\\ -// - Simple escape sequences: \x1b followed by a single character -func StripANSI(s string) string { - if s == "" { - return s - } - - var result strings.Builder - result.Grow(len(s)) // Pre-allocate capacity for efficiency - - i := 0 - for i < len(s) { - if s[i] == '\x1b' { - if i+1 >= len(s) { - // ESC at end of string, skip it - i++ - continue - } - // Found ESC character, determine sequence type - switch s[i+1] { - case '[': - // CSI sequence: \x1b[...final_char - // Parameters are in range 0x30-0x3F (0-?), intermediate chars 0x20-0x2F (space-/) - // Final characters are in range 0x40-0x7E (@-~) - i += 2 // Skip ESC and [ - for i < len(s) { - if isFinalCSIChar(s[i]) { - i++ // Skip the final character - break - } else if isCSIParameterChar(s[i]) { - i++ // Skip parameter/intermediate character - } else { - // Invalid character in CSI sequence, stop processing this escape - break - } - } - case ']': - // OSC sequence: \x1b]...terminator - // Terminators: \x07 (BEL) or \x1b\\ (ST) - i += 2 // Skip ESC and ] - for i < len(s) { - if s[i] == '\x07' { - i++ // Skip BEL - break - } else if s[i] == '\x1b' && i+1 < len(s) && s[i+1] == '\\' { - i += 2 // Skip ESC and \ - break - } - i++ - } - case '(': - // G0 character set selection: \x1b(char - i += 2 // Skip ESC and ( - if i < len(s) { - i++ // Skip the character - } - case ')': - // G1 character set selection: \x1b)char - i += 2 // Skip ESC and ) - if i < len(s) { - i++ // Skip the character - } - case '=': - // Application keypad mode: \x1b= - i += 2 - case '>': - // Normal keypad mode: \x1b> - i += 2 - case 'c': - // Reset: \x1bc - i += 2 - default: - // Other escape sequences (2-character) - // Handle common ones like \x1b7, \x1b8, \x1bD, \x1bE, \x1bH, \x1bM - if i+1 < len(s) && (s[i+1] >= '0' && s[i+1] <= '~') { - i += 2 - } else { - // Invalid or incomplete escape sequence, just skip ESC - i++ - } - } - } else { - // Regular 
character, keep it - result.WriteByte(s[i]) - i++ - } - } - - return result.String() -} - -// isFinalCSIChar checks if a character is a valid CSI final character -// Final characters are in range 0x40-0x7E (@-~) -func isFinalCSIChar(b byte) bool { - return b >= 0x40 && b <= 0x7E -} - -// isCSIParameterChar checks if a character is a valid CSI parameter or intermediate character -// Parameter characters are in range 0x30-0x3F (0-?) -// Intermediate characters are in range 0x20-0x2F (space-/) -func isCSIParameterChar(b byte) bool { - return (b >= 0x20 && b <= 0x2F) || (b >= 0x30 && b <= 0x3F) -} - -// UpdateWorkflowFrontmatter updates the frontmatter of a workflow file using a callback function -func UpdateWorkflowFrontmatter(workflowPath string, updateFunc func(frontmatter map[string]any) error, verbose bool) error { - // Read the workflow file - content, err := os.ReadFile(workflowPath) - if err != nil { - return fmt.Errorf("failed to read workflow file: %w", err) - } - - // Parse frontmatter using existing helper - result, err := ExtractFrontmatterFromContent(string(content)) - if err != nil { - return fmt.Errorf("failed to parse frontmatter: %w", err) - } - - // Ensure frontmatter map exists - if result.Frontmatter == nil { - result.Frontmatter = make(map[string]any) - } - - // Apply the update function - if err := updateFunc(result.Frontmatter); err != nil { - return err - } - - // Convert back to YAML - updatedFrontmatter, err := yaml.Marshal(result.Frontmatter) - if err != nil { - return fmt.Errorf("failed to marshal updated frontmatter: %w", err) - } - - // Reconstruct the file content - updatedContent, err := reconstructWorkflowFile(string(updatedFrontmatter), result.Markdown) - if err != nil { - return fmt.Errorf("failed to reconstruct workflow file: %w", err) - } - - // Write the updated content back to the file - if err := os.WriteFile(workflowPath, []byte(updatedContent), 0644); err != nil { - return fmt.Errorf("failed to write updated workflow file: 
%w", err) - } - - if verbose { - fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Updated workflow file: %s", console.ToRelativePath(workflowPath)))) - } - - return nil -} - -// EnsureToolsSection ensures the tools section exists in frontmatter and returns it -func EnsureToolsSection(frontmatter map[string]any) map[string]any { - if frontmatter["tools"] == nil { - frontmatter["tools"] = make(map[string]any) - } - - tools, ok := frontmatter["tools"].(map[string]any) - if !ok { - // If tools exists but is not a map, replace it - tools = make(map[string]any) - frontmatter["tools"] = tools - } - - return tools -} - -// reconstructWorkflowFile reconstructs a complete workflow file from frontmatter YAML and markdown content -func reconstructWorkflowFile(frontmatterYAML, markdownContent string) (string, error) { - var lines []string - - // Add opening frontmatter delimiter - lines = append(lines, "---") - - // Add frontmatter content (trim trailing newline from YAML marshal) - frontmatterStr := strings.TrimSuffix(frontmatterYAML, "\n") - if frontmatterStr != "" { - lines = append(lines, strings.Split(frontmatterStr, "\n")...) - } - - // Add closing frontmatter delimiter - lines = append(lines, "---") - - // Add markdown content if present - if markdownContent != "" { - lines = append(lines, markdownContent) - } - - return strings.Join(lines, "\n"), nil -} - -// QuoteCronExpressions ensures cron expressions in schedule sections are properly quoted. -// The YAML library may drop quotes from cron expressions like "0 14 * * 1-5" which -// causes validation errors since they start with numbers but contain spaces and special chars. 
-func QuoteCronExpressions(yamlContent string) string { - // Pattern to match unquoted cron expressions after "cron:" - // Matches: cron: 0 14 * * 1-5 - // Captures the cron value to be quoted - cronPattern := regexp.MustCompile(`(?m)^(\s*-?\s*cron:\s*)([0-9][^\n"']*)$`) - - // Replace unquoted cron expressions with quoted versions - return cronPattern.ReplaceAllStringFunc(yamlContent, func(match string) string { - // Extract the cron prefix and value - submatches := cronPattern.FindStringSubmatch(match) - if len(submatches) < 3 { - return match - } - prefix := submatches[1] - cronValue := strings.TrimSpace(submatches[2]) - - // Remove any trailing comments - if idx := strings.Index(cronValue, "#"); idx != -1 { - comment := cronValue[idx:] - cronValue = strings.TrimSpace(cronValue[:idx]) - return prefix + `"` + cronValue + `" ` + comment - } - - return prefix + `"` + cronValue + `"` - }) -} diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go new file mode 100644 index 0000000000..baa48b3d97 --- /dev/null +++ b/pkg/parser/frontmatter_content.go @@ -0,0 +1,284 @@ +package parser + +import ( + "bufio" + "bytes" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/goccy/go-yaml" +) + +// FrontmatterResult holds parsed frontmatter and markdown content +type FrontmatterResult struct { + Frontmatter map[string]any + Markdown string + // Additional fields for error context + FrontmatterLines []string // Original frontmatter lines for error context + FrontmatterStart int // Line number where frontmatter starts (1-based) +} + +// ExtractFrontmatterFromContent parses YAML frontmatter from markdown content string +func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { + lines := strings.Split(content, "\n") + + // Check if file starts with frontmatter delimiter + if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + // No frontmatter, return entire content as markdown + return 
&FrontmatterResult{ + Frontmatter: make(map[string]any), + Markdown: content, + FrontmatterLines: []string{}, + FrontmatterStart: 0, + }, nil + } + + // Find end of frontmatter + endIndex := -1 + for i := 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "---" { + endIndex = i + break + } + } + + if endIndex == -1 { + return nil, fmt.Errorf("frontmatter not properly closed") + } + + // Extract frontmatter YAML + frontmatterLines := lines[1:endIndex] + frontmatterYAML := strings.Join(frontmatterLines, "\n") + + // Parse YAML + var frontmatter map[string]any + if err := yaml.Unmarshal([]byte(frontmatterYAML), &frontmatter); err != nil { + return nil, fmt.Errorf("failed to parse frontmatter: %w", err) + } + + // Ensure frontmatter is never nil (yaml.Unmarshal sets it to nil for empty YAML) + if frontmatter == nil { + frontmatter = make(map[string]any) + } + + // Extract markdown content (everything after the closing ---) + var markdownLines []string + if endIndex+1 < len(lines) { + markdownLines = lines[endIndex+1:] + } + markdown := strings.Join(markdownLines, "\n") + + return &FrontmatterResult{ + Frontmatter: frontmatter, + Markdown: strings.TrimSpace(markdown), + FrontmatterLines: frontmatterLines, + FrontmatterStart: 2, // Line 2 is where frontmatter content starts (after opening ---) + }, nil +} + +// ExtractMarkdownSection extracts a specific section from markdown content +// Supports H1-H3 headers and proper nesting (matches bash implementation) +func ExtractMarkdownSection(content, sectionName string) (string, error) { + scanner := bufio.NewScanner(strings.NewReader(content)) + var sectionContent bytes.Buffer + inSection := false + var sectionLevel int + + // Create regex pattern to match headers at any level (H1-H3) with flexible spacing + headerPattern := regexp.MustCompile(`^(#{1,3})[\s\t]+` + regexp.QuoteMeta(sectionName) + `[\s\t]*$`) + levelPattern := regexp.MustCompile(`^(#{1,3})[\s\t]+`) + + for scanner.Scan() { + line := scanner.Text() + + 
// Check if this line matches our target section + if matches := headerPattern.FindStringSubmatch(line); matches != nil { + inSection = true + sectionLevel = len(matches[1]) // Number of # characters + sectionContent.WriteString(line + "\n") + continue + } + + // If we're in the section, check if we've hit another header at same or higher level + if inSection { + if levelMatches := levelPattern.FindStringSubmatch(line); levelMatches != nil { + currentLevel := len(levelMatches[1]) + // Stop if we encounter same or higher level header + if currentLevel <= sectionLevel { + break + } + } + sectionContent.WriteString(line + "\n") + } + } + + if !inSection { + return "", fmt.Errorf("section '%s' not found", sectionName) + } + + return strings.TrimSpace(sectionContent.String()), nil +} + +// ExtractFrontmatterString extracts only the YAML frontmatter as a string +// This matches the bash extract_frontmatter function +func ExtractFrontmatterString(content string) (string, error) { + result, err := ExtractFrontmatterFromContent(content) + if err != nil { + return "", err + } + + // Convert frontmatter map back to YAML string + if len(result.Frontmatter) == 0 { + return "", nil + } + + yamlBytes, err := yaml.Marshal(result.Frontmatter) + if err != nil { + return "", fmt.Errorf("failed to marshal frontmatter: %w", err) + } + + // Post-process YAML to ensure cron expressions are quoted + // The YAML library may drop quotes from cron expressions like "0 14 * * 1-5" + // which causes validation errors since they start with numbers but contain spaces + yamlString := string(yamlBytes) + yamlString = QuoteCronExpressions(yamlString) + + return strings.TrimSpace(yamlString), nil +} + +// ExtractMarkdownContent extracts only the markdown content (excluding frontmatter) +// This matches the bash extract_markdown function +func ExtractMarkdownContent(content string) (string, error) { + result, err := ExtractFrontmatterFromContent(content) + if err != nil { + return "", err + } + + 
return result.Markdown, nil +} + +// ExtractYamlChunk extracts a specific YAML section with proper indentation handling +// This matches the bash extract_yaml_chunk function exactly +func ExtractYamlChunk(yamlContent, key string) (string, error) { + if yamlContent == "" || key == "" { + return "", nil + } + + scanner := bufio.NewScanner(strings.NewReader(yamlContent)) + var result bytes.Buffer + inSection := false + var keyLevel int + // Match both quoted and unquoted keys + keyPattern := regexp.MustCompile(`^(\s*)(?:"` + regexp.QuoteMeta(key) + `"|` + regexp.QuoteMeta(key) + `):\s*(.*)$`) + + for scanner.Scan() { + line := scanner.Text() + + // Skip empty lines when not in section + if !inSection && strings.TrimSpace(line) == "" { + continue + } + + // Check if this line starts our target key + if matches := keyPattern.FindStringSubmatch(line); matches != nil { + inSection = true + keyLevel = len(matches[1]) // Indentation level + result.WriteString(line + "\n") + + // If it's a single-line value, we're done + if strings.TrimSpace(matches[2]) != "" { + break + } + continue + } + + // If we're in the section, check indentation + if inSection { + // Skip empty lines + if strings.TrimSpace(line) == "" { + continue + } + + // Count leading spaces + spaces := 0 + for _, char := range line { + if char == ' ' { + spaces++ + } else { + break + } + } + + // If indentation is less than or equal to key level, we've left the section + if spaces <= keyLevel { + break + } + + result.WriteString(line + "\n") + } + } + + if !inSection { + return "", nil + } + + return strings.TrimRight(result.String(), "\n"), nil +} + +// ExtractWorkflowNameFromMarkdown extracts workflow name from first H1 header +// This matches the bash extract_workflow_name_from_markdown function exactly +func ExtractWorkflowNameFromMarkdown(filePath string) (string, error) { + // First extract markdown content (excluding frontmatter) + markdownContent, err := ExtractMarkdown(filePath) + if err != nil { + 
return "", err + } + + // Look for first H1 header (line starting with "# ") + scanner := bufio.NewScanner(strings.NewReader(markdownContent)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "# ") { + // Extract text after "# " + return strings.TrimSpace(line[2:]), nil + } + } + + // No H1 header found, generate default name from filename + return generateDefaultWorkflowName(filePath), nil +} + +// generateDefaultWorkflowName creates a default workflow name from filename +// This matches the bash implementation's fallback behavior +func generateDefaultWorkflowName(filePath string) string { + // Get base filename without extension + baseName := filepath.Base(filePath) + baseName = strings.TrimSuffix(baseName, filepath.Ext(baseName)) + + // Convert hyphens to spaces + baseName = strings.ReplaceAll(baseName, "-", " ") + + // Capitalize first letter of each word + words := strings.Fields(baseName) + for i, word := range words { + if len(word) > 0 { + words[i] = strings.ToUpper(word[:1]) + word[1:] + } + } + + return strings.Join(words, " ") +} + +// ExtractMarkdown extracts markdown content from a file (excluding frontmatter) +// This matches the bash extract_markdown function +func ExtractMarkdown(filePath string) (string, error) { + content, err := os.ReadFile(filePath) + if err != nil { + return "", fmt.Errorf("failed to read file %s: %w", filePath, err) + } + + return ExtractMarkdownContent(string(content)) +} diff --git a/pkg/parser/remote_fetch.go b/pkg/parser/remote_fetch.go new file mode 100644 index 0000000000..97ff2616ee --- /dev/null +++ b/pkg/parser/remote_fetch.go @@ -0,0 +1,258 @@ +package parser + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/cli/go-gh/v2" + "github.com/githubnext/gh-aw/pkg/logger" +) + +var remoteLog = logger.New("parser:remote_fetch") + +// isUnderWorkflowsDirectory checks if a file path is a top-level workflow file (not in shared subdirectory) +func 
isUnderWorkflowsDirectory(filePath string) bool { + // Normalize the path to use forward slashes + normalizedPath := filepath.ToSlash(filePath) + + // Check if the path contains .github/workflows/ + if !strings.Contains(normalizedPath, ".github/workflows/") { + return false + } + + // Extract the part after .github/workflows/ + parts := strings.Split(normalizedPath, ".github/workflows/") + if len(parts) < 2 { + return false + } + + afterWorkflows := parts[1] + + // Check if there are any slashes after .github/workflows/ (indicating subdirectory) + // If there are, it's in a subdirectory like "shared/" and should not be treated as a workflow file + return !strings.Contains(afterWorkflows, "/") +} + +// resolveIncludePath resolves include path based on workflowspec format or relative path +func resolveIncludePath(filePath, baseDir string, cache *ImportCache) (string, error) { + // Check if this is a workflowspec (contains owner/repo/path format) + // Format: owner/repo/path@ref or owner/repo/path@ref#section + if isWorkflowSpec(filePath) { + // Download from GitHub using workflowspec (with cache support) + return downloadIncludeFromWorkflowSpec(filePath, cache) + } + + // Regular path, resolve relative to base directory + fullPath := filepath.Join(baseDir, filePath) + if _, err := os.Stat(fullPath); os.IsNotExist(err) { + return "", fmt.Errorf("file not found: %s", fullPath) + } + return fullPath, nil +} + +// isWorkflowSpec checks if a path looks like a workflowspec (owner/repo/path[@ref]) +func isWorkflowSpec(path string) bool { + // Remove section reference if present + cleanPath := path + if idx := strings.Index(path, "#"); idx != -1 { + cleanPath = path[:idx] + } + + // Remove ref if present + if idx := strings.Index(cleanPath, "@"); idx != -1 { + cleanPath = cleanPath[:idx] + } + + // Check if it has at least 3 parts (owner/repo/path) + parts := strings.Split(cleanPath, "/") + if len(parts) < 3 { + return false + } + + // Reject paths that start with "." 
(local paths like .github/workflows/...) + if strings.HasPrefix(cleanPath, ".") { + return false + } + + // Reject paths that start with "shared/" (local shared files) + if strings.HasPrefix(cleanPath, "shared/") { + return false + } + + // Reject absolute paths + if strings.HasPrefix(cleanPath, "/") { + return false + } + + return true +} + +// downloadIncludeFromWorkflowSpec downloads an include file from GitHub using workflowspec +// It first checks the cache, and only downloads if not cached +func downloadIncludeFromWorkflowSpec(spec string, cache *ImportCache) (string, error) { + // Parse the workflowspec + // Format: owner/repo/path@ref or owner/repo/path@ref#section + + // Remove section reference if present + cleanSpec := spec + if idx := strings.Index(spec, "#"); idx != -1 { + cleanSpec = spec[:idx] + } + + // Split on @ to get path and ref + parts := strings.SplitN(cleanSpec, "@", 2) + pathPart := parts[0] + var ref string + if len(parts) == 2 { + ref = parts[1] + } else { + ref = "main" // default to main branch + } + + // Parse path: owner/repo/path/to/file.md + slashParts := strings.Split(pathPart, "/") + if len(slashParts) < 3 { + return "", fmt.Errorf("invalid workflowspec: must be owner/repo/path[@ref]") + } + + owner := slashParts[0] + repo := slashParts[1] + filePath := strings.Join(slashParts[2:], "/") + + // Resolve ref to SHA for cache lookup + var sha string + if cache != nil { + // Only resolve SHA if we're using the cache + resolvedSHA, err := resolveRefToSHA(owner, repo, ref) + if err != nil { + // If the error is an authentication error, propagate it immediately + lowerErr := strings.ToLower(err.Error()) + if strings.Contains(lowerErr, "auth") || strings.Contains(lowerErr, "unauthoriz") || strings.Contains(lowerErr, "forbidden") || strings.Contains(lowerErr, "token") || strings.Contains(lowerErr, "permission denied") { + return "", fmt.Errorf("failed to resolve ref to SHA due to authentication error: %w", err) + } + 
remoteLog.Printf("Failed to resolve ref to SHA, will skip cache: %v", err) + // Continue without caching if SHA resolution fails + } else { + sha = resolvedSHA + // Check cache using SHA + if cachedPath, found := cache.Get(owner, repo, filePath, sha); found { + remoteLog.Printf("Using cached import: %s/%s/%s@%s (SHA: %s)", owner, repo, filePath, ref, sha) + return cachedPath, nil + } + } + } + + // Download the file content from GitHub + content, err := downloadFileFromGitHub(owner, repo, filePath, ref) + if err != nil { + return "", fmt.Errorf("failed to download include from %s: %w", spec, err) + } + + // If cache is available and we have a SHA, store in cache + if cache != nil && sha != "" { + cachedPath, err := cache.Set(owner, repo, filePath, sha, content) + if err != nil { + remoteLog.Printf("Failed to cache import: %v", err) + // Don't fail the compilation, fall back to temp file + } else { + return cachedPath, nil + } + } + + // Fallback: Create a temporary file to store the downloaded content + tempFile, err := os.CreateTemp("", "gh-aw-include-*.md") + if err != nil { + return "", fmt.Errorf("failed to create temp file: %w", err) + } + + if _, err := tempFile.Write(content); err != nil { + tempFile.Close() + os.Remove(tempFile.Name()) + return "", fmt.Errorf("failed to write temp file: %w", err) + } + + if err := tempFile.Close(); err != nil { + os.Remove(tempFile.Name()) + return "", fmt.Errorf("failed to close temp file: %w", err) + } + + return tempFile.Name(), nil +} + +// resolveRefToSHA resolves a git ref (branch, tag, or SHA) to its commit SHA +func resolveRefToSHA(owner, repo, ref string) (string, error) { + // If ref is already a full SHA (40 hex characters), return it as-is + if len(ref) == 40 && isHexString(ref) { + return ref, nil + } + + // Use gh CLI to get the commit SHA for the ref + // This works for branches, tags, and short SHAs + cmd := exec.Command("gh", "api", fmt.Sprintf("/repos/%s/%s/commits/%s", owner, repo, ref), "--jq", ".sha") + 
+ output, err := cmd.CombinedOutput() + if err != nil { + outputStr := string(output) + if strings.Contains(outputStr, "GH_TOKEN") || strings.Contains(outputStr, "authentication") || strings.Contains(outputStr, "not logged into") { + return "", fmt.Errorf("failed to resolve ref to SHA: GitHub authentication required. Please run 'gh auth login' or set GH_TOKEN/GITHUB_TOKEN environment variable: %w", err) + } + return "", fmt.Errorf("failed to resolve ref %s to SHA for %s/%s: %s: %w", ref, owner, repo, strings.TrimSpace(outputStr), err) + } + + sha := strings.TrimSpace(string(output)) + if sha == "" { + return "", fmt.Errorf("empty SHA returned for ref %s in %s/%s", ref, owner, repo) + } + + // Validate it's a valid SHA (40 hex characters) + if len(sha) != 40 || !isHexString(sha) { + return "", fmt.Errorf("invalid SHA format returned: %s", sha) + } + + return sha, nil +} + +// isHexString checks if a string contains only hexadecimal characters +func isHexString(s string) bool { + if len(s) == 0 { + return false + } + for _, c := range s { + if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + return false + } + } + return true +} + +func downloadFileFromGitHub(owner, repo, path, ref string) ([]byte, error) { + // Use go-gh/v2 to download the file + stdout, stderr, err := gh.Exec("api", fmt.Sprintf("/repos/%s/%s/contents/%s?ref=%s", owner, repo, path, ref), "--jq", ".content") + + if err != nil { + // Check if this is an authentication error + stderrStr := stderr.String() + if strings.Contains(stderrStr, "GH_TOKEN") || strings.Contains(stderrStr, "authentication") || strings.Contains(stderrStr, "not logged into") { + return nil, fmt.Errorf("failed to fetch file content: GitHub authentication required. 
Please run 'gh auth login' or set GH_TOKEN/GITHUB_TOKEN environment variable: %w", err) + } + return nil, fmt.Errorf("failed to fetch file content from %s/%s/%s@%s: %s: %w", owner, repo, path, ref, strings.TrimSpace(stderrStr), err) + } + + // The content is base64 encoded, decode it + contentBase64 := strings.TrimSpace(stdout.String()) + if contentBase64 == "" { + return nil, fmt.Errorf("empty content returned from GitHub API for %s/%s/%s@%s", owner, repo, path, ref) + } + + decodeCmd := exec.Command("base64", "-d") + decodeCmd.Stdin = strings.NewReader(contentBase64) + content, err := decodeCmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to decode base64 content: %w", err) + } + + return content, nil +} diff --git a/pkg/parser/workflow_update.go b/pkg/parser/workflow_update.go new file mode 100644 index 0000000000..dca262e5e9 --- /dev/null +++ b/pkg/parser/workflow_update.go @@ -0,0 +1,129 @@ +package parser + +import ( + "fmt" + "os" + "regexp" + "strings" + + "github.com/githubnext/gh-aw/pkg/console" + "github.com/goccy/go-yaml" +) + +// UpdateWorkflowFrontmatter updates the frontmatter of a workflow file using a callback function +func UpdateWorkflowFrontmatter(workflowPath string, updateFunc func(frontmatter map[string]any) error, verbose bool) error { + // Read the workflow file + content, err := os.ReadFile(workflowPath) + if err != nil { + return fmt.Errorf("failed to read workflow file: %w", err) + } + + // Parse frontmatter using existing helper + result, err := ExtractFrontmatterFromContent(string(content)) + if err != nil { + return fmt.Errorf("failed to parse frontmatter: %w", err) + } + + // Ensure frontmatter map exists + if result.Frontmatter == nil { + result.Frontmatter = make(map[string]any) + } + + // Apply the update function + if err := updateFunc(result.Frontmatter); err != nil { + return err + } + + // Convert back to YAML + updatedFrontmatter, err := yaml.Marshal(result.Frontmatter) + if err != nil { + return 
fmt.Errorf("failed to marshal updated frontmatter: %w", err) + } + + // Reconstruct the file content + updatedContent, err := reconstructWorkflowFile(string(updatedFrontmatter), result.Markdown) + if err != nil { + return fmt.Errorf("failed to reconstruct workflow file: %w", err) + } + + // Write the updated content back to the file + if err := os.WriteFile(workflowPath, []byte(updatedContent), 0644); err != nil { + return fmt.Errorf("failed to write updated workflow file: %w", err) + } + + if verbose { + fmt.Fprintln(os.Stderr, console.FormatInfoMessage(fmt.Sprintf("Updated workflow file: %s", console.ToRelativePath(workflowPath)))) + } + + return nil +} + +// EnsureToolsSection ensures the tools section exists in frontmatter and returns it +func EnsureToolsSection(frontmatter map[string]any) map[string]any { + if frontmatter["tools"] == nil { + frontmatter["tools"] = make(map[string]any) + } + + tools, ok := frontmatter["tools"].(map[string]any) + if !ok { + // If tools exists but is not a map, replace it + tools = make(map[string]any) + frontmatter["tools"] = tools + } + + return tools +} + +// reconstructWorkflowFile reconstructs a complete workflow file from frontmatter YAML and markdown content +func reconstructWorkflowFile(frontmatterYAML, markdownContent string) (string, error) { + var lines []string + + // Add opening frontmatter delimiter + lines = append(lines, "---") + + // Add frontmatter content (trim trailing newline from YAML marshal) + frontmatterStr := strings.TrimSuffix(frontmatterYAML, "\n") + if frontmatterStr != "" { + lines = append(lines, strings.Split(frontmatterStr, "\n")...) + } + + // Add closing frontmatter delimiter + lines = append(lines, "---") + + // Add markdown content if present + if markdownContent != "" { + lines = append(lines, markdownContent) + } + + return strings.Join(lines, "\n"), nil +} + +// QuoteCronExpressions ensures cron expressions in schedule sections are properly quoted. 
+// The YAML library may drop quotes from cron expressions like "0 14 * * 1-5" which +// causes validation errors since they start with numbers but contain spaces and special chars. +func QuoteCronExpressions(yamlContent string) string { + // Pattern to match unquoted cron expressions after "cron:" + // Matches: cron: 0 14 * * 1-5 + // Captures the cron value to be quoted + cronPattern := regexp.MustCompile(`(?m)^(\s*-?\s*cron:\s*)([0-9][^\n"']*)$`) + + // Replace unquoted cron expressions with quoted versions + return cronPattern.ReplaceAllStringFunc(yamlContent, func(match string) string { + // Extract the cron prefix and value + submatches := cronPattern.FindStringSubmatch(match) + if len(submatches) < 3 { + return match + } + prefix := submatches[1] + cronValue := strings.TrimSpace(submatches[2]) + + // Remove any trailing comments + if idx := strings.Index(cronValue, "#"); idx != -1 { + comment := cronValue[idx:] + cronValue = strings.TrimSpace(cronValue[:idx]) + return prefix + `"` + cronValue + `" ` + comment + } + + return prefix + `"` + cronValue + `"` + }) +} diff --git a/specs/REFACTORING_REPORT.md b/specs/REFACTORING_REPORT.md new file mode 100644 index 0000000000..ffb0792ed5 --- /dev/null +++ b/specs/REFACTORING_REPORT.md @@ -0,0 +1,206 @@ +# Frontmatter.go Refactoring - Completion Report + +## Overview +This document summarizes the refactoring work performed on `pkg/parser/frontmatter.go` to address the technical debt of having a 1,907-line monolithic file. + +## Objectives +- Split the large frontmatter.go file into focused, maintainable modules +- Reduce file size to improve code navigation and understanding +- Maintain test coverage and ensure no breaking changes +- Preserve public API compatibility + +## Work Completed + +### Files Created (4/7 planned) + +1. 
**ansi_strip.go** (108 LOC) + - **Purpose**: ANSI escape sequence stripping utilities + - **Functions**: `StripANSI()`, `isFinalCSIChar()`, `isCSIParameterChar()` + - **Responsibility**: Remove ANSI escape codes from strings for clean text output + - **Dependencies**: None (standalone utility) + +2. **frontmatter_content.go** (284 LOC) + - **Purpose**: Basic frontmatter parsing and extraction + - **Functions**: + - `FrontmatterResult` type + - `ExtractFrontmatterFromContent()` - Parse YAML frontmatter from markdown + - `ExtractFrontmatterString()` - Extract frontmatter as YAML string + - `ExtractMarkdownContent()` - Extract markdown without frontmatter + - `ExtractYamlChunk()` - Extract specific YAML sections + - `ExtractMarkdownSection()` - Extract markdown sections by header + - `ExtractWorkflowNameFromMarkdown()` - Extract workflow name from H1 + - `ExtractMarkdown()` - Extract markdown from file + - **Responsibility**: Pure parsing functions without side effects + - **Dependencies**: yaml library only + +3. **remote_fetch.go** (258 LOC) + - **Purpose**: GitHub remote content fetching and workflow-spec resolution + - **Functions**: + - `isUnderWorkflowsDirectory()` - Check if file is a workflow + - `resolveIncludePath()` - Resolve include paths (local or remote) + - `isWorkflowSpec()` - Detect workflow-spec format + - `downloadIncludeFromWorkflowSpec()` - Download from GitHub with caching + - `resolveRefToSHA()` - Resolve git refs to commit SHAs + - `isHexString()` - Validate hex strings + - `downloadFileFromGitHub()` - Fetch file content from GitHub API + - **Responsibility**: GitHub API interactions and remote file fetching + - **Dependencies**: gh CLI, ImportCache + +4. 
**workflow_update.go** (129 LOC)
   - **Purpose**: High-level workflow file update operations
   - **Functions**:
     - `UpdateWorkflowFrontmatter()` - Update workflow frontmatter via callback
     - `EnsureToolsSection()` - Ensure tools section exists in frontmatter
     - `reconstructWorkflowFile()` - Rebuild markdown file from parts
     - `QuoteCronExpressions()` - Sanitize cron expressions in YAML
   - **Responsibility**: Workflow file manipulation and cron expression handling
   - **Dependencies**: frontmatter_content.go, console formatting

### Metrics

| Metric | Before | After | Change |
|--------|--------|-------|--------|
| frontmatter.go size | 1,907 LOC | 1,166 LOC | -741 LOC (-39%) |
| Number of files | 1 | 5 | +4 files |
| Average file size | 1,907 LOC | 389 LOC | -80% |
| Test pass rate | 100% | 100% | No change ✓ |
| Build status | Success | Success | No change ✓ |
| Breaking changes | N/A | 0 | None ✓ |

### Quality Assurance

- ✅ All unit tests pass (pkg/parser, pkg/workflow)
- ✅ Build succeeds without errors
- ✅ No breaking changes to public API
- ✅ Import statements updated correctly
- ✅ Function signatures preserved
- ✅ No regressions in functionality

## Remaining Work (3/7 files)

The following files still need extraction:

### 1.
tool_sections.go (~420 LOC estimated) +**Functions to extract:** +- `isMCPType()` - Check if type is MCP-compatible +- `extractToolsFromContent()` - Extract tools configuration +- `extractSafeOutputsFromContent()` - Extract safe-outputs +- `extractMCPServersFromContent()` - Extract MCP servers +- `extractStepsFromContent()` - Extract steps configuration +- `extractEngineFromContent()` - Extract engine configuration +- `extractRuntimesFromContent()` - Extract runtimes +- `extractServicesFromContent()` - Extract services +- `extractNetworkFromContent()` - Extract network config +- `ExtractPermissionsFromContent()` - Extract permissions +- `extractSecretMaskingFromContent()` - Extract secret masking +- `extractFrontmatterField()` - Generic field extractor +- `mergeToolsFromJSON()` - Merge tool configurations +- `MergeTools()` - Merge tool maps +- `mergeAllowedArrays()` - Merge allowed arrays +- `mergeMCPTools()` - Merge MCP tool configs +- `areEqual()` - Deep equality comparison + +**Challenges:** +- Many interdependent extraction functions +- Complex merging logic +- Used by both imports and include expansion + +### 2. 
include_expander.go (~430 LOC estimated) +**Functions to extract:** +- `ProcessIncludes()` - Process include directives +- `processIncludesWithVisited()` - Process with cycle detection +- `processIncludedFileWithVisited()` - Process single included file +- `ExpandIncludes()` - Expand includes in content +- `ExpandIncludesWithManifest()` - Expand with manifest tracking +- `ExpandIncludesForEngines()` - Expand engine includes +- `ExpandIncludesForSafeOutputs()` - Expand safe-outputs includes +- `expandIncludesForField()` - Generic field expansion +- `ProcessIncludesForEngines()` - Process engine includes +- `ProcessIncludesForSafeOutputs()` - Process safe-outputs includes +- `processIncludesForField()` - Generic field processing + +**Challenges:** +- Recursive include resolution +- Cycle detection logic +- Depends on remote_fetch.go +- Complex state management + +### 3. frontmatter_imports.go (~360 LOC estimated) +**Functions to extract:** +- `ImportDirectiveMatch` type +- `importQueueItem` type +- `ImportsResult` type +- `ParseImportDirective()` - Parse import directives +- `ProcessImportsFromFrontmatter()` - Process imports +- `ProcessImportsFromFrontmatterWithManifest()` - Process with manifest +- BFS queue traversal logic +- Import merging logic + +**Challenges:** +- Complex BFS traversal algorithm +- Depends on both include_expander.go and tool_sections.go +- Stateful import processing +- Agent file detection logic + +### Why These Remain + +These 3 files represent the most complex and interdependent parts of the original frontmatter.go: + +1. **High interdependency**: They call each other frequently +2. **Stateful logic**: Include expansion and import processing maintain complex state +3. **Recursive algorithms**: Cycle detection and BFS traversal +4. **Integration complexity**: Would require careful refactoring to avoid breaking changes + +## Benefits Achieved + +### 1. 
Improved Maintainability +- Smaller files are easier to understand and navigate +- Clear separation of concerns +- Each module has a single, well-defined responsibility + +### 2. Better Testability +- Individual modules can be tested in isolation +- Easier to write focused unit tests +- Reduced test setup complexity + +### 3. Reduced Cognitive Load +- Developers can focus on one aspect at a time +- Clear module boundaries aid comprehension +- Less scrolling through large files + +### 4. Foundation for Further Work +- Established pattern for extracting remaining functions +- Demonstrated that refactoring can be done safely +- No regressions or test failures + +## Recommendations + +### Short Term +The current state represents substantial progress: +- 39% size reduction achieved +- 4 well-organized modules created +- All tests passing, no breaking changes +- Good foundation for future work + +### Long Term +Complete the refactoring by extracting the remaining 3 files: +1. Start with **tool_sections.go** (least dependencies) +2. Then **include_expander.go** (depends on tool_sections) +3. Finally **frontmatter_imports.go** (depends on both) + +### Approach for Remaining Work +1. Extract one file at a time +2. Run tests after each extraction +3. Use the same patterns established in this PR +4. Consider creating interfaces to reduce coupling +5. Document complex interdependencies + +## Conclusion + +This refactoring successfully reduced the size of frontmatter.go by 39% while maintaining 100% test pass rate and zero breaking changes. The extraction of 4 focused modules significantly improves code organization and maintainability. The remaining 3 files can be addressed in follow-up work using the patterns established here. + +## References + +- Original issue: #[issue-number] +- PR: #[pr-number] +- Related documentation: `.github/instructions/developer.instructions.md`