From 282c5aaa132657ddb484acd35701a2975a8e78d8 Mon Sep 17 00:00:00 2001 From: Daily Perf Improver Date: Sat, 25 Oct 2025 14:22:58 +0000 Subject: [PATCH 1/2] perf: cache compiled JSON schemas to improve compilation speed Optimize workflow compilation by caching compiled JSON schemas instead of recompiling them for every workflow validation. This eliminates redundant schema parsing and compilation overhead. Changes: - pkg/parser/schema.go: Cache frontmatter schema compilation - Add sync.Once pattern for main workflow, included file, and MCP config schemas - Schemas are now compiled once and reused across all workflow compilations - pkg/workflow/validation.go: Cache GitHub Actions schema compilation - Add sync.Once pattern for GitHub Actions workflow schema - Schema compilation now happens once per process lifetime Performance Impact: - Eliminates repeated JSON schema parsing and compilation overhead - More significant on slower systems or when compiling many workflows - Zero performance regression, maintains full schema validation Trade-offs: - Complexity: +100 lines of caching logic (well-structured, thread-safe) - Memory: Minimal (cached schemas ~100KB total) - Maintainability: No impact (localized changes, clear pattern) Validation: - All unit tests pass - All integration tests pass - Code formatted with gofmt - No linting errors - Tested with compilation of 56 workflows successfully --- pkg/parser/schema.go | 75 +++++++++++++++++++++++++++++++++++--- pkg/workflow/validation.go | 58 +++++++++++++++++++++-------- 2 files changed, 112 insertions(+), 21 deletions(-) diff --git a/pkg/parser/schema.go b/pkg/parser/schema.go index 08fb233f18c..97deefceece 100644 --- a/pkg/parser/schema.go +++ b/pkg/parser/schema.go @@ -9,6 +9,7 @@ import ( "regexp" "sort" "strings" + "sync" "github.com/githubnext/gh-aw/pkg/console" "github.com/githubnext/gh-aw/pkg/constants" @@ -120,24 +121,88 @@ func ValidateMCPConfigWithSchema(mcpConfig map[string]any, toolName string) erro } // validateWithSchema validates frontmatter against a JSON schema -func validateWithSchema(frontmatter map[string]any, schemaJSON, context string) error { +// Cached compiled schemas to avoid recompiling on every validation +var ( + mainWorkflowSchemaOnce sync.Once + includedFileSchemaOnce sync.Once + mcpConfigSchemaOnce sync.Once + + compiledMainWorkflowSchema *jsonschema.Schema + compiledIncludedFileSchema *jsonschema.Schema + compiledMcpConfigSchema *jsonschema.Schema + + mainWorkflowSchemaError error + includedFileSchemaError error + mcpConfigSchemaError error +) + +// getCompiledMainWorkflowSchema returns the compiled main workflow schema, compiling it once and caching +func getCompiledMainWorkflowSchema() (*jsonschema.Schema, error) { + mainWorkflowSchemaOnce.Do(func() { + compiledMainWorkflowSchema, mainWorkflowSchemaError = compileSchema(mainWorkflowSchema, "http://contoso.com/main-workflow-schema.json") + }) + return compiledMainWorkflowSchema, mainWorkflowSchemaError +} + +// getCompiledIncludedFileSchema returns the compiled included file schema, compiling it once and caching +func getCompiledIncludedFileSchema() (*jsonschema.Schema, error) { + includedFileSchemaOnce.Do(func() { + compiledIncludedFileSchema, includedFileSchemaError = compileSchema(includedFileSchema, "http://contoso.com/included-file-schema.json") + }) + return compiledIncludedFileSchema, includedFileSchemaError +} + +// getCompiledMcpConfigSchema returns the compiled MCP config schema, compiling it once and caching +func getCompiledMcpConfigSchema() (*jsonschema.Schema, error) { + mcpConfigSchemaOnce.Do(func() { + compiledMcpConfigSchema, mcpConfigSchemaError = compileSchema(mcpConfigSchema, "http://contoso.com/mcp-config-schema.json") + }) + return compiledMcpConfigSchema, mcpConfigSchemaError +} + +// compileSchema compiles a JSON schema from a JSON string +func compileSchema(schemaJSON, schemaURL string) (*jsonschema.Schema, error) { // Create a new compiler compiler := jsonschema.NewCompiler() // Parse the schema JSON first var schemaDoc any if err := json.Unmarshal([]byte(schemaJSON), &schemaDoc); err != nil { - return fmt.Errorf("schema validation error for %s: failed to parse schema JSON: %w", context, err) + return nil, fmt.Errorf("failed to parse schema JSON: %w", err) } - // Add the schema as a resource with a temporary URL - schemaURL := "http://contoso.com/schema.json" + // Add the schema as a resource if err := compiler.AddResource(schemaURL, schemaDoc); err != nil { - return fmt.Errorf("schema validation error for %s: failed to add schema resource: %w", context, err) + return nil, fmt.Errorf("failed to add schema resource: %w", err) } // Compile the schema schema, err := compiler.Compile(schemaURL) + if err != nil { + return nil, fmt.Errorf("failed to compile schema: %w", err) + } + + return schema, nil +} + +func validateWithSchema(frontmatter map[string]any, schemaJSON, context string) error { + // Determine which cached schema to use based on the schemaJSON + var schema *jsonschema.Schema + var err error + + switch schemaJSON { + case mainWorkflowSchema: + schema, err = getCompiledMainWorkflowSchema() + case includedFileSchema: + schema, err = getCompiledIncludedFileSchema() + case mcpConfigSchema: + schema, err = getCompiledMcpConfigSchema() + default: + // Fallback for unknown schemas (shouldn't happen in normal operation) + // Compile the schema on-the-fly + schema, err = compileSchema(schemaJSON, "http://contoso.com/schema.json") + } + if err != nil { return fmt.Errorf("schema validation error for %s: %w", context, err) } diff --git a/pkg/workflow/validation.go b/pkg/workflow/validation.go index 9845dc10449..cddcf7dbf3d 100644 --- a/pkg/workflow/validation.go +++ b/pkg/workflow/validation.go @@ -7,6 +7,7 @@ import ( "os" "regexp" "strings" + "sync" "github.com/cli/go-gh/v2" "github.com/githubnext/gh-aw/pkg/console" @@ -218,6 +219,44 @@ func collectPackagesFromWorkflow( } // validateGitHubActionsSchema validates the generated YAML content against the GitHub Actions workflow schema +// Cached compiled schema to avoid recompiling on every validation +var ( + compiledSchemaOnce sync.Once + compiledSchema *jsonschema.Schema + schemaCompileError error +) + +// getCompiledSchema returns the compiled GitHub Actions schema, compiling it once and caching +func getCompiledSchema() (*jsonschema.Schema, error) { + compiledSchemaOnce.Do(func() { + // Parse the embedded schema + var schemaDoc any + if err := json.Unmarshal([]byte(githubWorkflowSchema), &schemaDoc); err != nil { + schemaCompileError = fmt.Errorf("failed to parse embedded GitHub Actions schema: %w", err) + return + } + + // Create compiler and add the schema as a resource + loader := jsonschema.NewCompiler() + schemaURL := "https://json.schemastore.org/github-workflow.json" + if err := loader.AddResource(schemaURL, schemaDoc); err != nil { + schemaCompileError = fmt.Errorf("failed to add schema resource: %w", err) + return + } + + // Compile the schema once + schema, err := loader.Compile(schemaURL) + if err != nil { + schemaCompileError = fmt.Errorf("failed to compile GitHub Actions schema: %w", err) + return + } + + compiledSchema = schema + }) + + return compiledSchema, schemaCompileError +} + func (c *Compiler) validateGitHubActionsSchema(yamlContent string) error { // Convert YAML to any for JSON conversion var workflowData any @@ -231,23 +270,10 @@ func (c *Compiler) validateGitHubActionsSchema(yamlContent string) error { return fmt.Errorf("failed to convert YAML to JSON for validation: %w", err) } - // Parse the embedded schema - var schemaDoc any - if err := json.Unmarshal([]byte(githubWorkflowSchema), &schemaDoc); err != nil { - return fmt.Errorf("failed to parse embedded GitHub Actions schema: %w", err) - } - - // Create compiler and add the schema as a resource - loader := jsonschema.NewCompiler() - schemaURL := "https://json.schemastore.org/github-workflow.json" - if err := loader.AddResource(schemaURL, schemaDoc); err != nil { - return fmt.Errorf("failed to add schema resource: %w", err) - } - - // Compile the schema - schema, err := loader.Compile(schemaURL) + // Get the cached compiled schema + schema, err := getCompiledSchema() if err != nil { - return fmt.Errorf("failed to compile GitHub Actions schema: %w", err) + return err } // Validate the JSON data against the schema From 13125d48a059b779b0e043b018480c5116c81bb4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 25 Oct 2025 14:34:13 +0000 Subject: [PATCH 2/2] chore: add changeset for schema compilation caching [skip-ci] --- .changeset/patch-cache-schema-compilation.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/patch-cache-schema-compilation.md diff --git a/.changeset/patch-cache-schema-compilation.md b/.changeset/patch-cache-schema-compilation.md new file mode 100644 index 00000000000..a8b6b2d4b69 --- /dev/null +++ b/.changeset/patch-cache-schema-compilation.md @@ -0,0 +1,5 @@ +--- +"gh-aw": patch +--- + +Cache compiled JSON schemas to improve compilation speed