From 994a723c59269c3e891bc4ce7c72f3ad843257b5 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 3 Jul 2024 09:49:18 -0400 Subject: [PATCH 1/2] [Go] test picoschema with yaml test cases Use the yaml file of test cases to test the Go picoschema implementation. This uncovered a few problems. Some new features were implemented in JS but not in Go; they are implemented here. More seriously, it was discovered that the JSON schema package we are using, github.com/invopop/jsonschema, does not support arrays for the "type" field, and probably never will (see https://github.com/invopop/jsonschema/issues/134). Tests that require that are skipped. --- go/plugins/dotprompt/dotprompt_test.go | 34 ++- go/plugins/dotprompt/picoschema.go | 106 +++++---- go/plugins/dotprompt/picoschema_test.go | 207 ++++++------------ .../dotprompt/tests/picoschema_tests.yaml | 2 +- 4 files changed, 154 insertions(+), 195 deletions(-) diff --git a/go/plugins/dotprompt/dotprompt_test.go b/go/plugins/dotprompt/dotprompt_test.go index 481ce34435..1e783a8357 100644 --- a/go/plugins/dotprompt/dotprompt_test.go +++ b/go/plugins/dotprompt/dotprompt_test.go @@ -43,7 +43,8 @@ func TestPrompts(t *testing.T) { "type": "object", "required": [ "food" - ] + ], + "additionalProperties": false }`, output: `{ "properties": { @@ -72,11 +73,13 @@ func TestPrompts(t *testing.T) { "required": [ "name", "quantity" - ] + ], + "additionalProperties": false }, "type": "array" } }, + "additionalProperties": false, "type": "object", "required": [ "ingredients", @@ -150,20 +153,29 @@ func cmpSchema(t *testing.T, got *jsonschema.Schema, want string) string { return "" } - // JSON sorts maps but not slices. - // jsonschema slices are not sorted consistently. - sortSchemaSlices(got) - - data, err := json.Marshal(got) + jsonGot, err := convertSchema(got) if err != nil { t.Fatal(err) } - var jsonGot, jsonWant any - if err := json.Unmarshal(data, &jsonGot); err != nil { - t.Fatal(err) - } + var jsonWant any if err := json.Unmarshal([]byte(want), &jsonWant); err != nil { t.Fatalf("unmarshaling %q failed: %v", want, err) } return cmp.Diff(jsonWant, jsonGot) } + +// convertSchema marshals s to JSON, then unmarshals the result. +func convertSchema(s *jsonschema.Schema) (any, error) { + // JSON sorts maps but not slices. + // jsonschema slices are not sorted consistently. + sortSchemaSlices(s) + data, err := json.Marshal(s) + if err != nil { + return nil, err + } + var a any + if err := json.Unmarshal(data, &a); err != nil { + return nil, err + } + return a, nil +} diff --git a/go/plugins/dotprompt/picoschema.go b/go/plugins/dotprompt/picoschema.go index 47ae2cd098..ca5b7c00c3 100644 --- a/go/plugins/dotprompt/picoschema.go +++ b/go/plugins/dotprompt/picoschema.go @@ -21,7 +21,7 @@ import ( "strings" "github.com/invopop/jsonschema" - "github.com/wk8/go-ordered-map/v2" + orderedmap "github.com/wk8/go-ordered-map/v2" ) // picoschemaToJSONSchema turns picoschema input into a JSONSchema. @@ -57,13 +57,20 @@ func picoschemaToJSONSchema(val any) (*jsonschema.Schema, error) { // parsePico parses picoschema from the result of the YAML parser. func parsePico(val any) (*jsonschema.Schema, error) { - if str, ok := val.(string); ok { - typ, desc, found := strings.Cut(str, ",") + switch val := val.(type) { + default: + return nil, fmt.Errorf("picoschema: value %v of type %[1]T is not an object, slice or string", val) + + case string: + typ, desc, found := strings.Cut(val, ",") switch typ { - case "string", "boolean", "null", "number", "integer": + case "string", "boolean", "null", "number", "integer", "any": default: return nil, fmt.Errorf("picoschema: unsupported scalar type %q", typ) } + if typ == "any" { + typ = "" + } ret := &jsonschema.Schema{ Type: typ, } @@ -71,57 +78,68 @@ func parsePico(val any) (*jsonschema.Schema, error) { ret.Description = strings.TrimSpace(desc) } return ret, nil - } - m, ok := val.(map[string]any) - if !ok { - return nil, fmt.Errorf("picoschema: value %v of type %T is not an object or a string", val, val) - } + case []any: // assume enum + return &jsonschema.Schema{Enum: val}, nil - ret := &jsonschema.Schema{ - Type: "object", - Properties: orderedmap.New[string, *jsonschema.Schema](), - } - for k, v := range m { - name, typ, found := strings.Cut(k, "(") - propertyName, isOptional := strings.CutSuffix(name, "?") - if !isOptional { - ret.Required = append(ret.Required, propertyName) + case map[string]any: + ret := &jsonschema.Schema{ + Type: "object", + Properties: orderedmap.New[string, *jsonschema.Schema](), + AdditionalProperties: jsonschema.FalseSchema, } + for k, v := range val { + name, typ, found := strings.Cut(k, "(") + propertyName, isOptional := strings.CutSuffix(name, "?") + if name != "" && !isOptional { + ret.Required = append(ret.Required, propertyName) + } - property, err := parsePico(v) - if err != nil { - return nil, err - } + property, err := parsePico(v) + if err != nil { + return nil, err + } - if !found { - ret.Properties.Set(propertyName, property) - continue - } + if !found { + ret.Properties.Set(propertyName, property) + continue + } + + typ = strings.TrimSuffix(typ, ")") + typ, desc, found := strings.Cut(strings.TrimSuffix(typ, ")"), ",") + switch typ { + case "array": + property = &jsonschema.Schema{ + Type: "array", + Items: property, + } + case "object": + // Use property unchanged. + case "enum": + if property.Enum == nil { + return nil, fmt.Errorf("picoschema: enum value %v is not an array", property) + } + if isOptional { + property.Enum = append(property.Enum, nil) + } + + case "*": + ret.AdditionalProperties = property + continue + default: + return nil, fmt.Errorf("picoschema: parenthetical type %q is none of %q", typ, + []string{"object", "array", "enum", "*"}) - typ = strings.TrimSuffix(typ, ")") - typ, desc, found := strings.Cut(strings.TrimSuffix(typ, ")"), ",") - switch typ { - case "array": - property = &jsonschema.Schema{ - Type: "array", - Items: property, } - case "object": - // Use property unchanged. - default: - return nil, fmt.Errorf("picoschema: parenthetical type %q is neither %q nor %q", typ, "object", "array") - } + if found { + property.Description = strings.TrimSpace(desc) + } - if found { - property.Description = strings.TrimSpace(desc) + ret.Properties.Set(propertyName, property) } - - ret.Properties.Set(propertyName, property) + return ret, nil } - - return ret, nil } // mapToJSONSchema converts a YAML value to a JSONSchema. diff --git a/go/plugins/dotprompt/picoschema_test.go b/go/plugins/dotprompt/picoschema_test.go index 2923e18dbc..74bc7f1554 100644 --- a/go/plugins/dotprompt/picoschema_test.go +++ b/go/plugins/dotprompt/picoschema_test.go @@ -15,158 +15,87 @@ package dotprompt import ( + "os" + "path/filepath" "testing" + "github.com/google/go-cmp/cmp" + "github.com/invopop/jsonschema" "gopkg.in/yaml.v3" ) // TestPicoschema tests the same cases as picoschema_test.ts. func TestPicoschema(t *testing.T) { - tests := []struct { - description string - yaml string - want string - }{ - { - description: "simple scalar, no description", - yaml: `schema: string`, - want: `{ "type": "string" }`, - }, - { - description: "simple scalar, with description", - yaml: `schema: number, the description`, - want: `{ "type": "number", "description": "the description" }`, - }, - { - description: "simple scalar, with description (no whitespace)", - yaml: `schema: number,the description`, - want: `{ "type": "number", "description": "the description" }`, - }, - { - description: "simple scalar, with description (comma in description)", - yaml: `schema: number,the description, which has, multiple commas`, - want: `{ - "type": "number", - "description": "the description, which has, multiple commas" - }`, - }, - { - description: "simple scalar, with description (extra whitespace)", - yaml: `schema: number, the description`, - want: `{ "type": "number", "description": "the description" }`, - }, - { - description: "simple object", - yaml: `schema: - field1: boolean - field2: string`, - want: `{ - "type": "object", - "properties": { - "field1": { "type": "boolean" }, - "field2": { "type": "string" } - }, - "required": ["field1", "field2"] - }`, - }, - { - description: "required field", - yaml: `schema: - req: string, required field - nonreq?: boolean, optional field`, - want: `{ - "type": "object", - "properties": { - "req": { "type": "string", "description": "required field" }, - "nonreq": { "type": "boolean", "description": "optional field" } - }, - "required": ["req"] - }`, - }, - { - description: "array of scalars, with and without description", - yaml: `schema: - tags(array, list of tags): string, the tag - vector(array): number`, - want: `{ - "type": "object", - "properties": { - "tags": { - "type": "array", - "description": "list of tags", - "items": { "type": "string", "description": "the tag" } - }, - "vector": { "type": "array", "items": { "type": "number" } } - }, - "required": ["tags", "vector"] - }`, - }, - { - description: "nested object in array and out", - yaml: `schema: - obj?(object, a nested object): - nest1?: string - arr(array, array of objects): - nest2?: boolean`, - want: `{ - "type": "object", - "properties": { - "obj": { - "type": "object", - "description": "a nested object", - "properties": { "nest1": { "type": "string" } } - }, - "arr": { - "type": "array", - "description": "array of objects", - "items": { - "type": "object", - "properties": { "nest2": { "type": "boolean" } } - } - } - }, - "required": ["arr"] - }`, - }, - { - description: "simple json schema type", - yaml: `schema: - type: string`, - want: `{ "type": "string" }`, - }, - { - description: "simple json schema type", - yaml: `schema: - type: string`, - want: `{ "type": "string" }`, - }, - { - description: "inferred json schema from properties", - yaml: `schema: - properties: - foo: {type: string}`, - want: `{ "type": "object", "properties": { "foo": { "type": "string" } } }`, - }, + type test struct { + Description string + YAML string + Want map[string]any + } + + convertSchema(&jsonschema.Schema{}) + data, err := os.ReadFile(filepath.FromSlash("../../../js/plugins/dotprompt/tests/picoschema_tests.yaml")) + if err != nil { + t.Fatal(err) + } + + var tests []test + if err := yaml.Unmarshal(data, &tests); err != nil { + t.Fatal(err) + } + + skip := map[string]bool{ + "required field": true, + "nested object in array and out": true, } for _, test := range tests { - var val any - if err := yaml.Unmarshal([]byte(test.yaml), &val); err != nil { - t.Errorf("%s YAML unmarshal failure: %v", test.description, err) - continue - } + t.Run(test.Description, func(t *testing.T) { + if skip[test.Description] { + t.Skip("no support for type as an array") + } + var val any + if err := yaml.Unmarshal([]byte(test.YAML), &val); err != nil { + t.Fatalf("YAML unmarshal failure: %v", err) + } - // The tests, copied from TypeScript, use a schema field. - val = val.(map[string]any)["schema"] + // The tests, copied from TypeScript, use a schema field. + val = val.(map[string]any)["schema"] - schema, err := picoschemaToJSONSchema(val) - if err != nil { - t.Errorf("%s: %v", test.description, err) - continue - } + schema, err := picoschemaToJSONSchema(val) + if err != nil { + t.Fatal(err) + } + got, err := convertSchema(schema) + if err != nil { + t.Fatal(err) + } + want := replaceEmptySchemas(test.Want) + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("mismatch (-want, +got):\n%s", diff) + } + }) + } +} - if diff := cmpSchema(t, schema, test.want); diff != "" { - t.Errorf("%s: mismatch (-want, +got):\n%s", test.description, diff) +// replaceEmptySchemas replaces empty maps in m, which represent +// empty JSON schemas, with the value true. +// It transforms the expected values taken from the suite of JS test cases +// into a form that matches the JSON marshalling of jsonschema.Schema, +// which marshals empty schemas as "true". +func replaceEmptySchemas(m map[string]any) any { + if m == nil { + return nil + } + if len(m) == 0 { + return true + } + if p, ok := m["properties"]; ok { + pm := p.(map[string]any) + for k, v := range pm { + if vm, ok := v.(map[string]any); ok && len(vm) == 0 { + pm[k] = true + } } } + return m } diff --git a/js/plugins/dotprompt/tests/picoschema_tests.yaml b/js/plugins/dotprompt/tests/picoschema_tests.yaml index 52c81fae29..2e0834228a 100644 --- a/js/plugins/dotprompt/tests/picoschema_tests.yaml +++ b/js/plugins/dotprompt/tests/picoschema_tests.yaml @@ -161,7 +161,7 @@ type: object, } -- description: wildcard fields with other fields +- description: wildcard fields without other fields yaml: | schema: (*): number, lucky number From 1a36d753a7002ed2bcba2e513ba84dc25088cecd Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Wed, 3 Jul 2024 15:40:20 -0400 Subject: [PATCH 2/2] reviewer comments --- go/plugins/dotprompt/picoschema_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/go/plugins/dotprompt/picoschema_test.go b/go/plugins/dotprompt/picoschema_test.go index 74bc7f1554..c4cf669372 100644 --- a/go/plugins/dotprompt/picoschema_test.go +++ b/go/plugins/dotprompt/picoschema_test.go @@ -20,7 +20,6 @@ import ( "testing" "github.com/google/go-cmp/cmp" - "github.com/invopop/jsonschema" "gopkg.in/yaml.v3" ) @@ -32,7 +31,6 @@ func TestPicoschema(t *testing.T) { Want map[string]any } - convertSchema(&jsonschema.Schema{}) data, err := os.ReadFile(filepath.FromSlash("../../../js/plugins/dotprompt/tests/picoschema_tests.yaml")) if err != nil { t.Fatal(err) @@ -58,7 +56,7 @@ func TestPicoschema(t *testing.T) { t.Fatalf("YAML unmarshal failure: %v", err) } - // The tests, copied from TypeScript, use a schema field. + // The tests use a schema field. val = val.(map[string]any)["schema"] schema, err := picoschemaToJSONSchema(val)