diff --git a/schema.go b/schema.go new file mode 100644 index 00000000..84b3dd58 --- /dev/null +++ b/schema.go @@ -0,0 +1,110 @@ +package filetypes + +import ( + "reflect" + + cqjsonschema "github.com/cloudquery/codegen/jsonschema" + "github.com/cloudquery/filetypes/v4/csv" + jsonfile "github.com/cloudquery/filetypes/v4/json" + "github.com/cloudquery/filetypes/v4/parquet" + "github.com/invopop/jsonschema" + orderedmap "github.com/wk8/go-ordered-map/v2" +) + +// JSONSchemaOptions should be used when generating schema to add the nested spec info +func (FileSpec) JSONSchemaOptions() []cqjsonschema.Option { + fileSpecType := reflect.TypeOf(FileSpec{}) + return []cqjsonschema.Option{func(r *jsonschema.Reflector) { + fileSpecFields := func(t reflect.Type) []reflect.StructField { + if t != fileSpecType { + return nil + } + return reflect.VisibleFields(reflect.TypeOf(struct { + CSVSpec csv.Spec + JSONSpec jsonfile.Spec + ParquetSpec parquet.Spec + }{})) + } + if r.AdditionalFields == nil { + r.AdditionalFields = fileSpecFields + } else { + old := r.AdditionalFields + r.AdditionalFields = func(r reflect.Type) []reflect.StructField { + if extra := fileSpecFields(r); len(extra) > 0 { + return extra + } + return old(r) + } + } + }} +} + +func (FileSpec) JSONSchemaExtend(sc *jsonschema.Schema) { + // now we need to remove extra fields + refCSVSpec := sc.Properties.Value("CSVSpec").Ref + refJSONSpec := sc.Properties.Value("JSONSpec").Ref + refParquetSpec := sc.Properties.Value("ParquetSpec").Ref + sc.Properties.Delete("CSVSpec") + sc.Properties.Delete("JSONSpec") + sc.Properties.Delete("ParquetSpec") + + sc.Properties.Set("format_spec", &jsonschema.Schema{ + OneOf: []*jsonschema.Schema{ + { + AnyOf: []*jsonschema.Schema{ + {Ref: refCSVSpec}, + {Ref: refJSONSpec}, + {Ref: refParquetSpec}, + }, + }, + {Type: "null"}, + }, + }) + + // now we need to enforce format -> specific type + formatSpecOneOf := []*jsonschema.Schema{ + // CSV + { + Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { + properties := jsonschema.NewProperties() + properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeCSV}) + properties.Set("format_spec", &jsonschema.Schema{ + OneOf: []*jsonschema.Schema{{Ref: refCSVSpec}, {Type: "null"}}, + }) + return properties + }(), + }, + // JSON + { + Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { + properties := jsonschema.NewProperties() + properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeJSON}) + properties.Set("format_spec", &jsonschema.Schema{ + OneOf: []*jsonschema.Schema{{Ref: refJSONSpec}, {Type: "null"}}, + }) + return properties + }(), + }, + // Parquet + { + Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { + properties := jsonschema.NewProperties() + properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeParquet}) + properties.Set("format_spec", &jsonschema.Schema{ + OneOf: []*jsonschema.Schema{{Ref: refParquetSpec}, {Type: "null"}}, + }) + return properties + }(), + }, + } + if sc.OneOf == nil { + sc.OneOf = formatSpecOneOf + } else { + // may happen when embedding, so move to all_of{{one_of},{one_of}} + sc.AllOf = []*jsonschema.Schema{ + {OneOf: sc.OneOf}, + {OneOf: formatSpecOneOf}, + } + sc.OneOf = nil + } +} diff --git a/schema.json b/schema.json index e0a6bca6..cc2193cd 100644 --- a/schema.json +++ b/schema.json @@ -4,37 +4,6 @@ "$ref": "#/$defs/FileSpec", "$defs": { "FileSpec": { - "$id": "/schemas/FileSpec", - "$defs": { - "CSVSpec": { - "properties": { - "skip_header": { - "type": "boolean", - "description": "Specifies if the first line of a file should be the header.", - "default": false - }, - "delimiter": { - "type": "string", - "pattern": "^.$", - "description": "Character that will be used as the delimiter.", - "default": "," - } - }, - "additionalProperties": false, - "type": "object", - "description": "CloudQuery CSV file output spec." - }, - "JSONSpec": { - "additionalProperties": false, - "type": "object", - "description": "CloudQuery JSON file output spec." - }, - "ParquetSpec": { - "additionalProperties": false, - "type": "object", - "description": "CloudQuery Parquet file output spec." - } - }, "oneOf": [ { "properties": { @@ -45,7 +14,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/CSVSpec" + "$ref": "#/$defs/Spec" }, { "type": "null" @@ -63,7 +32,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/JSONSpec" + "$ref": "#/$defs/Spec-1" }, { "type": "null" @@ -81,7 +50,7 @@ "format_spec": { "oneOf": [ { - "$ref": "#/$defs/ParquetSpec" + "$ref": "#/$defs/Spec-2" }, { "type": "null" @@ -106,13 +75,13 @@ { "anyOf": [ { - "$ref": "#/$defs/CSVSpec" + "$ref": "#/$defs/Spec" }, { - "$ref": "#/$defs/JSONSpec" + "$ref": "#/$defs/Spec-1" }, { - "$ref": "#/$defs/ParquetSpec" + "$ref": "#/$defs/Spec-2" } ] }, @@ -135,6 +104,34 @@ "required": [ "format" ] + }, + "Spec": { + "properties": { + "skip_header": { + "type": "boolean", + "description": "Specifies if the first line of a file should be the header.", + "default": false + }, + "delimiter": { + "type": "string", + "pattern": "^.$", + "description": "Character that will be used as the delimiter.", + "default": "," + } + }, + "additionalProperties": false, + "type": "object", + "description": "CloudQuery CSV file output spec." + }, + "Spec-1": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery JSON file output spec." + }, + "Spec-2": { + "additionalProperties": false, + "type": "object", + "description": "CloudQuery Parquet file output spec." } } } diff --git a/schema_test.go b/schema_test.go new file mode 100644 index 00000000..fea8959e --- /dev/null +++ b/schema_test.go @@ -0,0 +1,91 @@ +package filetypes + +import ( + "testing" + + "github.com/cloudquery/codegen/jsonschema" + "github.com/stretchr/testify/require" +) + +func TestFileSpec_JSONSchemaExtend(t *testing.T) { + schema, err := jsonschema.Generate(FileSpec{}, FileSpec{}.JSONSchemaOptions()...) + require.NoError(t, err) + + jsonschema.TestJSONSchema(t, string(schema), []jsonschema.TestCase{ + { + Name: "empty", + Err: true, // missing format + Spec: `{}`, + }, + { + Name: "empty format", + Err: true, + Spec: `{"format":""}`, + }, + { + Name: "null format", + Err: true, + Spec: `{"format":null}`, + }, + { + Name: "bad format", + Err: true, + Spec: `{"format":123}`, + }, + { + Name: "bad format value", + Err: true, + Spec: `{"format":"abc"}`, + }, + { + Name: "csv format", + Spec: `{"format":"csv"}`, + }, + { + Name: "csv format + empty format_spec", + Spec: `{"format":"csv","format_spec":{}}`, + }, + { + Name: "csv format + null format_spec", + Spec: `{"format":"csv","format_spec":null}`, + }, + { + Name: "csv format + csv format_spec", + Spec: `{"format":"csv","format_spec":{"skip_header": true, "delimiter":","}}`, + }, + { + Name: "json format", + Spec: `{"format":"json"}`, + }, + { + Name: "json format + empty format_spec", + Spec: `{"format":"json","format_spec":{}}`, + }, + { + Name: "json format + null format_spec", + Spec: `{"format":"json","format_spec":null}`, + }, + { + Name: "json format + csv format_spec", + Err: true, + Spec: `{"format":"json","format_spec":{"skip_header": true, "delimiter":","}}`, + }, + { + Name: "parquet format", + Spec: `{"format":"parquet"}`, + }, + { + Name: "parquet format + empty format_spec", + Spec: `{"format":"parquet","format_spec":{}}`, + }, + { + Name: "parquet format + null format_spec", + Spec: `{"format":"parquet","format_spec":null}`, + }, + { + Name: "parquet format + csv format_spec", + Err: true, + Spec: `{"format":"parquet","format_spec":{"skip_header": true, "delimiter":","}}`, + }, + }) +} diff --git a/schemagen/main.go b/schemagen/main.go index 58d664c2..e3e5d5ba 100644 --- a/schemagen/main.go +++ b/schemagen/main.go @@ -13,7 +13,9 @@ import ( func main() { fmt.Println("Generating JSON schema for plugin spec") jsonschema.GenerateIntoFile(new(filetypes.FileSpec), path.Join(currDir(), "..", "schema.json"), - jsonschema.WithAddGoComments("github.com/cloudquery/filetypes/v4", path.Join(currDir(), "..")), + append(filetypes.FileSpec{}.JSONSchemaOptions(), + jsonschema.WithAddGoComments("github.com/cloudquery/filetypes/v4", path.Join(currDir(), "..")), + )..., ) } diff --git a/spec.go b/spec.go index cfc35fe4..a76d8f9d 100644 --- a/spec.go +++ b/spec.go @@ -8,8 +8,6 @@ import ( "github.com/cloudquery/filetypes/v4/csv" jsonfile "github.com/cloudquery/filetypes/v4/json" "github.com/cloudquery/filetypes/v4/parquet" - "github.com/invopop/jsonschema" - orderedmap "github.com/wk8/go-ordered-map/v2" ) type FormatType string @@ -44,84 +42,6 @@ type FileSpec struct { parquetSpec *parquet.Spec } -func (FileSpec) JSONSchemaExtend(sc *jsonschema.Schema) { - sc.ID = "/schemas/FileSpec" - sc.Definitions = jsonschema.Definitions{ - "CSVSpec": csv.Spec{}.JSONSchema(), - "JSONSpec": jsonfile.Spec{}.JSONSchema(), - "ParquetSpec": parquet.Spec{}.JSONSchema(), - } - - sc.Properties.Set("format_spec", &jsonschema.Schema{ - OneOf: []*jsonschema.Schema{ - { - AnyOf: []*jsonschema.Schema{ - {Ref: jsonschema.EmptyID.Def("CSVSpec").String()}, - {Ref: jsonschema.EmptyID.Def("JSONSpec").String()}, - {Ref: jsonschema.EmptyID.Def("ParquetSpec").String()}, - }, - }, - {Type: "null"}, - }, - }) - - // now we need to enforce format -> specific type - formatSpecOneOf := []*jsonschema.Schema{ - // CSV - { - Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { - properties := jsonschema.NewProperties() - properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeCSV}) - properties.Set("format_spec", &jsonschema.Schema{ - OneOf: []*jsonschema.Schema{ - {Ref: jsonschema.EmptyID.Def("CSVSpec").String()}, - {Type: "null"}, - }, - }) - return properties - }(), - }, - // JSON - { - Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { - properties := jsonschema.NewProperties() - properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeJSON}) - properties.Set("format_spec", &jsonschema.Schema{ - OneOf: []*jsonschema.Schema{ - {Ref: jsonschema.EmptyID.Def("JSONSpec").String()}, - {Type: "null"}, - }, - }) - return properties - }(), - }, - // Parquet - { - Properties: func() *orderedmap.OrderedMap[string, *jsonschema.Schema] { - properties := jsonschema.NewProperties() - properties.Set("format", &jsonschema.Schema{Type: "string", Const: FormatTypeParquet}) - properties.Set("format_spec", &jsonschema.Schema{ - OneOf: []*jsonschema.Schema{ - {Ref: jsonschema.EmptyID.Def("ParquetSpec").String()}, - {Type: "null"}, - }, - }) - return properties - }(), - }, - } - if sc.OneOf == nil { - sc.OneOf = formatSpecOneOf - } else { - // may happen when embedding, so move to all_of{{one_of},{one_of}} - sc.AllOf = []*jsonschema.Schema{ - {OneOf: sc.OneOf}, - {OneOf: formatSpecOneOf}, - } - sc.OneOf = nil - } -} - func (s *FileSpec) SetDefaults() { switch s.Format { case FormatTypeCSV: diff --git a/spec_test.go b/spec_test.go index c0dc8ac0..54e593ca 100644 --- a/spec_test.go +++ b/spec_test.go @@ -3,11 +3,9 @@ package filetypes import ( "testing" - "github.com/cloudquery/codegen/jsonschema" "github.com/cloudquery/filetypes/v4/csv" "github.com/cloudquery/filetypes/v4/json" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestSpecMethods(t *testing.T) { @@ -104,86 +102,3 @@ func TestSpecMethods(t *testing.T) { assert.Equal(t, tc.postDefaultsJSON, tc.FileSpec.jsonSpec) } } - -func TestFileSpec_JSONSchemaExtend(t *testing.T) { - schema, err := jsonschema.Generate(FileSpec{}) - require.NoError(t, err) - - jsonschema.TestJSONSchema(t, string(schema), []jsonschema.TestCase{ - { - Name: "empty", - Err: true, // missing format - Spec: `{}`, - }, - { - Name: "empty format", - Err: true, - Spec: `{"format":""}`, - }, - { - Name: "null format", - Err: true, - Spec: `{"format":null}`, - }, - { - Name: "bad format", - Err: true, - Spec: `{"format":123}`, - }, - { - Name: "bad format value", - Err: true, - Spec: `{"format":"abc"}`, - }, - { - Name: "csv format", - Spec: `{"format":"csv"}`, - }, - { - Name: "csv format + empty format_spec", - Spec: `{"format":"csv","format_spec":{}}`, - }, - { - Name: "csv format + null format_spec", - Spec: `{"format":"csv","format_spec":null}`, - }, - { - Name: "csv format + csv format_spec", - Spec: `{"format":"csv","format_spec":{"skip_header": true, "delimiter":","}}`, - }, - { - Name: "json format", - Spec: `{"format":"json"}`, - }, - { - Name: "json format + empty format_spec", - Spec: `{"format":"json","format_spec":{}}`, - }, - { - Name: "json format + null format_spec", - Spec: `{"format":"json","format_spec":null}`, - }, - { - Name: "json format + csv format_spec", - Err: true, - Spec: `{"format":"json","format_spec":{"skip_header": true, "delimiter":","}}`, - }, - { - Name: "parquet format", - Spec: `{"format":"parquet"}`, - }, - { - Name: "parquet format + empty format_spec", - Spec: `{"format":"parquet","format_spec":{}}`, - }, - { - Name: "parquet format + null format_spec", - Spec: `{"format":"parquet","format_spec":null}`, - }, - { - Name: "parquet format + csv format_spec", - Err: true, - Spec: `{"format":"parquet","format_spec":{"skip_header": true, "delimiter":","}}`, - }, - }) -}