diff --git a/README.md b/README.md index 4c95977..16e9c83 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,53 @@ If you have a lot of data and can no load everything in memory, you can easily i * [Table](https://godoc.org/github.com/frictionlessdata/tableschema-go/table#Table) * [Iterator](https://godoc.org/github.com/frictionlessdata/tableschema-go/table#Iterator) +### Field + +Field describes a single field in the table schema. + +For example, data values can be decoded to native Go types. Decoding a value will check if the value is of the expected type, is in the correct format, and complies with any constraints imposed by a schema. + +```javascript +{ + 'name': 'birthday', + 'type': 'date', + 'format': 'default', + 'constraints': { + 'required': true, + 'minimum': '2015-05-30' + } +} +``` + +The following example will return an error because the passed-in value is less than allowed by the `minimum` constraint of the field. An `error` will be returned as well when the user tries to decode values which are not well-formatted dates. + +```go +date, err := field.Decode("2014-05-29") +// uh oh, something went wrong +``` + +Values that can't be decoded will return an `error`. +Decoding a value that doesn't meet the constraints will return an `error`. 
+ +Available types, formats and resultant value of the cast: + +| Type | Formats | Casting result | +| ---- | ------- | -------------- | +| any | default | interface{} | +| object | default | interface{} | +| array | default | []interface{} | +| boolean | default | bool | +| duration | default | time.Time | +| geopoint | default, array, object | [float64, float64] | +| integer | default | int64 | +| number | default | float64 | +| string | default, uri, email, binary | string | +| date | default, any, | time.Time | +| datetime | default, any, | time.Time | +| time | default, any, | time.Time | +| year | default | time.Time | +| yearmonth | default | time.Time | + ## Saving Tabular Data Once you're done processing the data, it is time to persist results. As an example, let us assume we have a remote table schema called `summary`, which contains two fields: diff --git a/schema/field.go b/schema/field.go index b18d093..fa8361a 100644 --- a/schema/field.go +++ b/schema/field.go @@ -63,6 +63,13 @@ type Constraints struct { MaxLength int `json:"maxLength,omitempty"` Pattern string `json:"pattern,omitempty"` compiledPattern *regexp.Regexp + + // Enum indicates that the value of the field must exactly match a value in the enum array. + // The values of the fields could need encoding, depending on the type. + // It applies to all field types. + Enum []interface{} `json:"enum,omitempty"` + // encodedEnum keeps the encoded version of the enum objects, to make validation faster and easier. + encodedEnum map[string]struct{} } // Field describes a single field in the table schema. @@ -120,7 +127,7 @@ func (f *Field) UnmarshalJSON(data []byte) error { return err } *f = Field(*u) - + // Transformation/Validation that should be done at creation time. 
if f.Constraints.Pattern != "" { p, err := regexp.Compile(f.Constraints.Pattern) if err != nil { @@ -128,6 +135,16 @@ func (f *Field) UnmarshalJSON(data []byte) error { } f.Constraints.compiledPattern = p } + if f.Constraints.Enum != nil { + f.Constraints.encodedEnum = make(map[string]struct{}) + for i := range f.Constraints.Enum { + e, err := f.Encode(f.Constraints.Enum[i]) + if err != nil { + return err + } + f.Constraints.encodedEnum[e] = struct{}{} + } + } return nil } @@ -140,37 +157,54 @@ func (f *Field) Decode(value string) (interface{}, error) { return nil, fmt.Errorf("%s is required", f.Name) } } + var decoded interface{} + var err error switch f.Type { case IntegerType: - return castInt(f.BareNumber, value, f.Constraints) + decoded, err = castInt(f.BareNumber, value, f.Constraints) case StringType: - return decodeString(f.Format, value, f.Constraints) + decoded, err = decodeString(f.Format, value, f.Constraints) case BooleanType: - return castBoolean(value, f.TrueValues, f.FalseValues) + decoded, err = castBoolean(value, f.TrueValues, f.FalseValues) case NumberType: - return castNumber(f.DecimalChar, f.GroupChar, f.BareNumber, value, f.Constraints) + decoded, err = castNumber(f.DecimalChar, f.GroupChar, f.BareNumber, value, f.Constraints) case DateType: - return decodeDate(f.Format, value, f.Constraints) + decoded, err = decodeDate(f.Format, value, f.Constraints) case ObjectType: - return castObject(value) + decoded, err = castObject(value) case ArrayType: - return castArray(value) + decoded, err = castArray(value) case TimeType: - return decodeTime(f.Format, value, f.Constraints) + decoded, err = decodeTime(f.Format, value, f.Constraints) case YearMonthType: - return decodeYearMonth(value, f.Constraints) + decoded, err = decodeYearMonth(value, f.Constraints) case YearType: - return decodeYear(value, f.Constraints) + decoded, err = decodeYear(value, f.Constraints) case DateTimeType: - return decodeDateTime(value, f.Constraints) + decoded, err = 
decodeDateTime(value, f.Constraints) case DurationType: - return castDuration(value) + decoded, err = castDuration(value) case GeoPointType: - return castGeoPoint(f.Format, value) + decoded, err = castGeoPoint(f.Format, value) case AnyType: - return castAny(value) + decoded, err = castAny(value) + } + if err != nil { + return nil, err + } + if decoded == nil { + return nil, fmt.Errorf("invalid field type: %s", f.Type) + } + if len(f.Constraints.encodedEnum) > 0 { + encodedValue, err := f.Encode(decoded) + if err != nil { + return nil, err + } + if _, ok := f.Constraints.encodedEnum[encodedValue]; !ok { + return nil, fmt.Errorf("decoded value:%s does not match enum constraints:%v", encodedValue, f.Constraints.encodedEnum) + } } - return nil, fmt.Errorf("invalid field type: %s", f.Type) + return decoded, nil } // Encode encodes the passed-in value into a string. It returns an error if the diff --git a/schema/field_test.go b/schema/field_test.go index b66baab..f35381f 100644 --- a/schema/field_test.go +++ b/schema/field_test.go @@ -135,6 +135,52 @@ func TestField_Decode(t *testing.T) { _, err := f.Decode("NA") is.True(err != nil) }) + t.Run("Enum", func(t *testing.T) { + data := []struct { + desc string + field Field + value string + }{ + { + "SimpleCase", + Field{Type: IntegerType, Constraints: Constraints{encodedEnum: map[string]struct{}{"1": struct{}{}}}}, + "1", + }, + { + "NilEnumList", + Field{Type: IntegerType}, + "10", + }, + { + "EmptyEnumList", + Field{Type: IntegerType, Constraints: Constraints{encodedEnum: map[string]struct{}{}}}, + "10", + }, + } + for _, d := range data { + t.Run(d.desc, func(t *testing.T) { + is := is.New(t) + _, err := d.field.Decode(d.value) + is.NoErr(err) + }) + } + }) + t.Run("EnumError", func(t *testing.T) { + data := []struct { + desc string + field Field + value string + }{ + {"NonEmptyEnumList", Field{Type: IntegerType, Constraints: Constraints{encodedEnum: map[string]struct{}{"8": struct{}{}, "9": struct{}{}}}}, "10"}, + } + 
for _, d := range data { + t.Run(d.desc, func(t *testing.T) { + is := is.New(t) + _, err := d.field.Decode(d.value) + is.True(err != nil) + }) + } + }) }) }