Skip to content
Permalink
Browse files
AVRO-3511: Fix the parsing canonical form for Decimal schema (#1679)
Signed-off-by: Martin Tzvetanov Grigorov <mgrigorov@apache.org>
  • Loading branch information
martin-g committed May 5, 2022
1 parent 7ba9447 commit ce74e7726b33ac414299a924c63a7a7bc036f8bd
Showing 3 changed files with 98 additions and 51 deletions.
@@ -256,12 +256,18 @@ pub enum Error {
#[error("Unknown primitive type: {0}")]
ParsePrimitive(String),

#[error("invalid JSON for {key:?}: {precision:?}")]
GetDecimalPrecisionFromJson {
#[error("invalid JSON for {key:?}: {value:?}")]
GetDecimalMetadataValueFromJson {
key: String,
precision: serde_json::Value,
value: serde_json::Value,
},

#[error("The decimal precision ({precision}) must be bigger or equal to the scale ({scale})")]
DecimalPrecisionLessThanScale { precision: usize, scale: usize },

#[error("The decimal precision ({precision}) must be a positive number")]
DecimalPrecisionMuBePositive { precision: usize },

#[error("Unexpected `type` {0} variant for `logicalType`")]
GetLogicalTypeVariant(serde_json::Value),

@@ -680,7 +680,7 @@ impl Schema {
/// https://avro.apache.org/docs/1.8.2/spec.html#Parsing+Canonical+Form+for+Schemas
pub fn canonical_form(&self) -> String {
let json = serde_json::to_value(self)
.unwrap_or_else(|e| panic!("cannot parse Schema from JSON: {0}", e));
.unwrap_or_else(|e| panic!("Cannot parse Schema from JSON: {0}", e));
parsing_canonical_form(&json)
}

@@ -868,16 +868,31 @@ impl Parser {
) -> Result<DecimalMetadata, Error> {
match complex.get(key) {
Some(&Value::Number(ref value)) => parse_json_integer_for_decimal(value),
None => Err(Error::GetDecimalMetadataFromJson(key)),
Some(precision) => Err(Error::GetDecimalPrecisionFromJson {
None => {
if key == "scale" {
Ok(0)
} else {
Err(Error::GetDecimalMetadataFromJson(key))
}
}
Some(value) => Err(Error::GetDecimalMetadataValueFromJson {
key: key.into(),
precision: precision.clone(),
value: value.clone(),
}),
}
}
let precision = get_decimal_integer(complex, "precision")?;
let scale = get_decimal_integer(complex, "scale")?;
Ok((precision, scale))

if precision < 1 {
return Err(Error::DecimalPrecisionMuBePositive { precision });
}

if precision < scale {
Err(Error::DecimalPrecisionLessThanScale { precision, scale })
} else {
Ok((precision, scale))
}
}

/// Parse a `serde_json::Value` representing a complex Avro type into a
@@ -1545,7 +1560,7 @@ fn pcf_map(schema: &Map<String, serde_json::Value>) -> String {
}

// Strip off quotes surrounding "size" type, if they exist ([INTEGERS] rule).
if k == "size" {
if k == "size" || k == "precision" || k == "scale" {
let i = match v.as_str() {
Some(s) => s.parse::<i64>().expect("Only valid schemas are accepted!"),
None => v.as_i64().unwrap(),
@@ -1597,6 +1612,8 @@ const RESERVED_FIELDS: &[&str] = &[
"doc",
"aliases",
"default",
"precision",
"scale",
];

// Used to define the ordering and inclusion of fields.
@@ -368,88 +368,112 @@ const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[
];

const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[
/*
(
r#"{
"type": "fixed",
"type": {
"type": "fixed",
"name": "TestDecimal",
"size": 10
},
"logicalType": "decimal",
"name": "TestDecimal",
"precision": 4,
"size": 10,
"scale": 2
}"#,
true,
),
(
r#"{
"type": "bytes",
"type": {
"type": "fixed",
"name": "ScaleIsImplicitlyZero",
"size": 10
},
"logicalType": "decimal",
"precision": 4,
"scale": 2
"precision": 4
}"#,
true,
),
(
r#"{
"type": "bytes",
"type": {
"type": "fixed",
"name": "PrecisionMustBeGreaterThanZero",
"size": 10
},
"logicalType": "decimal",
"precision": 2,
"scale": -2
"precision": 0
}"#,
false,
),
(
r#"{
"type": "bytes",
"logicalType": "decimal",
"precision": -2,
"scale": 2
}"#,
"type": "bytes",
"logicalType": "decimal",
"precision": 4,
"scale": 2
}"#,
true,
),
(
r#"{
"type": "bytes",
"logicalType": "decimal",
"precision": 2,
"scale": -2
}"#,
false,
),
(
r#"{
"type": "bytes",
"logicalType": "decimal",
"precision": 2,
"scale": 3
}"#,
"type": "bytes",
"logicalType": "decimal",
"precision": -2,
"scale": 2
}"#,
false,
),
(
r#"{
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": -10,
"scale": 2,
"size": 5
}"#,
"type": "bytes",
"logicalType": "decimal",
"precision": 2,
"scale": 3
}"#,
false,
),
(
r#"{
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": 2,
"scale": 3,
"size": 2
}"#,
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": -10,
"scale": 2,
"size": 5
}"#,
false,
),
(
r#"{
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": 2,
"scale": 2,
"size": -2
}"#,
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": 2,
"scale": 3,
"size": 2
}"#,
false,
),
(
r#"{
"type": "fixed",
"logicalType": "decimal",
"name": "TestDecimal",
"precision": 2,
"scale": 2,
"size": -2
}"#,
false,
),
*/
];

const DECIMAL_LOGICAL_TYPE_ATTRIBUTES: &[(&str, bool)] = &[

0 comments on commit ce74e77

Please sign in to comment.