Skip to content

Commit

Permalink
Validate enum symbols (#181)
Browse files Browse the repository at this point in the history
Ensure that symbol names are valid ([A-Za-z_][A-Za-z0-9_]*) and unique
within an enum.

Fixes #179
  • Loading branch information
flavray committed Feb 7, 2021
1 parent 8e5a8ef commit 941000b
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ digest = "0.9"
libflate = "1"
num-bigint = "0.2.6"
rand = "0.7.0"
regex = "^1.4"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
snap = { version = "0.2.3", optional = true }
Expand Down
6 changes: 6 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,12 @@ pub enum Error {
#[error("Unable to parse `symbols` in enum")]
GetEnumSymbols,

#[error("Invalid enum symbol name {0}")]
EnumSymbolName(String),

#[error("Duplicate enum symbol {0}")]
EnumSymbolDuplicate(String),

#[error("No `items` in array")]
GetArrayItemsField,

Expand Down
49 changes: 47 additions & 2 deletions src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
//! Logic for parsing and interacting with schemas in Avro format.
use crate::{error::Error, types, util::MapHelper, AvroResult};
use digest::Digest;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{
ser::{SerializeMap, SerializeSeq},
Deserialize, Serialize, Serializer,
};
use serde_json::{Map, Value};
use std::{borrow::Cow, collections::HashMap, convert::TryInto, fmt, str::FromStr};
use std::{
borrow::Cow,
collections::{HashMap, HashSet},
convert::TryInto,
fmt,
str::FromStr,
};
use strum_macros::{EnumDiscriminants, EnumString};

lazy_static! {
    /// Matches a string that is, in its entirety, a valid Avro enum symbol:
    /// a letter or underscore followed by any number of letters, digits or
    /// underscores.
    ///
    /// The pattern is anchored with `^` and `$` because `Regex::is_match`
    /// searches for a match anywhere in the input; without the anchors a
    /// symbol such as `"0abc"` or `"a-b"` would be accepted since it merely
    /// *contains* a valid name.
    static ref ENUM_SYMBOL_NAME: Regex = Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$")
        .expect("ENUM_SYMBOL_NAME is a valid, constant regular expression");
}

/// Represents an Avro schema fingerprint
/// More information about Avro schema fingerprints can be found in the
/// [Avro Schema Fingerprint documentation](https://avro.apache.org/docs/current/spec.html#schema_fingerprints)
Expand Down Expand Up @@ -699,7 +711,7 @@ impl Parser {
fn parse_enum(complex: &Map<String, Value>) -> AvroResult<Schema> {
let name = Name::parse(complex)?;

let symbols = complex
let symbols: Vec<String> = complex
.get("symbols")
.and_then(|v| v.as_array())
.ok_or(Error::GetEnumSymbolsField)
Expand All @@ -711,6 +723,21 @@ impl Parser {
.ok_or(Error::GetEnumSymbols)
})?;

let mut existing_symbols: HashSet<&String> = HashSet::with_capacity(symbols.len());
for symbol in symbols.iter() {
// Ensure enum symbol names match [A-Za-z_][A-Za-z0-9_]*
if !ENUM_SYMBOL_NAME.is_match(symbol) {
return Err(Error::EnumSymbolName(symbol.to_string()));
}

// Ensure there are no duplicate symbols
if existing_symbols.contains(&symbol) {
return Err(Error::EnumSymbolDuplicate(symbol.to_string()));
}

existing_symbols.insert(symbol);
}

Ok(Schema::Enum {
name,
doc: complex.doc(),
Expand Down Expand Up @@ -1169,6 +1196,24 @@ mod tests {
assert_eq!(expected, schema);
}

#[test]
fn test_enum_schema_duplicate() {
    // The symbol "diamonds" appears twice, so parsing must be rejected.
    let raw_schema = r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "diamonds"]}"#;

    assert!(Schema::parse_str(raw_schema).is_err());
}

#[test]
fn test_enum_schema_name() {
    // The symbol "0000" starts with a digit, which [A-Za-z_][A-Za-z0-9_]*
    // does not allow, so parsing must be rejected.
    let raw_schema = r#"{"type": "enum", "name": "Enum", "symbols": ["0000", "variant"]}"#;

    assert!(Schema::parse_str(raw_schema).is_err());
}

#[test]
fn test_fixed_schema() {
let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#).unwrap();
Expand Down

0 comments on commit 941000b

Please sign in to comment.