Skip to content

Commit

Permalink
Fix initiatedContent="yes" with zero-length initiator
Browse files Browse the repository at this point in the history
A group with initiatedContent="yes" but a zero-length initiator should
be a schema definition error.  Fix the bug and add four TDML tests to
DelimiterProperties.tdml to check that it is fixed.

Refactor two methods and one field to make their purpose clearer:
- ElementBase: initTermTestExpression renamed to hasNonEmptyDelimiter
- ConstantExpression: value pulled into CompiledExpression
- ConstantExpression: isKnownNonEmpty pulled into CompiledExpression
- RuntimeExpressionDPath: isKnownNonEmpty pulled into CompiledExpression
- CompiledExpression: isKnownNonEmpty renamed to isConstantEmptyString
- CompiledExpression: valueForDebugPrinting renamed to value

Define three new methods to implement the check:
- InitiatedTerminatedMixin: hasNonZeroLengthInitiator
- InitiatedTerminatedMixin: mustMatchNonZeroData
- CompiledExpression: isKnownCanMatchEmptyString

Pass e.mustMatchNonZeroData to DelimiterTextParse constructor.

Add compile-time and run-time checks to the right places:
- ModelGroup: initiatedContentCheck
- DelimiterTextParse: parse

Fix a few misspellings, update comments, etc.

DAFFODIL-2199
  • Loading branch information
tuxji authored and mbeckerle committed Jun 11, 2020
1 parent d297545 commit 03a2a5b
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 53 deletions.
Expand Up @@ -686,8 +686,8 @@ trait ElementBase
private def NVDP = NilValueDelimiterPolicy
private def EVDP = EmptyValueDelimiterPolicy

protected final lazy val hasNilValueInitiator = initTermTestExpression(initiatorParseEv, nilValueDelimiterPolicy, NVDP.Both, NVDP.Initiator)
protected final lazy val hasNilValueTerminator = initTermTestExpression(terminatorParseEv, nilValueDelimiterPolicy, NVDP.Both, NVDP.Terminator)
protected final lazy val hasNilValueInitiator = hasNonEmptyDelimiter(initiatorParseEv, nilValueDelimiterPolicy, NVDP.Both, NVDP.Initiator)
protected final lazy val hasNilValueTerminator = hasNonEmptyDelimiter(terminatorParseEv, nilValueDelimiterPolicy, NVDP.Both, NVDP.Terminator)

/**
* We need the nil values in raw form for diagnostic messages.
Expand Down Expand Up @@ -729,19 +729,19 @@ trait ElementBase
// cause a nil value to be created.
(isDefinedNilValue && (isSimpleType && (simpleType.primType =:= PrimType.String || simpleType.primType =:= PrimType.HexBinary) && !hasESNilValue)))

final lazy val hasEmptyValueInitiator = initTermTestExpression(initiatorParseEv, emptyValueDelimiterPolicy, EVDP.Both, EVDP.Initiator)
final lazy val hasEmptyValueTerminator = initTermTestExpression(terminatorParseEv, emptyValueDelimiterPolicy, EVDP.Both, EVDP.Terminator)
final lazy val hasEmptyValueInitiator = hasNonEmptyDelimiter(initiatorParseEv, emptyValueDelimiterPolicy, EVDP.Both, EVDP.Initiator)
final lazy val hasEmptyValueTerminator = hasNonEmptyDelimiter(terminatorParseEv, emptyValueDelimiterPolicy, EVDP.Both, EVDP.Terminator)

// See how this function takes the prop: => Any that is pass by name (aka lazy pass).
// That allows us to not require the property to exist at all if
// expr.isKnownNotEmpty turns out to be false.
private def initTermTestExpression(expr: DelimiterParseEv, prop: => Any, true1: Any, true2: Any): Boolean = {
// expr.isConstantEmptyString turns out to be true.
private def hasNonEmptyDelimiter(expr: DelimiterParseEv, prop: => Any, true1: Any, true2: Any): Boolean = {
// changed from a match on a 2-tuple to if-then-else logic because we don't even want to ask for
// prop's value at all unless the first test is true.
if (expr.isKnownNonEmpty)
if (prop == true1 || prop == true2) true
else false
else false
// prop's value at all unless the first test is false.
if (expr.isConstantEmptyString)
false
else
prop == true1 || prop == true2
}

/**
Expand Down
Expand Up @@ -44,14 +44,30 @@ trait InitiatedTerminatedMixin
* True if the term has an initiator expressed on it.
*
* Do not confuse with the concept of the delimiter being able to match or not match zero-length data.
* Whether the representation of a term in the data stream "has an initiator", as in the initator
* Whether the representation of a term in the data stream "has an initiator", as in the initiator
* occupies a non-zero number of bits in the data stream, is an entirely different question.
*/
lazy val hasInitiator = {
val hasOne = initiatorExpr.isKnownNonEmpty
val hasOne = !initiatorExpr.isConstantEmptyString
hasOne
}

/**
* True if the term's initiator cannot match zero-length data. This answers the entirely different
* question of whether the initiator occupies a non-zero number of bits in the data stream.
*/
lazy val hasNonZeroLengthInitiator = {
val hasOne = !initiatorExpr.isKnownCanMatchEmptyString
hasOne
}

/**
* True if the term is inside an immediately enclosing model group which has the initiatedContent
* property set to "yes". This tells us whether we need to verify that a runtime expression defining
* the initiator matches a non-zero number of bits in the data stream.
*/
lazy val mustMatchNonZeroData = parentSaysInitiatedContent

/**
* True if the term has a terminator expressed on it.
*
Expand All @@ -60,8 +76,8 @@ trait InitiatedTerminatedMixin
* occupies a non-zero number of bits, is an entirely different question.
*/
lazy val hasTerminator = {
val res = terminatorExpr.isKnownNonEmpty
res
val hasOne = !terminatorExpr.isConstantEmptyString
hasOne
}

private lazy val isInitiatedContentChoice: Boolean = {
Expand Down
Expand Up @@ -354,8 +354,13 @@ abstract class ModelGroup(index: Int)

lazy val initiatedContentCheck: Unit = {
if (initiatedContent eq YesNo.Yes) {
groupMembers.foreach { term =>
term.schemaDefinitionUnless(term.hasInitiator, "Enclosing group has initiatedContent='yes', but initiator is not defined.")
groupMembers.foreach {
term => {
term.schemaDefinitionUnless(term.hasInitiator,
"Enclosing group has initiatedContent='yes', but initiator is not defined.")
term.schemaDefinitionUnless(term.hasNonZeroLengthInitiator,
"Enclosing group has initiatedContent='yes', but initiator can match zero-length data.")
}
}
}
}
Expand Down
Expand Up @@ -181,7 +181,7 @@ trait SequenceGrammarMixin
* Whether the representation of a term in the data stream "has a separator", as in a specific separator
* occupies a non-zero number of bits, is an entirely different question.
*/
lazy val hasSeparator = separatorParseEv.isKnownNonEmpty
lazy val hasSeparator = !separatorParseEv.isConstantEmptyString

lazy val sequenceSeparator = prod("separator", hasSeparator) {
delimMTA ~ SequenceSeparator(this)
Expand Down
Expand Up @@ -30,37 +30,37 @@ import org.apache.daffodil.util.Misc
import org.apache.daffodil.xml.XMLUtils

case class DelimiterStackCombinatorSequence(sq: SequenceTermBase, body: Gram) extends Terminal(sq, !body.isEmpty) {
lazy val pInit = if (sq.initiatorParseEv.isKnownNonEmpty) One(sq.initiatorParseEv) else Nope
lazy val pSep = if (sq.hasSeparator && sq.separatorParseEv.isKnownNonEmpty) One(sq.separatorParseEv) else Nope
lazy val pTerm = if (sq.terminatorParseEv.isKnownNonEmpty) One(sq.terminatorParseEv) else Nope
lazy val pInit = if (sq.initiatorParseEv.isConstantEmptyString) Nope else One(sq.initiatorParseEv)
lazy val pSep = if (sq.hasSeparator && !sq.separatorParseEv.isConstantEmptyString) One(sq.separatorParseEv) else Nope
lazy val pTerm = if (sq.terminatorParseEv.isConstantEmptyString) Nope else One(sq.terminatorParseEv)

lazy val uInit = if (sq.initiatorParseEv.isKnownNonEmpty) One(sq.initiatorUnparseEv) else Nope
lazy val uSep = if (sq.hasSeparator && sq.separatorParseEv.isKnownNonEmpty) One(sq.separatorUnparseEv) else Nope
lazy val uTerm = if (sq.terminatorParseEv.isKnownNonEmpty) One(sq.terminatorUnparseEv) else Nope
lazy val uInit = if (sq.initiatorParseEv.isConstantEmptyString) Nope else One(sq.initiatorUnparseEv)
lazy val uSep = if (sq.hasSeparator && !sq.separatorParseEv.isConstantEmptyString) One(sq.separatorUnparseEv) else Nope
lazy val uTerm = if (sq.terminatorParseEv.isConstantEmptyString) Nope else One(sq.terminatorUnparseEv)

lazy val parser: DaffodilParser = new DelimiterStackParser((pInit.toList ++ pSep.toList ++ pTerm.toList).toArray, sq.runtimeData, body.parser)

override lazy val unparser: DaffodilUnparser = new DelimiterStackUnparser(uInit, uSep, uTerm, sq.termRuntimeData, body.unparser)
}

case class DelimiterStackCombinatorChoice(ch: ChoiceTermBase, body: Gram) extends Terminal(ch, !body.isEmpty) {
lazy val pInit = if (ch.initiatorParseEv.isKnownNonEmpty) One(ch.initiatorParseEv) else Nope
lazy val pTerm = if (ch.terminatorParseEv.isKnownNonEmpty) One(ch.terminatorParseEv) else Nope
lazy val pInit = if (ch.initiatorParseEv.isConstantEmptyString) Nope else One(ch.initiatorParseEv)
lazy val pTerm = if (ch.terminatorParseEv.isConstantEmptyString) Nope else One(ch.terminatorParseEv)

lazy val uInit = if (ch.initiatorParseEv.isKnownNonEmpty) One(ch.initiatorUnparseEv) else Nope
lazy val uTerm = if (ch.terminatorParseEv.isKnownNonEmpty) One(ch.terminatorUnparseEv) else Nope
lazy val uInit = if (ch.initiatorParseEv.isConstantEmptyString) Nope else One(ch.initiatorUnparseEv)
lazy val uTerm = if (ch.terminatorParseEv.isConstantEmptyString) Nope else One(ch.terminatorUnparseEv)

lazy val parser: DaffodilParser = new DelimiterStackParser((pInit.toList ++ pTerm.toList).toArray, ch.runtimeData, body.parser)

override lazy val unparser: DaffodilUnparser = new DelimiterStackUnparser(uInit, None, uTerm, ch.termRuntimeData, body.unparser)
}

case class DelimiterStackCombinatorElement(e: ElementBase, body: Gram) extends Terminal(e, !body.isEmpty) {
lazy val pInit = if (e.initiatorParseEv.isKnownNonEmpty) One(e.initiatorParseEv) else Nope
lazy val pTerm = if (e.terminatorParseEv.isKnownNonEmpty) One(e.terminatorParseEv) else Nope
lazy val pInit = if (e.initiatorParseEv.isConstantEmptyString) Nope else One(e.initiatorParseEv)
lazy val pTerm = if (e.terminatorParseEv.isConstantEmptyString) Nope else One(e.terminatorParseEv)

lazy val uInit = if (e.initiatorParseEv.isKnownNonEmpty) One(e.initiatorUnparseEv) else Nope
lazy val uTerm = if (e.terminatorParseEv.isKnownNonEmpty) One(e.terminatorUnparseEv) else Nope
lazy val uInit = if (e.initiatorParseEv.isConstantEmptyString) Nope else One(e.initiatorUnparseEv)
lazy val uTerm = if (e.terminatorParseEv.isConstantEmptyString) Nope else One(e.terminatorUnparseEv)

lazy val delims = (pInit.toList ++ pTerm.toList)

Expand Down
Expand Up @@ -60,7 +60,7 @@ abstract class DelimiterText(e: Term, eb: Term, delimiterType: DelimiterTextType
case _ => false
}

override lazy val parser: DaffodilParser = new DelimiterTextParser(e.termRuntimeData, textParser, delimiterType, isDelimited)
override lazy val parser: DaffodilParser = new DelimiterTextParser(e.termRuntimeData, textParser, delimiterType, isDelimited, e.mustMatchNonZeroData)
override lazy val unparser: DaffodilUnparser = new DelimiterTextUnparser(e.termRuntimeData, delimiterType)
}

Expand Down
Expand Up @@ -80,8 +80,8 @@ final class RuntimeExpressionDPath[T <: AnyRef](qn: NamedQName, tt: NodeInfo.Kin

override def targetType = tt

// TODO: fix this check below. There is a unierse of target types which is
// muuch smaller than the set of all types, so some check is useful to be sure
// TODO: fix this check below. There is a universe of target types which is
// much smaller than the set of all types, so some check is useful to be sure
// we stay within the subset of types that are actually used as target types.
// Assert.usage(targetType == NodeInfo.AnyType // used by debugger eval stmt
// || targetType == NodeInfo.NonEmptyString // string-valued properties
Expand All @@ -93,8 +93,6 @@ final class RuntimeExpressionDPath[T <: AnyRef](qn: NamedQName, tt: NodeInfo.Kin

override lazy val prettyExpr = dpathText

def isKnownNonEmpty = true // expressions are not allowed to return empty string

private def UE(e: Throwable, maybeCL: Maybe[DataLocation]) =
throw new UnparseError(One(ci.schemaFileLocation), maybeCL, e)

Expand Down
Expand Up @@ -68,10 +68,10 @@ trait ContentValueReferencedElementInfoMixin {
*/
abstract class CompiledExpression[+T <: AnyRef](
val qName: NamedQName,
valueForDebugPrinting: AnyRef)
value: AnyRef)
extends ContentValueReferencedElementInfoMixin with Serializable {

DataValue.assertValueIsNotDataValue(valueForDebugPrinting)
DataValue.assertValueIsNotDataValue(value)

final def toBriefXML(depth: Int = -1) = {
"'" + prettyExpr + "'"
Expand All @@ -83,17 +83,19 @@ abstract class CompiledExpression[+T <: AnyRef](
* particularly for `Array[Byte]`. It prints a useless thing like "[@0909280".
* Use of `stringOf` prints "Array(....)".
*/
lazy val prettyExpr = stringOf(valueForDebugPrinting)
lazy val prettyExpr = stringOf(value)

/**
* tells us if the property is non-empty. This is true if it is a constant non-empty expression
* (that is, is not ""), but it is also true if it is evaluated as a runtime expression that it is
* not allowed to return "".
*
* Issue: are there properties which are string-valued, and where "" can in fact be returned at run time?
* Assumed no. This was clarified in an errata to the DFDL spec.
* Tells us if the expression is the constant empty string (that is, it is "").
*/
def isKnownNonEmpty: Boolean
final lazy val isConstantEmptyString = value == ""

/**
* Tells us if the expression can match the empty string. We know it can if the expression
* is a DFDL entity like %ES; or %WSP*. We do not know whether it can if it is a more
* complicated constant or runtime expression.
*/
final lazy val isKnownCanMatchEmptyString = value == "%ES;" || value == "%WSP*;"

/**
* used to obtain a constant value.
Expand Down Expand Up @@ -124,7 +126,7 @@ abstract class CompiledExpression[+T <: AnyRef](
*/
def evaluateForwardReferencing(state: ParseOrUnparseState, whereBlockedLocation: Suspension): Maybe[T]

override def toString(): String = "CompiledExpression(" + valueForDebugPrinting.toString + ")"
override def toString(): String = "CompiledExpression(" + value.toString + ")"

}

Expand All @@ -143,8 +145,6 @@ final case class ConstantExpression[+T <: AnyRef](

lazy val sourceType: NodeInfo.Kind = NodeInfo.fromObject(value)

def isKnownNonEmpty = value != ""

override def evaluate(state: ParseOrUnparseState) = value

def evaluate(dstate: DState, state: ParseOrUnparseState) = {
Expand Down
Expand Up @@ -33,7 +33,7 @@ import org.apache.daffodil.processors.parsers.PState
trait DelimiterEvMixin[+T <: AnyRef]
extends ExprEvalMixin[String] { self: Evaluatable[T] =>

final def isKnownNonEmpty = expr.isKnownNonEmpty
final def isConstantEmptyString = expr.isConstantEmptyString

def expr: CompiledExpression[String]
def converter: Converter[String, List[String]]
Expand Down
Expand Up @@ -42,7 +42,8 @@ class DelimiterTextParser(
rd: TermRuntimeData,
textParser: TextParser,
delimiterType: DelimiterTextType.Type,
isDelimited: Boolean)
isDelimited: Boolean,
mustMatchNonZeroData: Boolean)
extends TextPrimParser {

override lazy val runtimeDependencies = rd.encodingInfo.runtimeDependencies
Expand Down Expand Up @@ -95,8 +96,12 @@ class DelimiterTextParser(
return
}

// Consume the found local delimiter
// Consume the found local delimiter but also check if it was supposed to match
// a non-zero number of bits and throw a runtime SDE if necessary
val nChars = foundDelimiter.get.matchedDelimiterValue.get.length
if (mustMatchNonZeroData && nChars == 0) {
start.SDE("The initiator must match non-zero length data when dfdl:initiatedContent is 'yes'.")
}
val wasDelimiterTextSkipped = start.dataInputStream.skipChars(nChars, start)
Assert.invariant(wasDelimiterTextSkipped)
start.clearDelimitedParseResult()
Expand Down
Expand Up @@ -908,4 +908,96 @@
</tdml:errors>
</tdml:parserTestCase>

<tdml:defineSchema name="emptyInitiator">
<xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
<dfdl:format ref="ex:GeneralFormat" />

<xs:element name="zeroLengthString">
<xs:complexType>
<xs:sequence dfdl:initiatedContent="yes">
<xs:element name="s1" type="xs:string" dfdl:lengthKind="delimited" dfdl:initiator="" />
</xs:sequence>
</xs:complexType>
</xs:element>

<xs:element name="emptyEntity">
<xs:complexType>
<xs:sequence dfdl:initiatedContent="yes">
<xs:element name="s1" type="xs:string" dfdl:lengthKind="delimited" dfdl:initiator="%ES;" />
</xs:sequence>
</xs:complexType>
</xs:element>

<xs:element name="emptyOrWhitespaceEntity">
<xs:complexType>
<xs:sequence dfdl:initiatedContent="yes">
<xs:element name="s1" type="xs:string" dfdl:lengthKind="delimited" dfdl:initiator="%WSP*;" />
</xs:sequence>
</xs:complexType>
</xs:element>

<xs:element name="runtimeEvaluatedInitiator">
<xs:complexType>
<xs:sequence dfdl:initiatedContent="yes">
<xs:element name="s1" type="xs:string" dfdl:lengthKind="delimited" dfdl:initiator="{ if (fn:nilled(.)) then '%ES;' else '%WSP*;' }" />
</xs:sequence>
</xs:complexType>
</xs:element>
</tdml:defineSchema>

<tdml:parserTestCase name="emptyInitiator1"
model="emptyInitiator"
description="An initiator that is '' causes an error"
root="zeroLengthString">
<tdml:document><![CDATA[foo]]></tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>initiatedContent</tdml:error>
<tdml:error>yes</tdml:error>
<tdml:error>initiator</tdml:error>
<tdml:error>not defined</tdml:error>
</tdml:errors>
</tdml:parserTestCase>

<tdml:parserTestCase name="emptyInitiator2"
model="emptyInitiator"
description="An initiator that is '%ES;' causes an error"
root="emptyEntity">
<tdml:document><![CDATA[foo]]></tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>initiatedContent</tdml:error>
<tdml:error>yes</tdml:error>
<tdml:error>initiator</tdml:error>
<tdml:error>zero</tdml:error>
</tdml:errors>
</tdml:parserTestCase>

<tdml:parserTestCase name="emptyInitiator3"
model="emptyInitiator"
description="An initiator that is '%WSP*;' causes an error"
root="emptyOrWhitespaceEntity">
<tdml:document><![CDATA[foo]]></tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>initiatedContent</tdml:error>
<tdml:error>yes</tdml:error>
<tdml:error>initiator</tdml:error>
<tdml:error>zero</tdml:error>
</tdml:errors>
</tdml:parserTestCase>

<tdml:parserTestCase name="emptyInitiator4"
model="emptyInitiator"
description="A runtime-evaluated initiator that matches zero-length data causes an error"
root="runtimeEvaluatedInitiator">
<tdml:document><![CDATA[foo]]></tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>initiatedContent</tdml:error>
<tdml:error>yes</tdml:error>
<tdml:error>initiator</tdml:error>
<tdml:error>zero</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
</tdml:testSuite>
Expand Up @@ -79,4 +79,9 @@ class TestDelimiterProperties {
@Test def test_percentTerminator() = { runner_02.runOneTest("percentTerminator") }
@Test def test_percentTerminator2() = { runner_02.runOneTest("percentTerminator2") }
@Test def test_percentExpression() = { runner_02.runOneTest("percentExpression") }

@Test def test_emptyInitiator1() = { runner_02.runOneTest("emptyInitiator1") }
@Test def test_emptyInitiator2() = { runner_02.runOneTest("emptyInitiator2") }
@Test def test_emptyInitiator3() = { runner_02.runOneTest("emptyInitiator3") }
@Test def test_emptyInitiator4() = { runner_02.runOneTest("emptyInitiator4") }
}

0 comments on commit 03a2a5b

Please sign in to comment.