Skip to content
Permalink
Browse files
Add blob support
- Adds support for xs:anyURI primitive type. Elements with this type
  must specify the new property dfdl:objectKind, which can be
  "bytes" or "characters". Support for "characters" is not added in this
  patch.
- On parse, elements with objectKind="bytes" will have the data read
  and written out to a file with a randomly generated name, with a URI
  to that file being put in the infoset. On unparse, the data is read
  from the URI and written to the data stream. Behavior is very similar
  to xs:hexBinary.
- BLOB Objects must have lengths set with dfdl:lengthKind="explicit".
- New API functions are added to set where blob files are written and to
  get a list of all blob files that were written.
- TDML Runner is updated to be aware of xsi:type="xs:anyURI" and compare
  the contents of the blob files. It will also delete blob files after a
  test passes.
- The CLI will write blob files to the $PWD/daffodil-blobs/ directory.

DAFFODIL-1735
  • Loading branch information
stevedlawrence committed Sep 5, 2019
1 parent ebee69f commit 9de0d1a3c3d294600d5c769243efe4f67b03bac1
Showing 42 changed files with 1,176 additions and 75 deletions.
@@ -34,7 +34,7 @@ jobs:
############################################################

- name: Checkout Repository
uses: actions/checkout@master
uses: actions/checkout@v1.0.0

- name: Install Dependencies (Linux)
run: |
@@ -28,6 +28,7 @@ import java.io.FileInputStream
import java.io.ByteArrayInputStream
import java.io.File
import java.nio.channels.Channels
import java.nio.file.Paths
import java.net.URI
import java.util.Scanner

@@ -720,15 +721,21 @@ object Main extends Logging {
pf
}

// write blobs to $PWD/daffodil-blobs/*.bin
val blobDir = Paths.get(System.getProperty("user.dir"), "daffodil-blobs")
val blobSuffix = ".bin"

// Creates the InfosetOutputter that corresponds to infosetType ("xml",
// "scala-xml", "json", "jdom", "w3cdom" or "null"). The "xml" and "json"
// outputters are constructed with the given output stream; the others are not.
// Before the outputter is returned, setBlobAttributes is called on it with
// blobDir and blobSuffix.
// NOTE(review): the second setBlobAttributes argument is null -- presumably the
// blob file name prefix; confirm against the InfosetOutputter API.
// NOTE(review): the match has no default case, so an unrecognized infosetType
// presumably fails with a MatchError; confirm callers validate the value first.
def getInfosetOutputter(infosetType: String, os: java.io.OutputStream): InfosetOutputter = {
infosetType match {
val outputter = infosetType match {
case "xml" => new XMLTextInfosetOutputter(os, true)
case "scala-xml" => new ScalaXMLInfosetOutputter()
case "json" => new JsonInfosetOutputter(os, true)
case "jdom" => new JDOMInfosetOutputter()
case "w3cdom" => new W3CDOMInfosetOutputter()
case "null" => new NullInfosetOutputter()
}
outputter.setBlobAttributes(blobDir, null, blobSuffix)
outputter
}

// Converts the data to whatever form the InfosetInputter will want. Note
@@ -643,14 +643,18 @@ trait ElementBase
primType match {
case PrimType.HexBinary => Representation.Binary
case PrimType.String => Representation.Text
case PrimType.AnyURI => objectKind match {
case ObjectKindType.Bytes => Representation.Binary
case ObjectKindType.Chars => Representation.Text
}
case _ => representation
}
} else {
representation
}
rep match {
case Representation.Binary =>
if (isComplexType || primType != PrimType.HexBinary) byteOrderEv // ensure defined
if (isComplexType || (primType != PrimType.HexBinary && primType != PrimType.AnyURI)) byteOrderEv // ensure defined
case _ =>
charsetEv // ensure defined
}
@@ -680,13 +684,15 @@ trait ElementBase

private def getImplicitAlignmentInBits(thePrimType: PrimType, theRepresentation: Representation): Int = {
(theRepresentation, thePrimType) match {
case (Representation.Text, PrimType.AnyURI) => this.subsetError("Property value objectKind='chars' is not supported.")
case (Representation.Text, PrimType.HexBinary) => Assert.impossible("type xs:hexBinary with representation='text'")
case (Representation.Text, _) => knownEncodingAlignmentInBits
case (Representation.Binary, PrimType.String) => Assert.impossible("type xs:string with representation='binary'")
// Boolean, Float, Double, and HexBinary do not require binaryNumberRep to be defined
case (Representation.Binary, PrimType.Float | PrimType.Boolean) => 32
case (Representation.Binary, PrimType.Double) => 64
case (Representation.Binary, PrimType.HexBinary) => 8
case (Representation.Binary, PrimType.AnyURI) => 8
// Handle 64 bit types
case (Representation.Binary, PrimType.Long | PrimType.UnsignedLong) =>
binaryNumberRep match {
@@ -335,11 +335,11 @@ sealed trait TypeChecks { self: Restriction =>
}
}

// Don't need to range check String or HexBinary
// Don't need to range check String or HexBinary or blobs.
// no point attempting a conversion to BigDecimal so
// return early here.
primType match {
case PrimType.String | PrimType.HexBinary => return (true, None)
case PrimType.String | PrimType.HexBinary | PrimType.AnyURI => return (true, None)
case _ => /* Continue on below */
}

@@ -373,6 +373,7 @@ sealed trait TypeChecks { self: Restriction =>
case PrimType.Decimal => true // Unbounded Decimal
case PrimType.HexBinary => Assert.impossibleCase // Handled earlier, shouldn't get here
case PrimType.String => Assert.impossibleCase // Handled earlier, shouldn't get here
case PrimType.AnyURI => Assert.impossibleCase // Handled earlier, shouldn't get here
case PrimType.NonNegativeInteger => isInNonNegativeIntegerRange(theValue)
}
val isValueWhole = {
@@ -256,16 +256,16 @@ trait ElementRuntimeValuedPropertiesMixin
}

final lazy val maybeByteOrderEv = {
if (isSimpleType && primType == PrimType.HexBinary) {
// xs:hexBinary types should ignore the byteOrder property and
if (isSimpleType && (primType == PrimType.HexBinary || primType == PrimType.AnyURI)) {
// xs:hexBinary and xs:anyURI types should ignore the byteOrder property and
// effectively always use a bigEndian byteOrder. One way to accomplish
// this would be to modify the byteOrderExpr to return a constant value
// of "bigEndian" for hexBinary types. The problem with this is that
// Daffodil does not allow byteOrder="bigEndian" with
// bitOrder="leastSignificantBitFirst", which is what would happen when
// parsing hexBinary data with LSBF bitOrder, resulting in an SDE. So
// instead, we just set this to Nope, and ensure we never try to get the
// byteOrder property when the type is xs:hexBinary. The IO layer will
// byteOrder property when the type is xs:hexBinary/anyURI. The IO layer will
// see this Nope and do the right thing.
Nope
} else if (optionByteOrderRaw.isDefined) {
@@ -576,7 +576,7 @@ trait ElementRuntimeValuedPropertiesMixin
}

val byteOrderExprElts =
if (isSimpleType && primType =:= PrimType.HexBinary) {
if (isSimpleType && (primType =:= PrimType.HexBinary || primType =:= PrimType.AnyURI)) {
ReferencedElementInfos.None
} else {
propExprElts(optionByteOrderRaw, byteOrderEv, f)
@@ -170,6 +170,7 @@ object PrimitiveType {
case PrimType.DateTime => DateTime
case PrimType.Date => Date
case PrimType.Time => Time
case PrimType.AnyURI => AnyURI
case _ => Assert.usageError("Not a primitive type node: " + typeNode)
}
}
@@ -193,6 +194,7 @@ object PrimitiveType {
val DateTime = new PrimitiveType(PrimType.DateTime)
val Date = new PrimitiveType(PrimType.Date)
val Time = new PrimitiveType(PrimType.Time)
val AnyURI = new PrimitiveType(PrimType.AnyURI)

}

@@ -50,6 +50,7 @@ import org.apache.daffodil.grammar.primitives.BinaryFloat
import org.apache.daffodil.grammar.primitives.BinaryIntegerKnownLength
import org.apache.daffodil.grammar.primitives.BinaryIntegerPrefixedLength
import org.apache.daffodil.grammar.primitives.BinaryIntegerRuntimeLength
import org.apache.daffodil.grammar.primitives.BlobSpecifiedLength
import org.apache.daffodil.grammar.primitives.CaptureContentLengthEnd
import org.apache.daffodil.grammar.primitives.CaptureContentLengthStart
import org.apache.daffodil.grammar.primitives.CaptureValueLengthEnd
@@ -692,6 +693,25 @@ trait ElementBaseGrammarMixin
}
}

// Production for a blob whose length is specified. lengthUnits of bytes and
// bits both map to the BlobSpecifiedLength terminal; lengthUnits of characters
// is rejected with a schema definition error.
private lazy val specifiedLengthBlob = prod("specifiedLengthBlob") {
  lengthUnits match {
    case LengthUnits.Characters => SDE("lengthUnits='characters' is not valid for blob data.")
    case LengthUnits.Bytes | LengthUnits.Bits => BlobSpecifiedLength(this)
  }
}

// Production for the value of an element with objectKind='bytes'. Only
// dfdl:lengthKind='explicit' is accepted; any other lengthKind is a schema
// definition error.
private lazy val blobValue = prod("blobValue") {
  if (lengthKind == LengthKind.Explicit) specifiedLengthBlob
  else SDE("objectKind='bytes' must have dfdl:lengthKind='explicit'")
}

// Production for the value of an xs:anyURI element with a text
// representation (objectKind='chars'). This is not implemented, so the
// production only reports notYetImplemented.
private lazy val clobValue = prod("clobValue") {
notYetImplemented("objectKind='chars'")
}

// Production "textInt", guarded by impliedRepresentation == Representation.Text.
// Combines the standardTextInt and zonedTextInt productions with `||`.
// NOTE(review): `||` is presumably the grammar alternation combinator here
// rather than a boolean or -- confirm against the Gram API.
private lazy val textInt = prod("textInt", impliedRepresentation == Representation.Text) {
standardTextInt || zonedTextInt
}
@@ -1034,6 +1054,13 @@ trait ElementBaseGrammarMixin
primType match {
case PrimType.String => stringValue
case PrimType.HexBinary => hexBinaryValue
case PrimType.AnyURI => {
val res = impliedRepresentation match {
case Representation.Binary => blobValue
case Representation.Text => clobValue
}
res
}
case _ => {
val res = impliedRepresentation match {
case Representation.Binary => binaryValue
@@ -119,6 +119,9 @@ trait PaddingInfoMixin {
case PrimType.HexBinary => {
(MaybeChar.Nope, TextJustificationType.None)
}
case PrimType.AnyURI => {
(MaybeChar.Nope, TextJustificationType.None)
}
}
theJust
}
@@ -30,6 +30,7 @@ import org.apache.daffodil.processors.dfa.TextDelimitedParserWithEscapeBlock
import org.apache.daffodil.processors.dfa.TextPaddingParser
import org.apache.daffodil.processors.parsers.BCDDecimalDelimitedParser
import org.apache.daffodil.processors.parsers.BCDIntegerDelimitedParser
import org.apache.daffodil.processors.parsers.BlobSpecifiedLengthParser
import org.apache.daffodil.processors.parsers.HexBinaryDelimitedParser
import org.apache.daffodil.processors.parsers.HexBinaryEndOfBitLimitParser
import org.apache.daffodil.processors.parsers.HexBinaryLengthPrefixedParser
@@ -44,6 +45,7 @@ import org.apache.daffodil.processors.parsers.StringOfSpecifiedLengthParser
import org.apache.daffodil.processors.parsers.{ Parser => DaffodilParser }
import org.apache.daffodil.processors.unparsers.BCDDecimalDelimitedUnparser
import org.apache.daffodil.processors.unparsers.BCDIntegerDelimitedUnparser
import org.apache.daffodil.processors.unparsers.BlobSpecifiedLengthUnparser
import org.apache.daffodil.processors.unparsers.HexBinaryLengthPrefixedUnparser
import org.apache.daffodil.processors.unparsers.HexBinaryMinLengthInBytesUnparser
import org.apache.daffodil.processors.unparsers.HexBinarySpecifiedLengthUnparser
@@ -71,6 +73,13 @@ case class HexBinarySpecifiedLength(e: ElementBase) extends Terminal(e, true) {
override lazy val unparser: DaffodilUnparser = new HexBinarySpecifiedLengthUnparser(e.elementRuntimeData, e.unparseTargetLengthInBitsEv)
}

/**
 * Grammar terminal for a blob element whose length is specified.
 *
 * The parser is a BlobSpecifiedLengthParser built from the element's runtime
 * data and its elementLengthInBitsEv; the unparser is a
 * BlobSpecifiedLengthUnparser built from the same runtime data and the
 * element's unparseTargetLengthInBitsEv.
 */
case class BlobSpecifiedLength(e: ElementBase) extends Terminal(e, true) {

  private def erd = e.elementRuntimeData

  override lazy val parser = new BlobSpecifiedLengthParser(erd, e.elementLengthInBitsEv)

  override lazy val unparser = new BlobSpecifiedLengthUnparser(erd, e.unparseTargetLengthInBitsEv)
}

case class StringOfSpecifiedLength(e: ElementBase) extends Terminal(e, true) with Padded {

private def erd = e.elementRuntimeData
@@ -286,6 +286,17 @@ trait DataInputStream
*/
def getByteArray(bitLengthFrom1: Int, finfo: FormatInfo): Array[Byte]

/**
 * Same as getByteArray(Int, FormatInfo), except it accepts the array as a
 * parameter and will fill that array with bytes. This can help to improve
 * performance if an array can be reused to avoid allocations. But the caller
 * must be careful upon return, since the array will not be cleared and may
 * be bigger than the number of bits filled. It is the responsibility of the
 * caller to know which bits in the array were set based on the bit length
 * and format info.
 *
 * @param bitLengthFrom1 the number of bits to read into the array
 * @param finfo the format info that, together with the bit length,
 *              determines which bits of the array are set
 * @param array the caller-supplied array to fill; it is not cleared first.
 *              NOTE(review): presumably must hold at least
 *              (bitLengthFrom1 + 7) / 8 bytes -- confirm against the
 *              implementations of this method.
 */
def getByteArray(bitLengthFrom1: Int, finfo: FormatInfo, array: Array[Byte]): Unit

/**
* Returns a long integer containing the bits between the current bit position
* and that position plus the bitLength.
@@ -154,6 +154,18 @@ final class InputSourceDataInputStream private (val inputSource: InputSource)
array
}

/**
 * Reads bitLengthFrom1 bits into the caller-supplied array and then advances
 * the bit position by that many bits.
 *
 * Throws DataInputStream.NotEnoughDataException when the stream is not
 * defined for the requested length. It is a usage error for the array to
 * hold fewer than (bitLengthFrom1 + 7) / 8 bytes. The array is handed to
 * fillByteArray as is, so any bytes beyond those that are filled keep their
 * previous contents.
 */
def getByteArray(bitLengthFrom1: Int, finfo: FormatInfo, array: Array[Byte]): Unit = {
  // threadCheck()
  if (isDefinedForLength(bitLengthFrom1)) {
    // number of whole bytes needed to hold the requested bits, rounded up
    val bytesNeeded = (bitLengthFrom1 + 7) / 8
    Assert.usage(array.size >= bytesNeeded)

    fillByteArray(array, bitLengthFrom1, finfo)

    val newBitPos0b = bitPos0b + bitLengthFrom1
    setBitPos0b(newBitPos0b)
  } else {
    throw DataInputStream.NotEnoughDataException(bitLengthFrom1)
  }
}

/**
* Accepts a preallocated array and a bitLength. Reads the specified number
* of bits and stores them in the array consistent with bitOrder and
@@ -72,6 +72,7 @@ final class StringDataInputStreamForUnparse
// Each of the following members is implemented solely as a call to doNotUse.
// NOTE(review): presumably doNotUse signals a usage error because these
// operations are not meaningful for this stream -- confirm.
override def getUnsignedBigInt(bitLengthFrom1: Int, finfo: FormatInfo): BigInt = doNotUse
override def getUnsignedLong(bitLengthFrom1To64: Int, finfo: FormatInfo): passera.unsigned.ULong = doNotUse
override def getByteArray(bitLengthFrom1: Int, finfo: FormatInfo): Array[Byte] = doNotUse
override def getByteArray(bitLengthFrom1: Int, finfo: FormatInfo, array: Array[Byte]): Unit = doNotUse
override def pastData(nBytesRequested: Int): java.nio.ByteBuffer = doNotUse
override def setBitLimit0b(bitLimit0b: MaybeULong): Boolean = doNotUse
override def setDebugging(setting: Boolean): Unit = doNotUse
@@ -17,20 +17,24 @@

package org.apache.daffodil.util

import java.io.InputStream
import java.io.File
import java.net.URL
import java.net.URI
import java.io.ByteArrayInputStream
import java.nio.channels.ReadableByteChannel
import java.nio.channels.WritableByteChannel
import java.io.ByteArrayOutputStream
import java.io.File
import java.io.InputStream
import java.net.URI
import java.net.URL
import java.net.URLClassLoader
import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.{ Charset => JavaCharset }
import java.nio.channels.ReadableByteChannel
import java.nio.channels.WritableByteChannel
import java.nio.charset.CodingErrorAction
import java.nio.charset.{ Charset => JavaCharset }
import java.nio.file.Files
import java.nio.file.Paths

import scala.collection.JavaConverters._

import org.apache.daffodil.equality._

/**
@@ -121,6 +125,41 @@ object Misc {
}
}

/**
 * Search for a resource name, trying a handful of heuristics.
 *
 * This is useful in cases where it's okay to be a little lax when searching
 * for a resource name (e.g. testing). This is often needed in situations
 * where directory structures do not exactly match paths and we just want to
 * try our best to find a file. Heuristics include things like ignoring
 * leading directories in a resource, looking for a resource as a file, etc.
 * This does mean that it's possible that this could find the wrong file if
 * there are ambiguities, so be careful when using.
 *
 * The heuristics are tried in this order, and the first match wins:
 *  1. resName interpreted as a file (a URI with a scheme, or a plain path)
 *     that exists on the file system
 *  2. Misc.getResourceRelativeOption(resName, relativeTo)
 *  3. the same search again, using only the last "/"-separated component of
 *     resName
 *
 * A resName that cannot be interpreted as a URI or as a file system path is
 * not an error; heuristic 1 is simply skipped for it.
 *
 * If ambiguities would be a serious problem, use getResourceOption or
 * getResourceRelativeOption.
 *
 * @param resName the resource name, file path, or URI string to look for
 * @param relativeTo passed through to getResourceRelativeOption
 * @return the URI of the first match, or None if no heuristic finds anything
 */
def searchResourceOption(resName: String, relativeTo: Option[URI]): Option[URI] = {
  // new URI(...) rejects names containing spaces, backslashes, etc., and
  // Paths.get(URI) rejects URIs whose scheme has no installed file system
  // provider (e.g. "http", or a Windows drive letter such as "C:/dir/file"
  // which parses as scheme "C"). Neither should abort a best-effort search,
  // so fall back to treating the name as a plain path, and skip the file
  // heuristic entirely only if that is impossible too.
  val maybePath =
    scala.util.Try {
      val resAsURI = new URI(resName)
      if (resAsURI.getScheme != null) Paths.get(resAsURI) else Paths.get(resName)
    }.orElse(scala.util.Try(Paths.get(resName))).toOption

  val asExistingFile =
    maybePath.filter(p => Files.exists(p)).map(_.toFile().toURI())

  asExistingFile
    .orElse(Misc.getResourceRelativeOption(resName, relativeTo))
    .orElse {
      // try ignoring the directory part, if there is one
      val parts = resName.split("/")
      if (parts.length > 1) searchResourceOption(parts.last, relativeTo) // recursively
      else None
    }
}

private def tryURL(url: URL): Option[URI] = {
var is: InputStream = null
val res =

0 comments on commit 9de0d1a

Please sign in to comment.