diff --git a/src/main/scala/com/exini/dicom/streams/ParseFlow.scala b/src/main/scala/com/exini/dicom/streams/ParseFlow.scala index bc9e9fd..6362b61 100644 --- a/src/main/scala/com/exini/dicom/streams/ParseFlow.scala +++ b/src/main/scala/com/exini/dicom/streams/ParseFlow.scala @@ -41,11 +41,13 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { log.warning(s"Element ${tagToString(tag)} has odd length") sealed trait HeaderState { + val maySwitchTs: Boolean val bigEndian: Boolean val explicitVR: Boolean } - case class DatasetHeaderState(itemIndex: Int, bigEndian: Boolean, explicitVR: Boolean) extends HeaderState + case class DatasetHeaderState(itemIndex: Int, maySwitchTs: Boolean, bigEndian: Boolean, explicitVR: Boolean) + extends HeaderState case class FmiHeaderState( tsuid: Option[String], @@ -54,11 +56,15 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { hasFmi: Boolean, pos: Long, fmiEndPos: Option[Long] - ) extends HeaderState + ) extends HeaderState { + override val maySwitchTs: Boolean = false + } case class ValueState(bigEndian: Boolean, bytesLeft: Long, nextStep: ParseStep[DicomPart]) - case class FragmentsState(fragmentIndex: Int, bigEndian: Boolean, explicitVR: Boolean) extends HeaderState + case class FragmentsState(fragmentIndex: Int, bigEndian: Boolean, explicitVR: Boolean) extends HeaderState { + override val maySwitchTs: Boolean = false + } case class DeflatedState(bigEndian: Boolean, nowrap: Boolean) @@ -114,7 +120,7 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { log.warning(s"File meta information uses big-endian encoding") InFmiHeader(FmiHeaderState(None, info.bigEndian, info.explicitVR, info.hasFmi, 0, None)) } else - InDatasetHeader(DatasetHeaderState(0, info.bigEndian, info.explicitVR)) + InDatasetHeader(DatasetHeaderState(0, maySwitchTs = false, info.bigEndian, info.explicitVR)) ParseResult(maybePreamble, nextState) } .getOrElse(throw new DicomStreamException("Not a DICOM stream")) @@ -139,7 +145,7 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { else InDeflatedData(DeflatedState(state.bigEndian, nowrap = true)) else - InDatasetHeader(DatasetHeaderState(0, bigEndian, explicitVR)) + InDatasetHeader(DatasetHeaderState(0, maySwitchTs = true, bigEndian, explicitVR)) } private def hasZLIBHeader(firstTwoBytes: ByteString): Boolean = bytesToUShortBE(firstTwoBytes) == 0x789c @@ -210,45 +216,62 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { } } - case class InDatasetHeader(state: DatasetHeaderState) extends DicomParseStep { - private def readDatasetHeader(reader: ByteReader, state: DatasetHeaderState): Option[DicomPart] = { + case class InDatasetHeader(datasetHeaderState: DatasetHeaderState) extends DicomParseStep { + def maybeSwitchTs(reader: ByteReader, state: DatasetHeaderState): DatasetHeaderState = { + reader.ensure(8) + val data = reader.remainingData.take(8) + val tag = bytesToTag(data, state.bigEndian) + val explicitVR = Try(VR.withValue(bytesToVR(data.drop(4)))).getOrElse(null) + if (isSpecial(tag)) + state.copy(maySwitchTs = false) + else if (state.explicitVR && explicitVR == null) { + log.info("Implicit VR attributes detected in explicit VR dataset") + state.copy(maySwitchTs = false, explicitVR = false) + } else if (!state.explicitVR && explicitVR != null) + state.copy(maySwitchTs = false, explicitVR = true) + else + state.copy(maySwitchTs = false) + } + + private def readDatasetHeader(reader: ByteReader, state: DatasetHeaderState): DicomPart = { val (tag, vr, headerLength, valueLength) = readHeader(reader, state) warnIfOdd(tag, vr, valueLength) if (vr != null) { val bytes = reader.take(headerLength) if (vr == VR.SQ || vr == VR.UN && valueLength == indeterminateLength) - Some(SequencePart(tag, valueLength, state.bigEndian, state.explicitVR, bytes)) + SequencePart(tag, valueLength, state.bigEndian, state.explicitVR, bytes) else if (valueLength == indeterminateLength) - Some(FragmentsPart(tag, valueLength, vr, state.bigEndian, state.explicitVR, bytes)) + FragmentsPart(tag, valueLength, vr, state.bigEndian, state.explicitVR, bytes) else - Some(HeaderPart(tag, vr, valueLength, isFmi = false, state.bigEndian, state.explicitVR, bytes)) + HeaderPart(tag, vr, valueLength, isFmi = false, state.bigEndian, state.explicitVR, bytes) } else tag match { - case 0xfffee000 => Some(ItemPart(state.itemIndex + 1, valueLength, state.bigEndian, reader.take(8))) - case 0xfffee00d => Some(ItemDelimitationPart(state.itemIndex, state.bigEndian, reader.take(8))) - case 0xfffee0dd => Some(SequenceDelimitationPart(state.bigEndian, reader.take(8))) - case _ => Some(UnknownPart(state.bigEndian, reader.take(headerLength))) + case 0xfffee000 => ItemPart(state.itemIndex + 1, valueLength, state.bigEndian, reader.take(8)) + case 0xfffee00d => ItemDelimitationPart(state.itemIndex, state.bigEndian, reader.take(8)) + case 0xfffee0dd => SequenceDelimitationPart(state.bigEndian, reader.take(8)) + case _ => UnknownPart(state.bigEndian, reader.take(headerLength)) } } def parse(reader: ByteReader): ParseResult[DicomPart] = { + val state = + if (datasetHeaderState.maySwitchTs) maybeSwitchTs(reader, datasetHeaderState) else datasetHeaderState val part = readDatasetHeader(reader, state) - val nextState = part - .map { - case HeaderPart(_, _, length, _, bigEndian, _, _) => - if (length > 0) - InValue(ValueState(bigEndian, length, InDatasetHeader(state))) - else - InDatasetHeader(state) - case FragmentsPart(_, _, _, bigEndian, _, _) => - InFragments(FragmentsState(fragmentIndex = 0, bigEndian, state.explicitVR)) - case SequencePart(_, _, _, _, _) => InDatasetHeader(state.copy(itemIndex = 0)) - case ItemPart(index, _, _, _) => InDatasetHeader(state.copy(itemIndex = index)) - case ItemDelimitationPart(index, _, _) => InDatasetHeader(state.copy(itemIndex = index)) - case _ => InDatasetHeader(state) - } - .getOrElse(FinishedParser) - ParseResult(part, nextState, acceptUpstreamFinish = !nextState.isInstanceOf[InValue]) + val nextState = part match { + case HeaderPart(_, _, length, _, bigEndian, _, _) => + if (length > 0) + InValue(ValueState(bigEndian, length, InDatasetHeader(state))) + else + InDatasetHeader(state) + case FragmentsPart(_, _, _, bigEndian, _, _) => + InFragments(FragmentsState(fragmentIndex = 0, bigEndian, state.explicitVR)) + case SequencePart(_, _, _, _, _) => InDatasetHeader(state.copy(itemIndex = 0)) + case ItemPart(index, _, _, _) => InDatasetHeader(state.copy(itemIndex = index, maySwitchTs = true)) + case ItemDelimitationPart(index, _, _) => InDatasetHeader(state.copy(itemIndex = index)) + case SequenceDelimitationPart(_, _) => InDatasetHeader(state.copy(maySwitchTs = true)) + case _ => InDatasetHeader(state) + } + ParseResult(Some(part), nextState, acceptUpstreamFinish = !nextState.isInstanceOf[InValue]) } } @@ -301,7 +324,7 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { log.warning(s"Unexpected fragments delimitation length $valueLength") ParseResult( Some(SequenceDelimitationPart(state.bigEndian, reader.take(headerLength))), - InDatasetHeader(DatasetHeaderState(0, state.bigEndian, state.explicitVR)) + InDatasetHeader(DatasetHeaderState(0, maySwitchTs = false, state.bigEndian, state.explicitVR)) ) case _ => @@ -340,21 +363,10 @@ class ParseFlow private (chunkSize: Int) extends ByteStringParser[DicomPart] { private def readHeader(reader: ByteReader, state: HeaderState): (Int, VR, Int, Long) = { reader.ensure(8) val tagVrBytes = reader.remainingData.take(8) - val (tag, vr, explicitVR) = { - val (tag1, vr1) = tagVr(tagVrBytes, state.bigEndian, state.explicitVR) - if (vr1 == null && !isSpecial(tag1)) { - // cannot parse VR and not item or delimitation, try switching implicit/explicit as last resort - val (tag2, vr2) = tagVr(tagVrBytes, state.bigEndian, !state.explicitVR) - if (vr2 != null && vr2 != VR.UN) - (tag2, vr2, !state.explicitVR) - else - (tag1, vr1, state.explicitVR) - } else - (tag1, vr1, state.explicitVR) - } + val (tag, vr) = tagVr(tagVrBytes, state.bigEndian, state.explicitVR) if (vr == null) (tag, vr, 8, lengthToLong(bytesToInt(tagVrBytes.drop(4), state.bigEndian))) - else if (explicitVR) + else if (state.explicitVR) if (vr.headerLength == 8) (tag, vr, 8, lengthToLong(bytesToUShort(tagVrBytes.drop(6), state.bigEndian))) else { diff --git a/src/test/scala/com/exini/dicom/data/TestData.scala b/src/test/scala/com/exini/dicom/data/TestData.scala index b9011ae..ad6570e 100644 --- a/src/test/scala/com/exini/dicom/data/TestData.scala +++ b/src/test/scala/com/exini/dicom/data/TestData.scala @@ -106,6 +106,8 @@ object TestData { length, bigEndian ) + val cp264Sequence: ByteString = + tagToBytes(Tag.CTDIPhantomTypeCodeSequence) ++ ByteString('U', 'N', 0, 0, 0xff, 0xff, 0xff, 0xff) def waveformSeqStart(bigEndian: Boolean = false, explicitVR: Boolean = true): ByteString = sequence(Tag.WaveformSequence, bigEndian) diff --git a/src/test/scala/com/exini/dicom/streams/ParseFlowTest.scala b/src/test/scala/com/exini/dicom/streams/ParseFlowTest.scala index 7c470f7..77480fe 100644 --- a/src/test/scala/com/exini/dicom/streams/ParseFlowTest.scala +++ b/src/test/scala/com/exini/dicom/streams/ParseFlowTest.scala @@ -579,7 +579,7 @@ class ParseFlowTest .expectDicomComplete() } - it should "parse sequences with VR UN, and where the nested data set(s) have implicit VR, as a block of bytes" in { + it should "parse sequences of determinate length with VR UN with contents in implicit VR as a block of bytes" in { val unSequence = tagToBytes(Tag.CTExposureSequence) ++ ByteString('U', 'N', 0, 0) ++ intToBytes(24) val bytes = personNameJohnDoe() ++ unSequence ++ item(16) ++ studyDate(explicitVR = false) @@ -596,14 +596,13 @@ class ParseFlowTest .expectDicomComplete() } - it should "handle sequences of indefinite length with VR UN and implicit VR" in { + it should "handle sequences of indefinite length with VR UN with contents in implicit VR" in { // see ftp://medical.nema.org/medical/dicom/final/cp246_ft.pdf for motivation - val unSequence = tagToBytes(Tag.CTDIPhantomTypeCodeSequence) ++ ByteString('U', 'N', 0, 0, 0xff, 0xff, 0xff, 0xff) - val bytes = personNameJohnDoe() ++ unSequence ++ item(60) ++ + val bytes = personNameJohnDoe() ++ cp264Sequence ++ item(60) ++ element(Tag.CodeValue, "113691", explicitVR = false) ++ element(Tag.CodingSchemeDesignator, "DCM", explicitVR = false) ++ element(Tag.CodeMeaning, "IEC Body Dosimetry Phantom", explicitVR = false) ++ - sequenceDelimitation() + sequenceDelimitation() ++ pixelData(10) val source = Source .single(bytes) @@ -622,9 +621,163 @@ class ParseFlowTest .expectHeader(Tag.CodeMeaning, VR.LO, 26) .expectValueChunk() .expectSequenceDelimitation() + .expectHeader(Tag.PixelData) + .expectValueChunk(10) .expectDicomComplete() } + it should "handle data ending with a CP-246 sequence" in { + val bytes = personNameJohnDoe() ++ cp264Sequence ++ item() ++ personNameJohnDoe(explicitVR = false) ++ + itemDelimitation() ++ sequenceDelimitation() + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectDicomComplete() + } + + it should "handle a CP-246 sequence followed by a regular sequence" in { + val bytes = personNameJohnDoe() ++ + cp264Sequence ++ item() ++ personNameJohnDoe(explicitVR = + false + ) ++ itemDelimitation() ++ sequenceDelimitation() ++ + sequence(Tag.CollimatorShapeSequence) ++ item() ++ studyDate() ++ itemDelimitation() ++ sequenceDelimitation() + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectSequence(Tag.CollimatorShapeSequence) + .expectItem(1) + .expectHeader(Tag.StudyDate) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectDicomComplete() + } + + it should "handle a CP-246 sequence followed by a private attribute" in { + val bytes = personNameJohnDoe() ++ + cp264Sequence ++ item() ++ personNameJohnDoe(explicitVR = false) ++ itemDelimitation() ++ + sequenceDelimitation() ++ + ValueElement(0x00990110, VR.SH, Value.fromString(VR.SH, "Value"), bigEndian = false, explicitVR = true).toBytes + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectHeader(0x00990110) + .expectValueChunk() + .expectDicomComplete() + } + + it should "handle a CP-246 sequence followed by fragments" in { + val bytes = personNameJohnDoe() ++ + cp264Sequence ++ item() ++ personNameJohnDoe(explicitVR = false) ++ itemDelimitation() ++ + sequenceDelimitation() ++ pixeDataFragments() ++ item(4) ++ ByteString(1, 2, 3, 4) + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectFragments() + .expectFragment(1, 4) + .expectValueChunk(4) + .expectDicomComplete() + } + + it should "handle nested CP-246 sequences" in { + val bytes = personNameJohnDoe() ++ + cp264Sequence ++ item() ++ sequence(Tag.DerivationCodeSequence) ++ item() ++ studyDate(explicitVR = false) ++ + itemDelimitation() ++ sequenceDelimitation() ++ itemDelimitation() ++ sequenceDelimitation() + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectSequence(Tag.DerivationCodeSequence) + .expectItem(1) + .expectHeader(Tag.StudyDate) + .expectValueChunk() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectItemDelimitation() + .expectSequenceDelimitation() + .expectDicomComplete() + } + + it should "fail parsing if length bytes matches a known VR when checking for transfer syntax switch" in { + // length of implicit attribute will be encoded as 0x4441 (little endian) which reads as VR 'DA' + // this is the smallest length that could lead to such problems + val bytes = personNameJohnDoe() ++ cp264Sequence ++ item() ++ + element(Tag.CodeMeaning, ByteString(new Array[Byte](0x4144)), bigEndian = false, explicitVR = false) ++ + itemDelimitation() ++ sequenceDelimitation() + + val source = Source + .single(bytes) + .via(ParseFlow()) + + source + .runWith(TestSink.probe[DicomPart]) + .expectHeader(Tag.PatientName) + .expectValueChunk() + .expectSequence(Tag.CTDIPhantomTypeCodeSequence) + .expectItem(1) + .expectHeader(Tag.CodeMeaning, VR.DA, 0) + .expectUnknownPart() + .expectUnknownPart() + .expectUnknownPart() + .expectUnknownPart() + // etc for many more unknown parts + } + it should "handle odd-length attributes" in { def element(tag: Int, value: String): ByteString =