Fix ideographic and phonetic components parsing in PersonName

The DICOM standard allows for three representations separated by `=`, and within each representation, five name components separated by `^`. The previous parser was implemented the other way around: five name components, and within each component, three possible representations. See http://dicom.nema.org/dicom/2013/output/chtml/part05/sect_H.3.html
exini · Feb 22, 2021 · 1f69fe8 · 1f69fe8
1 parent b2e0abe
commit 1f69fe8
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 14 deletions.
diff --git a/src/main/scala/com/exini/dicom/data/PersonName.scala b/src/main/scala/com/exini/dicom/data/PersonName.scala
@@ -16,9 +16,7 @@
 
 package com.exini.dicom.data
 
-case class ComponentGroup(alphabetic: String, ideographic: String, phonetic: String) {
-  override def toString: String = s"$alphabetic=$ideographic=$phonetic".replaceAll("=+$", "")
-}
+case class ComponentGroup(alphabetic: String, ideographic: String, phonetic: String)
 
 case class PersonName(
     familyName: ComponentGroup,
@@ -27,7 +25,29 @@ case class PersonName(
     prefix: ComponentGroup,
     suffix: ComponentGroup
 ) {
-  override def toString: String = s"$familyName^$givenName^$middleName^$prefix^$suffix".replaceAll("\\^+$", "")
+  override def toString: String = {
+    val components = List(
+      familyName,
+      givenName,
+      middleName,
+      prefix,
+      suffix
+    )
+    val representations = List(
+      (c: ComponentGroup) => c.alphabetic,
+      (c: ComponentGroup) => c.ideographic,
+      (c: ComponentGroup) => c.phonetic
+    )
+    representations
+      .map((repr) =>
+        components
+          .map(repr)
+          .mkString("^")
+          .replaceAll("\\^+$", "") // Trim trailing ^ separators
+      )
+      .mkString("=")
+      .replaceAll("=+$", "") // Trim trailing = separators
+  }
 }
 
 object PersonName {

diff --git a/src/main/scala/com/exini/dicom/data/Value.scala b/src/main/scala/com/exini/dicom/data/Value.scala
@@ -653,11 +653,14 @@ object Value {
   def parsePersonName(s: String): Option[PersonName] = {
     def ensureLength(ss: Seq[String], n: Int) = ss ++ Seq.fill(math.max(0, n - ss.length))("")
 
-    val comps = ensureLength(s.split("""\^""").toSeq, 5)
-      .map(s => ensureLength(s.split("=").toSeq, 3).map(trim))
-      .map(c => ComponentGroup(c.head, c(1), c(2)))
+    def transpose(matrix: Seq[Seq[String]]): Seq[Seq[String]] =
+      matrix(0).zipWithIndex.map { case (_, i) => matrix.map((col) => col(i)) }
 
-    Option(PersonName(comps.head, comps(1), comps(2), comps(3), comps(4)))
+    val matrix = ensureLength(s.split("=").toSeq, 3)
+      .map(trim)
+      .map(s => ensureLength(s.split("""\^""").toSeq, 5).map(trim))
+    val comps = transpose(matrix).map(c => ComponentGroup(c(0), c(1), c(2)))
+    Option(PersonName(comps(0), comps(1), comps(2), comps(3), comps(4)))
   }
 
   def parseURI(s: String): Option[URI] =

diff --git a/src/test/scala/com/exini/dicom/data/ValueTest.scala b/src/test/scala/com/exini/dicom/data/ValueTest.scala
@@ -472,7 +472,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
   "Parsing a patient name" should "divide into parts and components" in {
     Value(
       ByteString(
-        "aFamily=iFamily=pFamily^aGiven=iGiven=pGiven^aMiddle=iMiddle=pMiddle^aPrefix=iPrefix=pPrefix^aSuffix=iSuffix=pSuffix"
+        "aFamily^aGiven^aMiddle^aPrefix^aSuffix=iFamily^iGiven^iMiddle^iPrefix^iSuffix=pFamily^pGiven^pMiddle^pPrefix^pSuffix"
       )
     ).toPersonNames() shouldBe Seq(
       PersonName(
@@ -486,7 +486,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
   }
 
   it should "handle null components" in {
-    Value(ByteString("=iFamily=pFamily^^aMiddle^aPrefix==pPrefix^==pSuffix"))
+    Value(ByteString("^^aMiddle^aPrefix^=iFamily^^^^=pFamily^^^pPrefix^pSuffix"))
       .toPersonNames() shouldBe Seq(
       PersonName(
         ComponentGroup("", "iFamily", "pFamily"),
@@ -497,7 +497,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
       )
     )
 
-    Value(ByteString("aFamily=iFamily^^aMiddle"))
+    Value(ByteString("aFamily^^aMiddle=iFamily"))
       .toPersonNames() shouldBe Seq(
       PersonName(
         ComponentGroup("aFamily", "iFamily", ""),
@@ -510,7 +510,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
   }
 
   it should "trim whitespace within each component" in {
-    Value(ByteString("   aFamily   =   iFamily   ^^   aMiddle   "))
+    Value(ByteString("   aFamily   ^^    aMiddle    =   iFamily"))
       .toPersonNames() shouldBe Seq(
       PersonName(
         ComponentGroup("aFamily", "iFamily", ""),
@@ -708,7 +708,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
     Value.fromPersonNames(VR.PN, Seq(pn1, pn2)).toPersonNames() shouldBe Seq(pn1, pn2)
 
     Value.fromPersonName(VR.PN, pn1).toString(VR.PN) shouldBe Some(
-      "family=i=p^given=i=p^middle=i=p^prefix=i=p^suffix=i=p"
+      "family^given^middle^prefix^suffix=i^i^i^i^i=p^p^p^p^p"
     )
   }
 
@@ -740,7 +740,7 @@ class ValueTest extends AnyFlatSpec with Matchers {
   }
 
   it should "parse components into alphabetic, ideographic and phonetic elements" in {
-    val pns = parsePN("F-Alphabetic=F-Ideographic=F-Phonetic^Given^==M-Phonetic^P-Alphabetic==P-Phonetic^")
+    val pns = parsePN("F-Alphabetic^Given^^P-Alphabetic^=F-Ideographic^^^^=F-Phonetic^^M-Phonetic^P-Phonetic^")
     pns should have length 1
     pns.head.familyName.alphabetic shouldBe "F-Alphabetic"
     pns.head.familyName.ideographic shouldBe "F-Ideographic"