diff --git a/idr/.snapshots/TestReferenceTestTreeWithJSONify1 b/idr/.snapshots/TestReferenceTestTreeWithJSONify1
new file mode 100644
index 0000000..22aaf5d
--- /dev/null
+++ b/idr/.snapshots/TestReferenceTestTreeWithJSONify1
@@ -0,0 +1,140 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 11",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild11)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild11",
+ "FirstChild": "(TextNode 'data 11')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 11')",
+ "NextSibling": "(ElementNode grandChild12)",
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 12",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild12)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild12",
+ "FirstChild": "(TextNode 'data 12')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 12')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": "(ElementNode grandChild11)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child1",
+ "FirstChild": "(ElementNode grandChild11)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild12)",
+ "NextSibling": "(ElementNode child2)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 21",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild21)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild21",
+ "FirstChild": "(TextNode 'data 21')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 21')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child2)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child2",
+ "FirstChild": "(ElementNode grandChild21)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild21)",
+ "NextSibling": "(ElementNode child3)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child1)",
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "attr 31",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(AttributeNode grandChild31)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild31",
+ "FirstChild": "(TextNode 'attr 31')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'attr 31')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child3)",
+ "PrevSibling": null,
+ "Type": "AttributeNode"
+ }
+ ],
+ "Data": "child3",
+ "FirstChild": "(AttributeNode grandChild31)",
+ "FormatSpecific": null,
+ "LastChild": "(AttributeNode grandChild31)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child2)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child1)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child3)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last
new file mode 100644
index 0000000..9413b37
--- /dev/null
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last
@@ -0,0 +1,82 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 21",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild21)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild21",
+ "FirstChild": "(TextNode 'data 21')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 21')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child2)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child2",
+ "FirstChild": "(ElementNode grandChild21)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild21)",
+ "NextSibling": "(ElementNode child3)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "attr 31",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(AttributeNode grandChild31)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild31",
+ "FirstChild": "(TextNode 'attr 31')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'attr 31')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child3)",
+ "PrevSibling": null,
+ "Type": "AttributeNode"
+ }
+ ],
+ "Data": "child3",
+ "FirstChild": "(AttributeNode grandChild31)",
+ "FormatSpecific": null,
+ "LastChild": "(AttributeNode grandChild31)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child2)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child2)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child3)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first
new file mode 100644
index 0000000..b84f0ef
--- /dev/null
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first
@@ -0,0 +1,105 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 11",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild11)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild11",
+ "FirstChild": "(TextNode 'data 11')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 11')",
+ "NextSibling": "(ElementNode grandChild12)",
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 12",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild12)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild12",
+ "FirstChild": "(TextNode 'data 12')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 12')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": "(ElementNode grandChild11)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child1",
+ "FirstChild": "(ElementNode grandChild11)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild12)",
+ "NextSibling": "(ElementNode child2)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 21",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild21)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild21",
+ "FirstChild": "(TextNode 'data 21')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 21')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child2)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child2",
+ "FirstChild": "(ElementNode grandChild21)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild21)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child1)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child1)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child2)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last
new file mode 100644
index 0000000..906adad
--- /dev/null
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last
@@ -0,0 +1,105 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 11",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild11)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild11",
+ "FirstChild": "(TextNode 'data 11')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 11')",
+ "NextSibling": "(ElementNode grandChild12)",
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 12",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild12)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild12",
+ "FirstChild": "(TextNode 'data 12')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 12')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": "(ElementNode grandChild11)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child1",
+ "FirstChild": "(ElementNode grandChild11)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild12)",
+ "NextSibling": "(ElementNode child3)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "attr 31",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(AttributeNode grandChild31)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild31",
+ "FirstChild": "(TextNode 'attr 31')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'attr 31')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child3)",
+ "PrevSibling": null,
+ "Type": "AttributeNode"
+ }
+ ],
+ "Data": "child3",
+ "FirstChild": "(AttributeNode grandChild31)",
+ "FormatSpecific": null,
+ "LastChild": "(AttributeNode grandChild31)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child1)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child1)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child3)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child
new file mode 100644
index 0000000..8b39d31
--- /dev/null
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child
@@ -0,0 +1,116 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 11",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild11)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild11",
+ "FirstChild": "(TextNode 'data 11')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 11')",
+ "NextSibling": "(ElementNode grandChild12)",
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 12",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild12)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild12",
+ "FirstChild": "(TextNode 'data 12')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 12')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": "(ElementNode grandChild11)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child1",
+ "FirstChild": "(ElementNode grandChild11)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild12)",
+ "NextSibling": "(ElementNode child2)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": null,
+ "Data": "child2",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": "(ElementNode child3)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child1)",
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "attr 31",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(AttributeNode grandChild31)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild31",
+ "FirstChild": "(TextNode 'attr 31')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'attr 31')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child3)",
+ "PrevSibling": null,
+ "Type": "AttributeNode"
+ }
+ ],
+ "Data": "child3",
+ "FirstChild": "(AttributeNode grandChild31)",
+ "FormatSpecific": null,
+ "LastChild": "(AttributeNode grandChild31)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child2)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child1)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child3)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing
new file mode 100644
index 0000000..22aaf5d
--- /dev/null
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing
@@ -0,0 +1,140 @@
+{
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 11",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild11)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild11",
+ "FirstChild": "(TextNode 'data 11')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 11')",
+ "NextSibling": "(ElementNode grandChild12)",
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 12",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild12)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild12",
+ "FirstChild": "(TextNode 'data 12')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 12')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child1)",
+ "PrevSibling": "(ElementNode grandChild11)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child1",
+ "FirstChild": "(ElementNode grandChild11)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild12)",
+ "NextSibling": "(ElementNode child2)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "data 21",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(ElementNode grandChild21)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild21",
+ "FirstChild": "(TextNode 'data 21')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'data 21')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child2)",
+ "PrevSibling": null,
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "child2",
+ "FirstChild": "(ElementNode grandChild21)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode grandChild21)",
+ "NextSibling": "(ElementNode child3)",
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child1)",
+ "Type": "ElementNode"
+ },
+ {
+ "Children": [
+ {
+ "Children": [
+ {
+ "Children": null,
+ "Data": "attr 31",
+ "FirstChild": null,
+ "FormatSpecific": null,
+ "LastChild": null,
+ "NextSibling": null,
+ "Parent": "(AttributeNode grandChild31)",
+ "PrevSibling": null,
+ "Type": "TextNode"
+ }
+ ],
+ "Data": "grandChild31",
+ "FirstChild": "(TextNode 'attr 31')",
+ "FormatSpecific": null,
+ "LastChild": "(TextNode 'attr 31')",
+ "NextSibling": null,
+ "Parent": "(ElementNode child3)",
+ "PrevSibling": null,
+ "Type": "AttributeNode"
+ }
+ ],
+ "Data": "child3",
+ "FirstChild": "(AttributeNode grandChild31)",
+ "FormatSpecific": null,
+ "LastChild": "(AttributeNode grandChild31)",
+ "NextSibling": null,
+ "Parent": "(DocumentNode)",
+ "PrevSibling": "(ElementNode child2)",
+ "Type": "ElementNode"
+ }
+ ],
+ "Data": "root",
+ "FirstChild": "(ElementNode child1)",
+ "FormatSpecific": null,
+ "LastChild": "(ElementNode child3)",
+ "NextSibling": null,
+ "Parent": null,
+ "PrevSibling": null,
+ "Type": "DocumentNode"
+}
diff --git a/idr/README.md b/idr/README.md
new file mode 100644
index 0000000..0e5d126
--- /dev/null
+++ b/idr/README.md
@@ -0,0 +1,97 @@
+# IDR
+
+**IDR** == **I**ntermediate **D**ata **R**epresentation or **I**n-memory **D**ata **R**epresentation
+
+IDR is an intermediate data structure used by omniparser ingesters to store raw data read from various
+formats of inputs, including CSV/txt/XML/EDI/JSON/etc, and then used by schema handlers to perform
+transforms. It is flexible and versatile enough to represent all kinds of data formats supported (or to be
+supported) by omniparser.
+
+*Credit:* The basic data structures and various operations and algorithms used by IDR are mostly
+inherited/adapted from, modified based on, and inspired by works done in https://github.com/antchfx/xmlquery
+and https://github.com/antchfx/xpath. Thank you very much!
+
+The basic building block of an IDR is a `Node` and an IDR is in fact a `Node` tree. Each `Node` has
+two parts (see actual code [here](./node.go)):
+```
+type Node struct {
+ Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
+
+ Type NodeType
+ Data string
+
+ FormatSpecific interface{}
+}
+```
+The first part of a `Node` contains the input format agnostic fields, such as tree pointers (like
+`Parent`, `FirstChild`, etc), `Type` and `Data`, which we'll explain in more detail later. The second
+part of a `Node` is a format specific data blob. The blob not only offers a place to store format specific
+data, but it also gives IDR code and algorithms a hint on what input format the `Node` is about.
+
+Below we'll go through each input format we support and show what its corresponding IDR looks like.
+
+## XML
+
+Since XML is the most complex input format we have for IDR, let's cover it first.
+
+Here is a simple XML (from [this sample](../samples/omniv2/xml/1_datetime_parse_and_format.input.xml)):
+```
+<Root>
+    <JustData>2020/09/22</JustData>
+    <DateTimeWithNoTZ>09/22/2020 12:34:56</DateTimeWithNoTZ>
+</Root>
+```
+This is a simple XML blob with no non-default namespaces and with no attributes. Its corresponding IDR
+looks like this (with empty fields and tree pointers omitted for clarity):
+```
+Node(Type: DocumentNode, FormatSpecific: XMLSpecific())
+ Node(Type: ElementNode, Data: "Root", FormatSpecific: XMLSpecific())
+ Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific())
+ Node(Type: ElementNode, Data: "JustData", FormatSpecific: XMLSpecific())
+ Node(Type: TextNode, Data: "2020/09/22", FormatSpecific: XMLSpecific())
+ Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific())
+ Node(Type: ElementNode, Data: "DateTimeWithNoTZ", FormatSpecific: XMLSpecific())
+ Node(Type: TextNode, Data: "09/22/2020 12:34:56", FormatSpecific: XMLSpecific())
+ Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific())
+```
+Most of the IDR is quite self-explanatory, but what about those `TextNode`'s with `\n` as `Data`? Turns
+out [`xml.Decoder`](https://golang.org/pkg/encoding/xml/#Decoder) treats anything in between two XML
+element nodes as text, as long as the two elements are not directly adjacent to each other. Since
+there is a newline `'\n'` after the XML element `<Root>` and before `<JustData>`, the `'\n'` is captured
+as a `TextNode`.
+
+Also note in this simple case, each `Node` has an empty but non-nil `FormatSpecific`, typed as
+[`XMLSpecific`](./xmlnode.go). `XMLSpecific` contains XML namespace information for each node,
+which we'll see in the [next example](../samples/omniv2/xml/2_multiple_objects.input.xml):
+```
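+<lb0:library xmlns:lb0="uri://something">
+    <lb0:books>
+        <book title="Harry Potter and the Philosopher's Stone">
+            <author>J. K. Rowling</author>
+        </book>
+    </lb0:books>
+</lb0:library>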
+```
+In this example, we'll see how IDR deals with XML namespaces, as well as attributes.
+
+The IDR for the example above looks like the following (note those "dummy" text nodes sprinkled
+in between element nodes are omitted here for clarity; also not including empty `XMLSpecific`):
+```
+Node(Type: DocumentNode)
+ Node(Type: ElementNode, Data: "library", FormatSpecific: XMLSpecific(NamespacePrefix: "lb0", NamespaceURI: "uri://something"))
+ Node(Type: ElementNode, Data: "books", FormatSpecific: XMLSpecific(NamespacePrefix: "lb0", NamespaceURI: "uri://something"))
+ Node(Type: ElementNode, Data: "book")
+ Node(Type: AttributeNode, Data: "title")
+ Node(Type: TextNode, Data: "Harry Potter and the Philosopher's Stone")
+ Node(Type: ElementNode, Data: "author")
+ Node(Type: TextNode, Data: "J. K. Rowling")
+```
+Both `Node`'s representing `<lb0:library>` and `<lb0:books>` include non-empty `XMLSpecific`'s which
+contain their namespace prefixes and full URIs while their `Node.Data` contain the element names without
+the namespace prefixes.
+
+Note XML attributes on elements are represented as `Node`'s as well, with `Type: AttributeNode`
+specifically. If an attribute is namespace-prefixed, the `AttributeNode` typed `Node` will have a non-empty
+`XMLSpecific` set as well. An attribute's actual value is placed as a `TextNode` underneath its `AttributeNode`.
+`AttributeNode`'s are guaranteed to be placed before any other child nodes (`TextNode`, or `ElementNode`)
+by IDR's XML reader.
diff --git a/idr/marshal1.go b/idr/marshal1.go
new file mode 100644
index 0000000..229e47c
--- /dev/null
+++ b/idr/marshal1.go
@@ -0,0 +1,55 @@
+package idr
+
+import (
+ "fmt"
+
+ "github.com/jf-tech/go-corelib/jsons"
+ "github.com/jf-tech/go-corelib/strs"
+)
+
+// j1NodePtrName returns a categorized name for a *Node pointer used in JSONify1.
+func j1NodePtrName(n *Node) *string {
+ if n == nil {
+ return nil
+ }
+ name := func(n *Node) string {
+ if IsXML(n) && XMLSpecificOf(n).NamespacePrefix != "" {
+ return XMLSpecificOf(n).NamespacePrefix + ":" + n.Data
+ }
+ return n.Data
+ }
+ switch n.Type {
+ case DocumentNode:
+ return strs.StrPtr(fmt.Sprintf("(%s)", n.Type))
+ case ElementNode, AttributeNode:
+ return strs.StrPtr(fmt.Sprintf("(%s %s)", n.Type, name(n)))
+ case TextNode:
+ return strs.StrPtr(fmt.Sprintf("(%s '%s')", n.Type, n.Data))
+ default:
+ return strs.StrPtr(fmt.Sprintf("(unknown '%s')", n.Data))
+ }
+}
+
+// j1NodeToInterface converts n and, recursively, its subtree into a map suitable for json marshaling by JSONify1; a childless node gets a nil "Children" slice.
+func j1NodeToInterface(n *Node) interface{} {
+	var kids []interface{}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		kids = append(kids, j1NodeToInterface(c))
+	}
+	return map[string]interface{}{
+		"Parent":         j1NodePtrName(n.Parent),
+		"FirstChild":     j1NodePtrName(n.FirstChild),
+		"LastChild":      j1NodePtrName(n.LastChild),
+		"PrevSibling":    j1NodePtrName(n.PrevSibling),
+		"NextSibling":    j1NodePtrName(n.NextSibling),
+		"Type":           n.Type.String(),
+		"Data":           n.Data,
+		"FormatSpecific": n.FormatSpecific,
+		"Children":       kids,
+	}
+}
+
+// JSONify1 json marshals the tree rooted at n (which must be non-nil): node fields verbatim, tree pointers as short labels, children nested under "Children". Mostly used in tests for snapshotting.
+func JSONify1(n *Node) string {
+ return jsons.BPM(j1NodeToInterface(n))
+}
diff --git a/idr/marshal1_test.go b/idr/marshal1_test.go
new file mode 100644
index 0000000..0c3dc63
--- /dev/null
+++ b/idr/marshal1_test.go
@@ -0,0 +1,31 @@
+package idr
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestJ1NodePtrName(t *testing.T) {
+ for _, test := range []struct {
+ name string
+ n *Node
+ expected string // "" means j1NodePtrName is expected to return nil
+ }{
+ {name: "nil", n: nil, expected: ""},
+ {name: "root", n: CreateNode(DocumentNode, "test"), expected: "(DocumentNode)"}, // Data is not part of a document label
+ {name: "elem w/o ns", n: CreateNode(ElementNode, "A"), expected: "(ElementNode A)"},
+ {name: "elem w/ ns", n: CreateXMLNode(ElementNode, "A", XMLSpecific{"ns", "uri://"}), expected: "(ElementNode ns:A)"}, // label carries the namespace prefix
+ {name: "text", n: CreateNode(TextNode, "data"), expected: "(TextNode 'data')"},
+ {name: "attr", n: CreateNode(AttributeNode, "attr"), expected: "(AttributeNode attr)"},
+ {name: "unknown", n: CreateNode(NodeType(99999), "what"), expected: "(unknown 'what')"}, // NodeType outside the declared constants
+ } {
+ t.Run(test.name, func(t *testing.T) {
+ if test.expected == "" {
+ assert.Nil(t, j1NodePtrName(test.n))
+ } else {
+ assert.Equal(t, test.expected, *j1NodePtrName(test.n)) // compare the pointed-to label, not the pointer
+ }
+ })
+ }
+}
diff --git a/idr/node.go b/idr/node.go
new file mode 100644
index 0000000..1d66b20
--- /dev/null
+++ b/idr/node.go
@@ -0,0 +1,124 @@
+package idr
+
+import (
+ "fmt"
+ "strings"
+)
+
+// NodeType is the type of a Node in an IDR; its zero value is DocumentNode.
+type NodeType uint
+
+const (
+ // DocumentNode is the type of the root Node in an IDR tree.
+ DocumentNode NodeType = iota
+ // ElementNode is the type of an element Node in an IDR tree.
+ ElementNode
+ // TextNode is the type of a text/data Node in an IDR tree.
+ TextNode
+ // AttributeNode is the type of an attribute Node in an IDR tree.
+ AttributeNode
+)
+
+// String converts NodeType to a string: the constant's name for the four declared
+// node types, or "(unknown NodeType: <n>)" for any other value.
+func (nt NodeType) String() string {
+	names := [...]string{
+		DocumentNode:  "DocumentNode",
+		ElementNode:   "ElementNode",
+		TextNode:      "TextNode",
+		AttributeNode: "AttributeNode",
+	}
+	// NodeType is unsigned, so a single upper-bound check covers every out-of-range value.
+	if nt < NodeType(len(names)) {
+		return names[nt]
+	}
+	return fmt.Sprintf("(unknown NodeType: %d)", nt)
+}
+
+// Node represents a node of element/data in an IDR (intermediate data representation) ingested and created
+// by the omniparser. An IDR is a tree of Nodes linked through the pointer fields below.
+// Credit: this is by and large a copy and some adaptation from
+// https://github.com/antchfx/xmlquery/blob/master/node.go. The reasons we want to have our own struct:
+// - more stability
+// - one struct to represent XML/JSON/EDI/CSV/txt/etc., whereas antchfx's work has one struct (in each repo)
+// for each format.
+// - Node allocation recycling.
+type Node struct {
+ Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node // tree links; nil where there is no such relative
+
+ Type NodeType // kind of node, see the NodeType constants
+ Data string // the node's name or text content, depending on Type
+
+ FormatSpecific interface{} // optional input-format specific payload (e.g. XMLSpecific); nil when unset
+}
+
+// CreateNode creates a generic *Node of the given type and data; its tree pointers and FormatSpecific are left unset.
+func CreateNode(ntype NodeType, data string) *Node {
+	n := new(Node)
+	n.Type = ntype
+	n.Data = data
+	return n
+}
+
+// InnerText returns the Data of every TextNode in n's subtree (n included), concatenated in document order.
+// Note AttributeNode children, and hence their texts, are skipped (relevant in an XML IDR tree).
+func (n *Node) InnerText() string {
+	var sb strings.Builder
+	var walk func(*Node)
+	walk = func(cur *Node) {
+		if cur.Type == TextNode {
+			sb.WriteString(cur.Data)
+			return
+		}
+		for c := cur.FirstChild; c != nil; c = c.NextSibling {
+			if c.Type == AttributeNode {
+				continue
+			}
+			walk(c)
+		}
+	}
+	walk(n)
+	return sb.String()
+}
+
+// AddChild adds 'n' as the new last child to 'parent'. n's Parent/PrevSibling/NextSibling are overwritten; n is not unlinked from any tree it was in before, so it is expected to be detached.
+func AddChild(parent, n *Node) {
+ n.Parent = parent
+ n.NextSibling = nil // n becomes the tail of the sibling list
+ if parent.FirstChild == nil {
+ parent.FirstChild = n // no children yet: n is the head as well as the tail
+ n.PrevSibling = nil
+ } else {
+ parent.LastChild.NextSibling = n // link n after the current tail
+ n.PrevSibling = parent.LastChild
+ }
+ parent.LastChild = n
+}
+
+// RemoveFromTree removes a node and its subtree from an IDR tree it is in: the parent's FirstChild/LastChild and the neighbors' sibling
+// links are patched around n, then n's own Parent/PrevSibling/NextSibling are cleared (n keeps its children). If the node is the root of the tree, it's a no-op.
+func RemoveFromTree(n *Node) {
+ if n.Parent == nil {
+ return
+ }
+ if n.Parent.FirstChild == n {
+ if n.Parent.LastChild == n { // n is the only child
+ n.Parent.FirstChild = nil
+ n.Parent.LastChild = nil
+ } else { // n is the first of several children
+ n.Parent.FirstChild = n.NextSibling
+ n.NextSibling.PrevSibling = nil
+ }
+ } else {
+ if n.Parent.LastChild == n { // n is the last of several children
+ n.Parent.LastChild = n.PrevSibling
+ n.PrevSibling.NextSibling = nil
+ } else { // n has a sibling on both sides
+ n.PrevSibling.NextSibling = n.NextSibling
+ n.NextSibling.PrevSibling = n.PrevSibling
+ }
+ }
+ n.Parent = nil // fully detach n; its FirstChild/LastChild are left intact
+ n.PrevSibling = nil
+ n.NextSibling = nil
+}
diff --git a/idr/node_test.go b/idr/node_test.go
new file mode 100644
index 0000000..4c3480e
--- /dev/null
+++ b/idr/node_test.go
@@ -0,0 +1,171 @@
+package idr
+
+import (
+ "testing"
+
+ "github.com/bradleyjkemp/cupaloy"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestNodeType_String(t *testing.T) {
+ assert.Equal(t, "DocumentNode", DocumentNode.String())
+ assert.Equal(t, "ElementNode", ElementNode.String())
+ assert.Equal(t, "TextNode", TextNode.String())
+ assert.Equal(t, "AttributeNode", AttributeNode.String())
+ assert.Equal(t, "(unknown NodeType: 99)", NodeType(99).String()) // a value outside the declared constants hits the fallback
+}
+
+func findRoot(n *Node) *Node {
+ for ; n != nil && n.Parent != nil; n = n.Parent { // climb Parent links until a node without a Parent (or a nil n) is reached
+ }
+ return n
+}
+
+func checkPointersInTree(t *testing.T, n *Node) { // asserts that the tree pointers around n are mutually consistent
+ if n == nil {
+ return
+ }
+ if n.FirstChild != nil {
+ assert.True(t, n == n.FirstChild.Parent)
+ }
+ if n.LastChild != nil {
+ assert.True(t, n == n.LastChild.Parent)
+ }
+ checkPointersInTree(t, n.FirstChild)
+ // There is no need to call checkPointersInTree(t, n.LastChild) because checkPointersInTree(t, n.FirstChild)
+ // will walk its sibling chain to the end, and if the last one isn't n.LastChild then it will fail.
+ // Note only the sibling links are walked here; the siblings' own subtrees are not recursed into.
+ parent := n.Parent // could be nil if n is the root of a tree.
+ // Verify the PrevSibling chain: walking back from n must stay under the same parent and end at parent.FirstChild.
+ cur, prev := n, n.PrevSibling
+ for ; prev != nil; cur, prev = prev, prev.PrevSibling {
+ assert.True(t, prev.Parent == parent)
+ assert.True(t, prev.NextSibling == cur)
+ }
+ assert.True(t, cur.PrevSibling == nil)
+ assert.True(t, parent == nil || parent.FirstChild == cur)
+ // Verify the NextSibling chain: walking forward from n must stay under the same parent and end at parent.LastChild.
+ cur, next := n, n.NextSibling
+ for ; next != nil; cur, next = next, next.NextSibling {
+ assert.True(t, next.Parent == parent)
+ assert.True(t, next.PrevSibling == cur)
+ }
+ assert.True(t, cur.NextSibling == nil)
+ assert.True(t, parent == nil || parent.LastChild == cur)
+}
+
+type testTree struct {
+ // Fixture built by newTestTree (suffix E = ElementNode, T = TextNode, A = AttributeNode; each T is the child of the node above it):
+ //                                root
+ //          child1                child2          child3
+ // grandChild11E, grandChild12E   grandChild21E   grandChild31A
+ // grandChild11T, grandChild12T   grandChild21T   grandChild31T
+ root *Node
+ child1, child2, child3 *Node
+ grandChild11E, grandChild11T *Node
+ grandChild12E, grandChild12T *Node
+ grandChild21E, grandChild21T *Node
+ grandChild31A, grandChild31T *Node
+}
+
+func newTestTree(t *testing.T) *testTree { // builds the fixture tree, verifies its pointers, and returns handles to every node
+ root := CreateNode(DocumentNode, "root")
+ child1 := CreateNode(ElementNode, "child1")
+ child2 := CreateNode(ElementNode, "child2")
+ child3 := CreateNode(ElementNode, "child3")
+ grandChild11E := CreateNode(ElementNode, "grandChild11")
+ grandChild11T := CreateNode(TextNode, "data 11")
+ grandChild12E := CreateNode(ElementNode, "grandChild12")
+ grandChild12T := CreateNode(TextNode, "data 12")
+ grandChild21E := CreateNode(ElementNode, "grandChild21")
+ grandChild21T := CreateNode(TextNode, "data 21")
+ grandChild31A := CreateNode(AttributeNode, "grandChild31") // the only AttributeNode in the fixture
+ grandChild31T := CreateNode(TextNode, "attr 31")
+
+ AddChild(root, child1) // AddChild appends, so the call order below fixes the sibling order
+ AddChild(root, child2)
+ AddChild(root, child3)
+ AddChild(child1, grandChild11E)
+ AddChild(child1, grandChild12E)
+ AddChild(child2, grandChild21E)
+ AddChild(child3, grandChild31A)
+ AddChild(grandChild11E, grandChild11T)
+ AddChild(grandChild12E, grandChild12T)
+ AddChild(grandChild21E, grandChild21T)
+ AddChild(grandChild31A, grandChild31T)
+
+ checkPointersInTree(t, root) // checkPointersInTree only recurses through FirstChild, so every node is checked explicitly
+ checkPointersInTree(t, child1)
+ checkPointersInTree(t, child2)
+ checkPointersInTree(t, child3)
+ checkPointersInTree(t, grandChild11E)
+ checkPointersInTree(t, grandChild12E)
+ checkPointersInTree(t, grandChild21E)
+ checkPointersInTree(t, grandChild31A)
+ checkPointersInTree(t, grandChild11T)
+ checkPointersInTree(t, grandChild12T)
+ checkPointersInTree(t, grandChild21T)
+ checkPointersInTree(t, grandChild31T)
+
+ return &testTree{
+ root: root,
+ child1: child1,
+ child2: child2,
+ child3: child3,
+ grandChild11E: grandChild11E,
+ grandChild12E: grandChild12E,
+ grandChild21E: grandChild21E,
+ grandChild31A: grandChild31A,
+ grandChild11T: grandChild11T,
+ grandChild12T: grandChild12T,
+ grandChild21T: grandChild21T,
+ grandChild31T: grandChild31T,
+ }
+}
+
+func TestReferenceTestTreeWithJSONify1(t *testing.T) {
+ cupaloy.SnapshotT(t, JSONify1(newTestTree(t).root)) // snapshots the unmodified fixture tree as a reference for the other tests
+}
+
+func TestInnerText(t *testing.T) {
+ tt := newTestTree(t)
+ assert.Equal(t, tt.grandChild11T.Data+tt.grandChild12T.Data, tt.child1.InnerText())
+ assert.Equal(t, tt.grandChild11T.Data+tt.grandChild12T.Data+tt.grandChild21T.Data, tt.root.InnerText()) // "attr 31" is absent: it sits under an AttributeNode, which InnerText skips
+}
+
+func TestRemoveNodeAndSubTree(t *testing.T) { // each subtest removes one node from a fresh fixture, re-checks pointers, and snapshots the result
+ t.Run("remove a node who is its parents only child", func(t *testing.T) {
+ tt := newTestTree(t)
+ RemoveFromTree(tt.grandChild21E) // grandChild21E is child2's only child
+ checkPointersInTree(t, tt.root)
+ cupaloy.SnapshotT(t, JSONify1(tt.root))
+ })
+
+ t.Run("remove a node who is its parents first child but not the last", func(t *testing.T) {
+ tt := newTestTree(t)
+ RemoveFromTree(tt.child1) // child1 is root's first child
+ checkPointersInTree(t, tt.root)
+ cupaloy.SnapshotT(t, JSONify1(tt.root))
+ })
+
+ t.Run("remove a node who is its parents middle child not the first not the last", func(t *testing.T) {
+ tt := newTestTree(t)
+ RemoveFromTree(tt.child2) // child2 sits between child1 and child3
+ checkPointersInTree(t, tt.root)
+ cupaloy.SnapshotT(t, JSONify1(tt.root))
+ })
+
+ t.Run("remove a node who is its parents last child but not the first", func(t *testing.T) {
+ tt := newTestTree(t)
+ RemoveFromTree(tt.child3) // child3 is root's last child
+ checkPointersInTree(t, tt.root)
+ cupaloy.SnapshotT(t, JSONify1(tt.root))
+ })
+
+ t.Run("remove a root does nothing", func(t *testing.T) {
+ tt := newTestTree(t)
+ RemoveFromTree(tt.root) // root has no Parent, so the tree must stay unchanged
+ checkPointersInTree(t, tt.root)
+ cupaloy.SnapshotT(t, JSONify1(tt.root))
+ })
+}
diff --git a/idr/xmlnode.go b/idr/xmlnode.go
new file mode 100644
index 0000000..08b0798
--- /dev/null
+++ b/idr/xmlnode.go
@@ -0,0 +1,29 @@
+package idr
+
+// XMLSpecific is the XML-only payload of an IDR Node: the namespace prefix
+// and the namespace URI recorded for that node.
+type XMLSpecific struct {
+	NamespacePrefix, NamespaceURI string
+}
+
+// IsXML reports whether n.FormatSpecific currently holds an XMLSpecific value.
+func IsXML(n *Node) bool {
+	_, isXML := n.FormatSpecific.(XMLSpecific)
+	return isXML
+}
+
+// XMLSpecificOf returns the XMLSpecific value held in n.FormatSpecific.
+// It panics with "node is not XML" if n holds no such value; check IsXML first.
+func XMLSpecificOf(n *Node) XMLSpecific {
+	if xs, ok := n.FormatSpecific.(XMLSpecific); ok {
+		return xs
+	}
+	panic("node is not XML")
+}
+
+// CreateXMLNode calls CreateNode(ntype, data) and stores xmlSpecific in the result's FormatSpecific.
+func CreateXMLNode(ntype NodeType, data string, xmlSpecific XMLSpecific) *Node {
+	node := CreateNode(ntype, data)
+	node.FormatSpecific = xmlSpecific
+	return node
+}
diff --git a/idr/xmlnode_test.go b/idr/xmlnode_test.go
new file mode 100644
index 0000000..9c9d5cb
--- /dev/null
+++ b/idr/xmlnode_test.go
@@ -0,0 +1,26 @@
+package idr
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestIsXML(t *testing.T) {
+ assert.True(t, IsXML(CreateXMLNode(DocumentNode, "", XMLSpecific{}))) // every node type built by CreateXMLNode is XML, even with a zero-value XMLSpecific
+ assert.True(t, IsXML(CreateXMLNode(ElementNode, "A", XMLSpecific{NamespacePrefix: "ns", NamespaceURI: "uri://"})))
+ assert.True(t, IsXML(CreateXMLNode(TextNode, "text", XMLSpecific{})))
+ assert.True(t, IsXML(CreateXMLNode(AttributeNode, "A", XMLSpecific{})))
+ assert.False(t, IsXML(CreateNode(ElementNode, "B"))) // built via CreateNode, not CreateXMLNode
+}
+
+func TestXMLSpecificOf(t *testing.T) {
+	// Whatever XMLSpecific a node was created with must come back unchanged.
+	for _, want := range []XMLSpecific{{}, {NamespacePrefix: "ns", NamespaceURI: "uri"}} {
+		assert.Equal(t, want, XMLSpecificOf(CreateXMLNode(ElementNode, "A", want)))
+	}
+	// A node built with CreateNode instead of CreateXMLNode must trigger the panic.
+	assert.PanicsWithValue(t, "node is not XML", func() {
+		XMLSpecificOf(CreateNode(ElementNode, "A"))
+	})
+}
diff --git a/samples/omniv2/xml/2_multiple_objects.input.xml b/samples/omniv2/xml/2_multiple_objects.input.xml
index 1f47c0e..e43990e 100644
--- a/samples/omniv2/xml/2_multiple_objects.input.xml
+++ b/samples/omniv2/xml/2_multiple_objects.input.xml
@@ -1,6 +1,6 @@
-
+
@@ -13,8 +13,8 @@
1998
-
-
+
+
@@ -27,5 +27,5 @@
1900
-
+
diff --git a/samples/omniv2/xml/2_multiple_objects.schema.json b/samples/omniv2/xml/2_multiple_objects.schema.json
index 3ab0c52..d0a6aa7 100644
--- a/samples/omniv2/xml/2_multiple_objects.schema.json
+++ b/samples/omniv2/xml/2_multiple_objects.schema.json
@@ -4,7 +4,7 @@
"file_format_type": "xml"
},
"transform_declarations": {
- "FINAL_OUTPUT": { "xpath": "lb0:library/books", "object": {
+ "FINAL_OUTPUT": { "xpath": "lb0:library/lb0:books", "object": {
"authors": { "array": [ { "xpath": "book/author" } ] },
"book_titles": { "array": [ { "xpath": "book/@title" } ] },
"books": { "array": [ { "xpath": "book", "template": "book_template" } ] },