From 1c71c25ec35ce131b57554f64dd371505a4c2076 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Fri, 2 Oct 2020 21:30:03 +1300 Subject: [PATCH 1/4] Introduce IDR concept and its building block `Node` and some initial helper functions for generic Node as well as XML specific Node. See README.md for more details --- .../TestReferenceTestTreeWithJSONify1 | 140 ++++++++++++++ ...s_its_parents_first_child_but_not_the_last | 82 +++++++++ ...s_its_parents_last_child_but_not_the_first | 105 +++++++++++ ...ts_middle_child_not_the_first_not_the_last | 105 +++++++++++ ...emove_a_node_who_is_its_parents_only_child | 116 ++++++++++++ ...eNodeAndSubTree-remove_a_root_does_nothing | 140 ++++++++++++++ idr/README.md | 97 ++++++++++ idr/marshal1.go | 54 ++++++ idr/marshal1_test.go | 27 +++ idr/node.go | 121 +++++++++++++ idr/node_test.go | 171 ++++++++++++++++++ idr/xmlnode.go | 29 +++ idr/xmlnode_test.go | 26 +++ .../omniv2/xml/2_multiple_objects.input.xml | 8 +- .../omniv2/xml/2_multiple_objects.schema.json | 2 +- 15 files changed, 1218 insertions(+), 5 deletions(-) create mode 100644 idr/.snapshots/TestReferenceTestTreeWithJSONify1 create mode 100644 idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last create mode 100644 idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first create mode 100644 idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last create mode 100644 idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child create mode 100644 idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing create mode 100644 idr/README.md create mode 100644 idr/marshal1.go create mode 100644 idr/marshal1_test.go create mode 100644 idr/node.go create mode 100644 idr/node_test.go create mode 100644 idr/xmlnode.go create mode 100644 idr/xmlnode_test.go diff --git 
a/idr/.snapshots/TestReferenceTestTreeWithJSONify1 b/idr/.snapshots/TestReferenceTestTreeWithJSONify1 new file mode 100644 index 0000000..9aa24db --- /dev/null +++ b/idr/.snapshots/TestReferenceTestTreeWithJSONify1 @@ -0,0 +1,140 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 11", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild11)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild11", + "FirstChild": "(TextNode 'data 11')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 11')", + "NextSibling": "(ElementNode grandChild12)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": null, + "Data": "data 12", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild12)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild12", + "FirstChild": "(TextNode 'data 12')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 12')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(ElementNode grandChild11)", + "Type": "ElementNode" + } + ], + "Data": "child1", + "FirstChild": "(ElementNode grandChild11)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild12)", + "NextSibling": "(ElementNode child2)", + "Parent": "(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 21", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild21)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild21", + "FirstChild": "(TextNode 'data 21')", + "FormatSpecific": null, + "LastChild": 
"(TextNode 'data 21')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child2)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + } + ], + "Data": "child2", + "FirstChild": "(ElementNode grandChild21)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild21)", + "NextSibling": "(ElementNode child3)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child1)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "attr 31", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(AttributeNode grandChild31)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild31", + "FirstChild": "(TextNode 'attr 31')", + "FormatSpecific": null, + "LastChild": "(TextNode 'attr 31')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child3)", + "PrevSibling": "(nil)", + "Type": "AttributeNode" + } + ], + "Data": "child3", + "FirstChild": "(AttributeNode grandChild31)", + "FormatSpecific": null, + "LastChild": "(AttributeNode grandChild31)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child2)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child1)", + "FormatSpecific": null, + "LastChild": "(ElementNode child3)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last new file mode 100644 index 0000000..def94e0 --- /dev/null +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last @@ -0,0 +1,82 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 21", + "FirstChild": "(nil)", + 
"FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild21)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild21", + "FirstChild": "(TextNode 'data 21')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 21')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child2)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + } + ], + "Data": "child2", + "FirstChild": "(ElementNode grandChild21)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild21)", + "NextSibling": "(ElementNode child3)", + "Parent": "(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "attr 31", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(AttributeNode grandChild31)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild31", + "FirstChild": "(TextNode 'attr 31')", + "FormatSpecific": null, + "LastChild": "(TextNode 'attr 31')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child3)", + "PrevSibling": "(nil)", + "Type": "AttributeNode" + } + ], + "Data": "child3", + "FirstChild": "(AttributeNode grandChild31)", + "FormatSpecific": null, + "LastChild": "(AttributeNode grandChild31)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child2)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child2)", + "FormatSpecific": null, + "LastChild": "(ElementNode child3)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first new file mode 100644 index 0000000..68f501e --- /dev/null 
+++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first @@ -0,0 +1,105 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 11", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild11)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild11", + "FirstChild": "(TextNode 'data 11')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 11')", + "NextSibling": "(ElementNode grandChild12)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": null, + "Data": "data 12", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild12)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild12", + "FirstChild": "(TextNode 'data 12')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 12')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(ElementNode grandChild11)", + "Type": "ElementNode" + } + ], + "Data": "child1", + "FirstChild": "(ElementNode grandChild11)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild12)", + "NextSibling": "(ElementNode child2)", + "Parent": "(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 21", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild21)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild21", + "FirstChild": "(TextNode 'data 21')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 21')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child2)", + "PrevSibling": 
"(nil)", + "Type": "ElementNode" + } + ], + "Data": "child2", + "FirstChild": "(ElementNode grandChild21)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild21)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child1)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child1)", + "FormatSpecific": null, + "LastChild": "(ElementNode child2)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last new file mode 100644 index 0000000..4603520 --- /dev/null +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last @@ -0,0 +1,105 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 11", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild11)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild11", + "FirstChild": "(TextNode 'data 11')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 11')", + "NextSibling": "(ElementNode grandChild12)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": null, + "Data": "data 12", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild12)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild12", + "FirstChild": "(TextNode 'data 12')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 12')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child1)", + 
"PrevSibling": "(ElementNode grandChild11)", + "Type": "ElementNode" + } + ], + "Data": "child1", + "FirstChild": "(ElementNode grandChild11)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild12)", + "NextSibling": "(ElementNode child3)", + "Parent": "(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "attr 31", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(AttributeNode grandChild31)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild31", + "FirstChild": "(TextNode 'attr 31')", + "FormatSpecific": null, + "LastChild": "(TextNode 'attr 31')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child3)", + "PrevSibling": "(nil)", + "Type": "AttributeNode" + } + ], + "Data": "child3", + "FirstChild": "(AttributeNode grandChild31)", + "FormatSpecific": null, + "LastChild": "(AttributeNode grandChild31)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child1)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child1)", + "FormatSpecific": null, + "LastChild": "(ElementNode child3)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child new file mode 100644 index 0000000..3a4dc88 --- /dev/null +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child @@ -0,0 +1,116 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 11", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild11)", + "PrevSibling": "(nil)", + 
"Type": "TextNode" + } + ], + "Data": "grandChild11", + "FirstChild": "(TextNode 'data 11')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 11')", + "NextSibling": "(ElementNode grandChild12)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": null, + "Data": "data 12", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild12)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild12", + "FirstChild": "(TextNode 'data 12')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 12')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(ElementNode grandChild11)", + "Type": "ElementNode" + } + ], + "Data": "child1", + "FirstChild": "(ElementNode grandChild11)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild12)", + "NextSibling": "(ElementNode child2)", + "Parent": "(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": null, + "Data": "child2", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(ElementNode child3)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child1)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "attr 31", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(AttributeNode grandChild31)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild31", + "FirstChild": "(TextNode 'attr 31')", + "FormatSpecific": null, + "LastChild": "(TextNode 'attr 31')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child3)", + "PrevSibling": "(nil)", + "Type": "AttributeNode" + } + ], + "Data": "child3", + "FirstChild": "(AttributeNode grandChild31)", + "FormatSpecific": null, + 
"LastChild": "(AttributeNode grandChild31)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child2)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child1)", + "FormatSpecific": null, + "LastChild": "(ElementNode child3)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing new file mode 100644 index 0000000..9aa24db --- /dev/null +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing @@ -0,0 +1,140 @@ +{ + "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 11", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild11)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild11", + "FirstChild": "(TextNode 'data 11')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 11')", + "NextSibling": "(ElementNode grandChild12)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": null, + "Data": "data 12", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild12)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild12", + "FirstChild": "(TextNode 'data 12')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 12')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child1)", + "PrevSibling": "(ElementNode grandChild11)", + "Type": "ElementNode" + } + ], + "Data": "child1", + "FirstChild": "(ElementNode grandChild11)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild12)", + "NextSibling": "(ElementNode child2)", + "Parent": 
"(DocumentNode)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "data 21", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(ElementNode grandChild21)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild21", + "FirstChild": "(TextNode 'data 21')", + "FormatSpecific": null, + "LastChild": "(TextNode 'data 21')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child2)", + "PrevSibling": "(nil)", + "Type": "ElementNode" + } + ], + "Data": "child2", + "FirstChild": "(ElementNode grandChild21)", + "FormatSpecific": null, + "LastChild": "(ElementNode grandChild21)", + "NextSibling": "(ElementNode child3)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child1)", + "Type": "ElementNode" + }, + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "attr 31", + "FirstChild": "(nil)", + "FormatSpecific": null, + "LastChild": "(nil)", + "NextSibling": "(nil)", + "Parent": "(AttributeNode grandChild31)", + "PrevSibling": "(nil)", + "Type": "TextNode" + } + ], + "Data": "grandChild31", + "FirstChild": "(TextNode 'attr 31')", + "FormatSpecific": null, + "LastChild": "(TextNode 'attr 31')", + "NextSibling": "(nil)", + "Parent": "(ElementNode child3)", + "PrevSibling": "(nil)", + "Type": "AttributeNode" + } + ], + "Data": "child3", + "FirstChild": "(AttributeNode grandChild31)", + "FormatSpecific": null, + "LastChild": "(AttributeNode grandChild31)", + "NextSibling": "(nil)", + "Parent": "(DocumentNode)", + "PrevSibling": "(ElementNode child2)", + "Type": "ElementNode" + } + ], + "Data": "root", + "FirstChild": "(ElementNode child1)", + "FormatSpecific": null, + "LastChild": "(ElementNode child3)", + "NextSibling": "(nil)", + "Parent": "(nil)", + "PrevSibling": "(nil)", + "Type": "DocumentNode" +} diff --git a/idr/README.md b/idr/README.md new file mode 100644 index 
0000000..243a963 --- /dev/null +++ b/idr/README.md @@ -0,0 +1,97 @@ +# IDR + +**IDR** == **I**ntermediate **D**ata **R**epresentation or **I**n-memory **D**ata **R**epresentation + +IDR is an intermediate data structure used by omniparser ingesters to store raw data read from various +formats of inputs, including CSV/txt/XML/EDI/JSON/etc, and then used by schema handlers to perform +transforms. It is flexible and versatile to represent all kinds of data formats supported (or to be +supported) by omniparser. + +*Credit:* The basic data structures and various operations and algorithms used by IDR are mostly +inherited/adapted from, modified based on, and inspired by works done in https://github.com/antchfx/xmlquery +and https://github.com/antchfx/xpath. Thank you very much! + +The basic building block of an IDR is a `Node` and an IDR is in fact a `Node` tree. Each `Node` has +two parts (see actual code [here](./node.go)): +``` +type Node struct { + Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node + + Type NodeType + Data string + + FormatSpecific interface{} +} +``` +The first part of a `Node` contains the input format agnostic fields, such as tree pointers (like +`Parent`, `FirstChild`, etc), `Type` and `Data`, which we'll explain in more detail later. The second +part of a `Node` is a format specific data blob. The blob not only offers a place to store format specific +data, it also gives IDR code and algorithms a hint on what input format the `Node` is about. + +Below we'll go through each input format we support and show what its corresponding IDR looks like. + +## XML + +Since XML is the most complex input format IDR deals with, let's cover it first. + +Let's take a look at a simple example of XML (from [this sample](../samples/omniv2/xml/1_datetime_parse_and_format.input.xml)): +``` +<Root> + <JustData>2020/09/22</JustData> + <DateTimeWithNoTZ>09/22/2020 12:34:56</DateTimeWithNoTZ> +</Root> +``` +This is a simple XML blob with no non-default namespaces and with no attributes.
Its corresponding IDR +looks like this (with empty fields omitted and tree pointers omitted for clarity): +``` +Node(Type: DocumentNode, FormatSpecific: XMLSpecific()) + Node(Type: ElementNode, Data: "Root", FormatSpecific: XMLSpecific()) + Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific()) + Node(Type: ElementNode, Data: "JustData", FormatSpecific: XMLSpecific()) + Node(Type: TextNode, Data: "2020/09/22", FormatSpecific: XMLSpecific()) + Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific()) + Node(Type: ElementNode, Data: "DateTimeWithNoTZ", FormatSpecific: XMLSpecific()) + Node(Type: TextNode, Data: "09/22/2020 12:34:56", FormatSpecific: XMLSpecific()) + Node(Type: TextNode, Data: "\n", FormatSpecific: XMLSpecific()) +``` +Most of the IDR is quite self-explanatory, but what about those `TextNode`s with `\n` as `Data`? Turns +out [`xml.Decoder`](https://golang.org/pkg/encoding/xml/#Decoder) treats anything in between two XML +element nodes as text, as long as the two elements are not directly adjacent to each other. Since +there is a newline `'\n'` after the XML element `<Root>` and before `<JustData>`, the `'\n'` is captured +as a `TextNode`. + +Also note in this simple case, each `Node` has an empty but non-nil `FormatSpecific`, typed as +[`XMLSpecific`](./xmlnode.go). `XMLSpecific` contains XML namespace information for each node, +which we'll see in the [next example](../samples/omniv2/xml/2_multiple_objects.input.xml): +``` +<lb0:library xmlns:lb0="uri://something"> + <lb0:books> + <book title="Harry Potter and the Philosopher's Stone"> + <author>J. K. Rowling</author> + </book> + </lb0:books> +</lb0:library> +``` +In this example, we'll see how IDR deals with XML namespaces, as well as attributes.
+ +The IDR representing the example above looks like the following (note those "dummy" text nodes sprinkled +in between element nodes are omitted here for clarity; also not including empty `XMLSpecific`): +``` +Node(Type: DocumentNode) + Node(Type: ElementNode, Data: "library", FormatSpecific: XMLSpecific(NamespacePrefix: "lb0", NamespaceURI: "uri://something")) + Node(Type: ElementNode, Data: "books", FormatSpecific: XMLSpecific(NamespacePrefix: "lb0", NamespaceURI: "uri://something")) + Node(Type: ElementNode, Data: "book") + Node(Type: AttributeNode, Data: "title") + Node(Type: TextNode, Data: "Harry Potter and the Philosopher's Stone") + Node(Type: ElementNode, Data: "author") + Node(Type: TextNode, Data: "J. K. Rowling") +``` +Both `Node`s representing `<lb0:library>` and `<lb0:books>` include non-empty `XMLSpecific`s, which +contain their namespace prefixes and full URIs, while their `Node.Data` fields contain the element names without +the namespace prefixes. + +Note XML attributes on elements are represented as `Node`s as well, `Type: AttributeNode` specifically. +If an attribute is namespace prefixed, the `AttributeNode` typed `Node` will have a non-empty +`XMLSpecific` set as well. An attribute's value is placed as a `TextNode` underneath its `AttributeNode`. +`AttributeNode`s are guaranteed to be placed before any other child nodes (`TextNode`, or `ElementNode`) +by IDR's XML reader.
diff --git a/idr/marshal1.go b/idr/marshal1.go new file mode 100644 index 0000000..1041d87 --- /dev/null +++ b/idr/marshal1.go @@ -0,0 +1,54 @@ +package idr + +import ( + "fmt" + + "github.com/jf-tech/go-corelib/jsons" +) + +// j1NodePtrName returns a categorized name for a *Node pointer used in JSONify1 +func j1NodePtrName(n *Node) string { + if n == nil { + return "(nil)" + } + name := func(n *Node) string { + if IsXML(n) && XMLSpecificOf(n).NamespacePrefix != "" { + return XMLSpecificOf(n).NamespacePrefix + ":" + n.Data + } + return n.Data + } + switch n.Type { + case DocumentNode: + return fmt.Sprintf("(%s)", n.Type) + case ElementNode, AttributeNode: + return fmt.Sprintf("(%s %s)", n.Type, name(n)) + case TextNode: + return fmt.Sprintf("(%s '%s')", n.Type, n.Data) + default: + return fmt.Sprintf("(unknown '%s')", n.Data) + } +} + +// j1NodeToInterface converts *Node into an interface{} suitable for json marshaling used in JSONify1. +func j1NodeToInterface(n *Node) interface{} { + m := make(map[string]interface{}) + m["Parent"] = j1NodePtrName(n.Parent) + m["FirstChild"] = j1NodePtrName(n.FirstChild) + m["LastChild"] = j1NodePtrName(n.LastChild) + m["PrevSibling"] = j1NodePtrName(n.PrevSibling) + m["NextSibling"] = j1NodePtrName(n.NextSibling) + m["Type"] = n.Type.String() + m["Data"] = n.Data + m["FormatSpecific"] = n.FormatSpecific + var children []interface{} + for child := n.FirstChild; child != nil; child = child.NextSibling { + children = append(children, j1NodeToInterface(child)) + } + m["Children"] = children + return m +} + +// JSONify1 json marshals a *Node verbatim. Mostly used in test for snapshotting. 
+func JSONify1(n *Node) string { + return jsons.BPM(j1NodeToInterface(n)) +} diff --git a/idr/marshal1_test.go b/idr/marshal1_test.go new file mode 100644 index 0000000..cb3888d --- /dev/null +++ b/idr/marshal1_test.go @@ -0,0 +1,27 @@ +package idr + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestJ1NodePtrName(t *testing.T) { + for _, test := range []struct { + name string + n *Node + expected string + }{ + {name: "nil", n: nil, expected: "(nil)"}, + {name: "root", n: CreateNode(DocumentNode, "test"), expected: "(DocumentNode)"}, + {name: "elem w/o ns", n: CreateNode(ElementNode, "A"), expected: "(ElementNode A)"}, + {name: "elem w/ ns", n: CreateXMLNode(ElementNode, "A", XMLSpecific{"ns", "uri://"}), expected: "(ElementNode ns:A)"}, + {name: "text", n: CreateNode(TextNode, "data"), expected: "(TextNode 'data')"}, + {name: "attr", n: CreateNode(AttributeNode, "attr"), expected: "(AttributeNode attr)"}, + {name: "unknown", n: CreateNode(NodeType(99999), "what"), expected: "(unknown 'what')"}, + } { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, j1NodePtrName(test.n)) + }) + } +} diff --git a/idr/node.go b/idr/node.go new file mode 100644 index 0000000..018b2fc --- /dev/null +++ b/idr/node.go @@ -0,0 +1,121 @@ +package idr + +import ( + "fmt" + "strings" +) + +// NodeType is the type of a Node. +type NodeType uint + +const ( + // DocumentNode is the root of Node tree. + DocumentNode NodeType = iota + // ElementNode is an element. + ElementNode + // TextNode is the text content of a node. + TextNode + // AttributeNode is an attribute of element. + AttributeNode +) + +// String converts NodeType to a string. 
+func (nt NodeType) String() string { + switch nt { + case DocumentNode: + return "DocumentNode" + case ElementNode: + return "ElementNode" + case TextNode: + return "TextNode" + case AttributeNode: + return "AttributeNode" + default: + return fmt.Sprintf("(unknown NodeType: %d)", nt) + } +} + +// Node represents a node of element/data in an IDR ingested and created by the parser. +// Credit: this is by and large a copy and some adaptation from +// https://github.com/antchfx/xmlquery/blob/master/node.go. The reasons we want to have our own struct: +// - more stability +// - one struct to represent XML/JSON/EDI/CSV/txt/etc. Vs antchfx's work have one struct for each format. +type Node struct { + Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node + + Type NodeType + Data string + + FormatSpecific interface{} +} + +// CreateNode creates a generic *Node +func CreateNode(ntype NodeType, data string) *Node { + return &Node{ + Type: ntype, + Data: data, + } +} + +// InnerText returns a Node's children's texts all concatenated. +// Note in an XML Node tree, any AttributeNode's text will be ignored. +func (n *Node) InnerText() string { + var s strings.Builder + var captureText func(*Node) + captureText = func(a *Node) { + switch a.Type { + case TextNode: + s.WriteString(a.Data) + default: + for child := a.FirstChild; child != nil; child = child.NextSibling { + if child.Type != AttributeNode { + captureText(child) + } + } + } + } + captureText(n) + return s.String() +} + +// AddChild adds 'n' as the new last child to 'parent'. +func AddChild(parent, n *Node) { + n.Parent = parent + n.NextSibling = nil + if parent.FirstChild == nil { + parent.FirstChild = n + n.PrevSibling = nil + } else { + parent.LastChild.NextSibling = n + n.PrevSibling = parent.LastChild + } + parent.LastChild = n +} + +// RemoveFromTree removes a node and its subtree from the IDR +// tree it is in. If the node is the root of the tree, then it's no-op. 
+func RemoveFromTree(n *Node) { + if n.Parent == nil { + return + } + if n.Parent.FirstChild == n { + if n.Parent.LastChild == n { + n.Parent.FirstChild = nil + n.Parent.LastChild = nil + } else { + n.Parent.FirstChild = n.NextSibling + n.NextSibling.PrevSibling = nil + } + } else { + if n.Parent.LastChild == n { + n.Parent.LastChild = n.PrevSibling + n.PrevSibling.NextSibling = nil + } else { + n.PrevSibling.NextSibling = n.NextSibling + n.NextSibling.PrevSibling = n.PrevSibling + } + } + n.Parent = nil + n.PrevSibling = nil + n.NextSibling = nil +} diff --git a/idr/node_test.go b/idr/node_test.go new file mode 100644 index 0000000..4c3480e --- /dev/null +++ b/idr/node_test.go @@ -0,0 +1,171 @@ +package idr + +import ( + "testing" + + "github.com/bradleyjkemp/cupaloy" + "github.com/stretchr/testify/assert" +) + +func TestNodeType_String(t *testing.T) { + assert.Equal(t, "DocumentNode", DocumentNode.String()) + assert.Equal(t, "ElementNode", ElementNode.String()) + assert.Equal(t, "TextNode", TextNode.String()) + assert.Equal(t, "AttributeNode", AttributeNode.String()) + assert.Equal(t, "(unknown NodeType: 99)", NodeType(99).String()) +} + +func findRoot(n *Node) *Node { + for ; n != nil && n.Parent != nil; n = n.Parent { + } + return n +} + +func checkPointersInTree(t *testing.T, n *Node) { + if n == nil { + return + } + if n.FirstChild != nil { + assert.True(t, n == n.FirstChild.Parent) + } + if n.LastChild != nil { + assert.True(t, n == n.LastChild.Parent) + } + checkPointersInTree(t, n.FirstChild) + // There is no need to call checkPointersInTree(t, n.LastChild) + // because checkPointersInTree(t, n.FirstChild) will traverse all its + // siblings to the end, and if the last one isn't n.LastChild then it will fail. + parent := n.Parent // could be nil if n is the root of a tree. 
+ // Verify the PrevSibling chain + cur, prev := n, n.PrevSibling + for ; prev != nil; cur, prev = prev, prev.PrevSibling { + assert.True(t, prev.Parent == parent) + assert.True(t, prev.NextSibling == cur) + } + assert.True(t, cur.PrevSibling == nil) + assert.True(t, parent == nil || parent.FirstChild == cur) + // Verify the NextSibling chain + cur, next := n, n.NextSibling + for ; next != nil; cur, next = next, next.NextSibling { + assert.True(t, next.Parent == parent) + assert.True(t, next.PrevSibling == cur) + } + assert.True(t, cur.NextSibling == nil) + assert.True(t, parent == nil || parent.LastChild == cur) +} + +type testTree struct { + // + // root + // child1 child2 child3 + // grandChild11E, grandchild12E grandChild21E grandChild31A + // grandChild11T, grandchild12T grandChild21T grandChild31T + root *Node + child1, child2, child3 *Node + grandChild11E, grandChild11T *Node + grandChild12E, grandChild12T *Node + grandChild21E, grandChild21T *Node + grandChild31A, grandChild31T *Node +} + +func newTestTree(t *testing.T) *testTree { + root := CreateNode(DocumentNode, "root") + child1 := CreateNode(ElementNode, "child1") + child2 := CreateNode(ElementNode, "child2") + child3 := CreateNode(ElementNode, "child3") + grandChild11E := CreateNode(ElementNode, "grandChild11") + grandChild11T := CreateNode(TextNode, "data 11") + grandChild12E := CreateNode(ElementNode, "grandChild12") + grandChild12T := CreateNode(TextNode, "data 12") + grandChild21E := CreateNode(ElementNode, "grandChild21") + grandChild21T := CreateNode(TextNode, "data 21") + grandChild31A := CreateNode(AttributeNode, "grandChild31") + grandChild31T := CreateNode(TextNode, "attr 31") + + AddChild(root, child1) + AddChild(root, child2) + AddChild(root, child3) + AddChild(child1, grandChild11E) + AddChild(child1, grandChild12E) + AddChild(child2, grandChild21E) + AddChild(child3, grandChild31A) + AddChild(grandChild11E, grandChild11T) + AddChild(grandChild12E, grandChild12T) + AddChild(grandChild21E, 
grandChild21T) + AddChild(grandChild31A, grandChild31T) + + checkPointersInTree(t, root) + checkPointersInTree(t, child1) + checkPointersInTree(t, child2) + checkPointersInTree(t, child3) + checkPointersInTree(t, grandChild11E) + checkPointersInTree(t, grandChild12E) + checkPointersInTree(t, grandChild21E) + checkPointersInTree(t, grandChild31A) + checkPointersInTree(t, grandChild11T) + checkPointersInTree(t, grandChild12T) + checkPointersInTree(t, grandChild21T) + checkPointersInTree(t, grandChild31T) + + return &testTree{ + root: root, + child1: child1, + child2: child2, + child3: child3, + grandChild11E: grandChild11E, + grandChild12E: grandChild12E, + grandChild21E: grandChild21E, + grandChild31A: grandChild31A, + grandChild11T: grandChild11T, + grandChild12T: grandChild12T, + grandChild21T: grandChild21T, + grandChild31T: grandChild31T, + } +} + +func TestReferenceTestTreeWithJSONify1(t *testing.T) { + cupaloy.SnapshotT(t, JSONify1(newTestTree(t).root)) +} + +func TestInnerText(t *testing.T) { + tt := newTestTree(t) + assert.Equal(t, tt.grandChild11T.Data+tt.grandChild12T.Data, tt.child1.InnerText()) + assert.Equal(t, tt.grandChild11T.Data+tt.grandChild12T.Data+tt.grandChild21T.Data, tt.root.InnerText()) +} + +func TestRemoveNodeAndSubTree(t *testing.T) { + t.Run("remove a node who is its parents only child", func(t *testing.T) { + tt := newTestTree(t) + RemoveFromTree(tt.grandChild21E) + checkPointersInTree(t, tt.root) + cupaloy.SnapshotT(t, JSONify1(tt.root)) + }) + + t.Run("remove a node who is its parents first child but not the last", func(t *testing.T) { + tt := newTestTree(t) + RemoveFromTree(tt.child1) + checkPointersInTree(t, tt.root) + cupaloy.SnapshotT(t, JSONify1(tt.root)) + }) + + t.Run("remove a node who is its parents middle child not the first not the last", func(t *testing.T) { + tt := newTestTree(t) + RemoveFromTree(tt.child2) + checkPointersInTree(t, tt.root) + cupaloy.SnapshotT(t, JSONify1(tt.root)) + }) + + t.Run("remove a node who is its 
parents last child but not the first", func(t *testing.T) { + tt := newTestTree(t) + RemoveFromTree(tt.child3) + checkPointersInTree(t, tt.root) + cupaloy.SnapshotT(t, JSONify1(tt.root)) + }) + + t.Run("remove a root does nothing", func(t *testing.T) { + tt := newTestTree(t) + RemoveFromTree(tt.root) + checkPointersInTree(t, tt.root) + cupaloy.SnapshotT(t, JSONify1(tt.root)) + }) +} diff --git a/idr/xmlnode.go b/idr/xmlnode.go new file mode 100644 index 0000000..08b0798 --- /dev/null +++ b/idr/xmlnode.go @@ -0,0 +1,29 @@ +package idr + +// XMLSpecific contains XML IDR Node specific information such as namespace. +type XMLSpecific struct { + NamespacePrefix string + NamespaceURI string +} + +// IsXML checks if a Node is of XML. +func IsXML(n *Node) bool { + _, ok := n.FormatSpecific.(XMLSpecific) + return ok +} + +// XMLSpecificOf returns the XMLSpecific field of a Node. +// Note if the Node isn't of XML, this function will panic. +func XMLSpecificOf(n *Node) XMLSpecific { + if !IsXML(n) { + panic("node is not XML") + } + return n.FormatSpecific.(XMLSpecific) +} + +// CreateXMLNode creates an XML Node. 
+func CreateXMLNode(ntype NodeType, data string, xmlSpecific XMLSpecific) *Node { + n := CreateNode(ntype, data) + n.FormatSpecific = xmlSpecific + return n +} diff --git a/idr/xmlnode_test.go b/idr/xmlnode_test.go new file mode 100644 index 0000000..9c9d5cb --- /dev/null +++ b/idr/xmlnode_test.go @@ -0,0 +1,26 @@ +package idr + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsXML(t *testing.T) { + assert.True(t, IsXML(CreateXMLNode(DocumentNode, "", XMLSpecific{}))) + assert.True(t, IsXML(CreateXMLNode(ElementNode, "A", XMLSpecific{NamespacePrefix: "ns", NamespaceURI: "uri://"}))) + assert.True(t, IsXML(CreateXMLNode(TextNode, "text", XMLSpecific{}))) + assert.True(t, IsXML(CreateXMLNode(AttributeNode, "A", XMLSpecific{}))) + assert.False(t, IsXML(CreateNode(ElementNode, "B"))) +} + +func TestXMLSpecificOf(t *testing.T) { + assert.Equal(t, XMLSpecific{}, XMLSpecificOf(CreateXMLNode(ElementNode, "A", XMLSpecific{}))) + assert.Equal(t, + XMLSpecific{NamespacePrefix: "ns", NamespaceURI: "uri"}, + XMLSpecificOf( + CreateXMLNode(ElementNode, "A", XMLSpecific{NamespacePrefix: "ns", NamespaceURI: "uri"}))) + assert.PanicsWithValue(t, "node is not XML", func() { + XMLSpecificOf(CreateNode(ElementNode, "A")) + }) +} diff --git a/samples/omniv2/xml/2_multiple_objects.input.xml b/samples/omniv2/xml/2_multiple_objects.input.xml index 1f47c0e..e43990e 100644 --- a/samples/omniv2/xml/2_multiple_objects.input.xml +++ b/samples/omniv2/xml/2_multiple_objects.input.xml @@ -1,6 +1,6 @@ - +
Scholastic Press
@@ -13,8 +13,8 @@ 1998
Harry Potter Collection
-
- +
+
Harper & Brothers
@@ -27,5 +27,5 @@ 1900
Kids Reading Collection
- +
diff --git a/samples/omniv2/xml/2_multiple_objects.schema.json b/samples/omniv2/xml/2_multiple_objects.schema.json index 3ab0c52..d0a6aa7 100644 --- a/samples/omniv2/xml/2_multiple_objects.schema.json +++ b/samples/omniv2/xml/2_multiple_objects.schema.json @@ -4,7 +4,7 @@ "file_format_type": "xml" }, "transform_declarations": { - "FINAL_OUTPUT": { "xpath": "lb0:library/books", "object": { + "FINAL_OUTPUT": { "xpath": "lb0:library/lb0:books", "object": { "authors": { "array": [ { "xpath": "book/author" } ] }, "book_titles": { "array": [ { "xpath": "book/@title" } ] }, "books": { "array": [ { "xpath": "book", "template": "book_template" } ] }, From a3ea7f5a47a07d5b1ebe2a0b06fcb2c117ad7742 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Sat, 3 Oct 2020 06:42:16 +1300 Subject: [PATCH 2/4] PR comments --- .../TestReferenceTestTreeWithJSONify1 | 54 +++++++++---------- ...s_its_parents_first_child_but_not_the_last | 34 ++++++------ ...s_its_parents_last_child_but_not_the_first | 42 +++++++-------- ...ts_middle_child_not_the_first_not_the_last | 42 +++++++-------- ...emove_a_node_who_is_its_parents_only_child | 46 ++++++++-------- ...eNodeAndSubTree-remove_a_root_does_nothing | 54 +++++++++---------- idr/marshal1.go | 13 ++--- idr/marshal1_test.go | 8 ++- 8 files changed, 149 insertions(+), 144 deletions(-) diff --git a/idr/.snapshots/TestReferenceTestTreeWithJSONify1 b/idr/.snapshots/TestReferenceTestTreeWithJSONify1 index 9aa24db..22aaf5d 100644 --- a/idr/.snapshots/TestReferenceTestTreeWithJSONify1 +++ b/idr/.snapshots/TestReferenceTestTreeWithJSONify1 @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 11", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild11)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -22,7 +22,7 @@ "LastChild": "(TextNode 'data 11')", "NextSibling": "(ElementNode 
grandChild12)", "Parent": "(ElementNode child1)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -30,12 +30,12 @@ { "Children": null, "Data": "data 12", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild12)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -43,7 +43,7 @@ "FirstChild": "(TextNode 'data 12')", "FormatSpecific": null, "LastChild": "(TextNode 'data 12')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child1)", "PrevSibling": "(ElementNode grandChild11)", "Type": "ElementNode" @@ -55,7 +55,7 @@ "LastChild": "(ElementNode grandChild12)", "NextSibling": "(ElementNode child2)", "Parent": "(DocumentNode)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -65,12 +65,12 @@ { "Children": null, "Data": "data 21", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild21)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -78,9 +78,9 @@ "FirstChild": "(TextNode 'data 21')", "FormatSpecific": null, "LastChild": "(TextNode 'data 21')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child2)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" } ], @@ -100,12 +100,12 @@ { "Children": null, "Data": "attr 31", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(AttributeNode grandChild31)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -113,9 +113,9 @@ "FirstChild": "(TextNode 'attr 31')", "FormatSpecific": null, "LastChild": "(TextNode 'attr 
31')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child3)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "AttributeNode" } ], @@ -123,7 +123,7 @@ "FirstChild": "(AttributeNode grandChild31)", "FormatSpecific": null, "LastChild": "(AttributeNode grandChild31)", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child2)", "Type": "ElementNode" @@ -133,8 +133,8 @@ "FirstChild": "(ElementNode child1)", "FormatSpecific": null, "LastChild": "(ElementNode child3)", - "NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last index def94e0..9413b37 100644 --- a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_first_child_but_not_the_last @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 21", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild21)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -20,9 +20,9 @@ "FirstChild": "(TextNode 'data 21')", "FormatSpecific": null, "LastChild": "(TextNode 'data 21')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child2)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" } ], @@ -32,7 +32,7 @@ "LastChild": "(ElementNode grandChild21)", "NextSibling": "(ElementNode child3)", "Parent": "(DocumentNode)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, 
{ @@ -42,12 +42,12 @@ { "Children": null, "Data": "attr 31", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(AttributeNode grandChild31)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -55,9 +55,9 @@ "FirstChild": "(TextNode 'attr 31')", "FormatSpecific": null, "LastChild": "(TextNode 'attr 31')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child3)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "AttributeNode" } ], @@ -65,7 +65,7 @@ "FirstChild": "(AttributeNode grandChild31)", "FormatSpecific": null, "LastChild": "(AttributeNode grandChild31)", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child2)", "Type": "ElementNode" @@ -75,8 +75,8 @@ "FirstChild": "(ElementNode child2)", "FormatSpecific": null, "LastChild": "(ElementNode child3)", - "NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first index 68f501e..b84f0ef 100644 --- a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_last_child_but_not_the_first @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 11", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild11)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -22,7 +22,7 @@ "LastChild": 
"(TextNode 'data 11')", "NextSibling": "(ElementNode grandChild12)", "Parent": "(ElementNode child1)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -30,12 +30,12 @@ { "Children": null, "Data": "data 12", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild12)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -43,7 +43,7 @@ "FirstChild": "(TextNode 'data 12')", "FormatSpecific": null, "LastChild": "(TextNode 'data 12')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child1)", "PrevSibling": "(ElementNode grandChild11)", "Type": "ElementNode" @@ -55,7 +55,7 @@ "LastChild": "(ElementNode grandChild12)", "NextSibling": "(ElementNode child2)", "Parent": "(DocumentNode)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -65,12 +65,12 @@ { "Children": null, "Data": "data 21", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild21)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -78,9 +78,9 @@ "FirstChild": "(TextNode 'data 21')", "FormatSpecific": null, "LastChild": "(TextNode 'data 21')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child2)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" } ], @@ -88,7 +88,7 @@ "FirstChild": "(ElementNode grandChild21)", "FormatSpecific": null, "LastChild": "(ElementNode grandChild21)", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child1)", "Type": "ElementNode" @@ -98,8 +98,8 @@ "FirstChild": "(ElementNode child1)", "FormatSpecific": null, "LastChild": "(ElementNode child2)", - 
"NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last index 4603520..906adad 100644 --- a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_middle_child_not_the_first_not_the_last @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 11", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild11)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -22,7 +22,7 @@ "LastChild": "(TextNode 'data 11')", "NextSibling": "(ElementNode grandChild12)", "Parent": "(ElementNode child1)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -30,12 +30,12 @@ { "Children": null, "Data": "data 12", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild12)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -43,7 +43,7 @@ "FirstChild": "(TextNode 'data 12')", "FormatSpecific": null, "LastChild": "(TextNode 'data 12')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child1)", "PrevSibling": "(ElementNode grandChild11)", "Type": "ElementNode" @@ -55,7 +55,7 @@ "LastChild": "(ElementNode grandChild12)", "NextSibling": "(ElementNode child3)", "Parent": "(DocumentNode)", - "PrevSibling": "(nil)", + "PrevSibling": null, 
"Type": "ElementNode" }, { @@ -65,12 +65,12 @@ { "Children": null, "Data": "attr 31", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(AttributeNode grandChild31)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -78,9 +78,9 @@ "FirstChild": "(TextNode 'attr 31')", "FormatSpecific": null, "LastChild": "(TextNode 'attr 31')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child3)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "AttributeNode" } ], @@ -88,7 +88,7 @@ "FirstChild": "(AttributeNode grandChild31)", "FormatSpecific": null, "LastChild": "(AttributeNode grandChild31)", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child1)", "Type": "ElementNode" @@ -98,8 +98,8 @@ "FirstChild": "(ElementNode child1)", "FormatSpecific": null, "LastChild": "(ElementNode child3)", - "NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child index 3a4dc88..8b39d31 100644 --- a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_node_who_is_its_parents_only_child @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 11", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild11)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -22,7 +22,7 @@ "LastChild": "(TextNode 'data 11')", "NextSibling": 
"(ElementNode grandChild12)", "Parent": "(ElementNode child1)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -30,12 +30,12 @@ { "Children": null, "Data": "data 12", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild12)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -43,7 +43,7 @@ "FirstChild": "(TextNode 'data 12')", "FormatSpecific": null, "LastChild": "(TextNode 'data 12')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child1)", "PrevSibling": "(ElementNode grandChild11)", "Type": "ElementNode" @@ -55,15 +55,15 @@ "LastChild": "(ElementNode grandChild12)", "NextSibling": "(ElementNode child2)", "Parent": "(DocumentNode)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { "Children": null, "Data": "child2", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", + "LastChild": null, "NextSibling": "(ElementNode child3)", "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child1)", @@ -76,12 +76,12 @@ { "Children": null, "Data": "attr 31", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(AttributeNode grandChild31)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -89,9 +89,9 @@ "FirstChild": "(TextNode 'attr 31')", "FormatSpecific": null, "LastChild": "(TextNode 'attr 31')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child3)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "AttributeNode" } ], @@ -99,7 +99,7 @@ "FirstChild": "(AttributeNode grandChild31)", "FormatSpecific": null, "LastChild": "(AttributeNode grandChild31)", - "NextSibling": "(nil)", + 
"NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child2)", "Type": "ElementNode" @@ -109,8 +109,8 @@ "FirstChild": "(ElementNode child1)", "FormatSpecific": null, "LastChild": "(ElementNode child3)", - "NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing index 9aa24db..22aaf5d 100644 --- a/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing +++ b/idr/.snapshots/TestRemoveNodeAndSubTree-remove_a_root_does_nothing @@ -7,12 +7,12 @@ { "Children": null, "Data": "data 11", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild11)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -22,7 +22,7 @@ "LastChild": "(TextNode 'data 11')", "NextSibling": "(ElementNode grandChild12)", "Parent": "(ElementNode child1)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -30,12 +30,12 @@ { "Children": null, "Data": "data 12", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild12)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -43,7 +43,7 @@ "FirstChild": "(TextNode 'data 12')", "FormatSpecific": null, "LastChild": "(TextNode 'data 12')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child1)", "PrevSibling": "(ElementNode grandChild11)", "Type": "ElementNode" @@ -55,7 +55,7 @@ "LastChild": "(ElementNode grandChild12)", "NextSibling": "(ElementNode child2)", "Parent": "(DocumentNode)", - 
"PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" }, { @@ -65,12 +65,12 @@ { "Children": null, "Data": "data 21", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(ElementNode grandChild21)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -78,9 +78,9 @@ "FirstChild": "(TextNode 'data 21')", "FormatSpecific": null, "LastChild": "(TextNode 'data 21')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child2)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "ElementNode" } ], @@ -100,12 +100,12 @@ { "Children": null, "Data": "attr 31", - "FirstChild": "(nil)", + "FirstChild": null, "FormatSpecific": null, - "LastChild": "(nil)", - "NextSibling": "(nil)", + "LastChild": null, + "NextSibling": null, "Parent": "(AttributeNode grandChild31)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "TextNode" } ], @@ -113,9 +113,9 @@ "FirstChild": "(TextNode 'attr 31')", "FormatSpecific": null, "LastChild": "(TextNode 'attr 31')", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(ElementNode child3)", - "PrevSibling": "(nil)", + "PrevSibling": null, "Type": "AttributeNode" } ], @@ -123,7 +123,7 @@ "FirstChild": "(AttributeNode grandChild31)", "FormatSpecific": null, "LastChild": "(AttributeNode grandChild31)", - "NextSibling": "(nil)", + "NextSibling": null, "Parent": "(DocumentNode)", "PrevSibling": "(ElementNode child2)", "Type": "ElementNode" @@ -133,8 +133,8 @@ "FirstChild": "(ElementNode child1)", "FormatSpecific": null, "LastChild": "(ElementNode child3)", - "NextSibling": "(nil)", - "Parent": "(nil)", - "PrevSibling": "(nil)", + "NextSibling": null, + "Parent": null, + "PrevSibling": null, "Type": "DocumentNode" } diff --git a/idr/marshal1.go b/idr/marshal1.go index 1041d87..9d5305c 100644 --- a/idr/marshal1.go +++ b/idr/marshal1.go @@ -4,12 +4,13 
@@ import ( "fmt" "github.com/jf-tech/go-corelib/jsons" + "github.com/jf-tech/go-corelib/strs" ) // j1NodePtrName returns a categorized name for a *Node pointer used in JSONify1 -func j1NodePtrName(n *Node) string { +func j1NodePtrName(n *Node) *string { if n == nil { - return "(nil)" + return nil } name := func(n *Node) string { if IsXML(n) && XMLSpecificOf(n).NamespacePrefix != "" { @@ -19,13 +20,13 @@ func j1NodePtrName(n *Node) string { } switch n.Type { case DocumentNode: - return fmt.Sprintf("(%s)", n.Type) + return strs.StrPtr(fmt.Sprintf("(%s)", n.Type)) case ElementNode, AttributeNode: - return fmt.Sprintf("(%s %s)", n.Type, name(n)) + return strs.StrPtr(fmt.Sprintf("(%s %s)", n.Type, name(n))) case TextNode: - return fmt.Sprintf("(%s '%s')", n.Type, n.Data) + return strs.StrPtr(fmt.Sprintf("(%s '%s')", n.Type, n.Data)) default: - return fmt.Sprintf("(unknown '%s')", n.Data) + return strs.StrPtr(fmt.Sprintf("(unknown '%s')", n.Data)) } } diff --git a/idr/marshal1_test.go b/idr/marshal1_test.go index cb3888d..0c3dc63 100644 --- a/idr/marshal1_test.go +++ b/idr/marshal1_test.go @@ -12,7 +12,7 @@ func TestJ1NodePtrName(t *testing.T) { n *Node expected string }{ - {name: "nil", n: nil, expected: "(nil)"}, + {name: "nil", n: nil, expected: ""}, {name: "root", n: CreateNode(DocumentNode, "test"), expected: "(DocumentNode)"}, {name: "elem w/o ns", n: CreateNode(ElementNode, "A"), expected: "(ElementNode A)"}, {name: "elem w/ ns", n: CreateXMLNode(ElementNode, "A", XMLSpecific{"ns", "uri://"}), expected: "(ElementNode ns:A)"}, @@ -21,7 +21,11 @@ func TestJ1NodePtrName(t *testing.T) { {name: "unknown", n: CreateNode(NodeType(99999), "what"), expected: "(unknown 'what')"}, } { t.Run(test.name, func(t *testing.T) { - assert.Equal(t, test.expected, j1NodePtrName(test.n)) + if test.expected == "" { + assert.Nil(t, j1NodePtrName(test.n)) + } else { + assert.Equal(t, test.expected, *j1NodePtrName(test.n)) + } }) } } From 3afe02e1eaff1df9d55b498b573294374fefaa08 Mon Sep 
17 00:00:00 2001 From: jf-tech Date: Sat, 3 Oct 2020 07:23:49 +1300 Subject: [PATCH 3/4] a few more comments change --- idr/marshal1.go | 2 +- idr/node.go | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/idr/marshal1.go b/idr/marshal1.go index 9d5305c..229e47c 100644 --- a/idr/marshal1.go +++ b/idr/marshal1.go @@ -7,7 +7,7 @@ import ( "github.com/jf-tech/go-corelib/strs" ) -// j1NodePtrName returns a categorized name for a *Node pointer used in JSONify1 +// j1NodePtrName returns a categorized name for a *Node pointer used in JSONify1. func j1NodePtrName(n *Node) *string { if n == nil { return nil diff --git a/idr/node.go b/idr/node.go index 018b2fc..1d66b20 100644 --- a/idr/node.go +++ b/idr/node.go @@ -5,17 +5,17 @@ import ( "strings" ) -// NodeType is the type of a Node. +// NodeType is the type of a Node in an IDR. type NodeType uint const ( - // DocumentNode is the root of Node tree. + // DocumentNode is the type of the root Node in an IDR tree. DocumentNode NodeType = iota - // ElementNode is an element. + // ElementNode is the type of an element Node in an IDR tree. ElementNode - // TextNode is the text content of a node. + // TextNode is the type of a text/data Node in an IDR tree. TextNode - // AttributeNode is an attribute of element. + // AttributeNode is the type of an attribute Node in an IDR tree. AttributeNode ) @@ -35,11 +35,14 @@ func (nt NodeType) String() string { } } -// Node represents a node of element/data in an IDR ingested and created by the parser. +// Node represents a node of element/data in an IDR (intermediate data representation) ingested and created +// by the omniparser. // Credit: this is by and large a copy and some adaptation from // https://github.com/antchfx/xmlquery/blob/master/node.go. The reasons we want to have our own struct: // - more stability -// - one struct to represent XML/JSON/EDI/CSV/txt/etc. Vs antchfx's work have one struct for each format.
+// - one struct to represent XML/JSON/EDI/CSV/txt/etc. Vs antchfx's work have one struct (in each repo) +// for each format. +// - Node allocation recycling. type Node struct { Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node @@ -49,7 +52,7 @@ type Node struct { FormatSpecific interface{} } -// CreateNode creates a generic *Node +// CreateNode creates a generic *Node. func CreateNode(ntype NodeType, data string) *Node { return &Node{ Type: ntype, @@ -57,8 +60,8 @@ func CreateNode(ntype NodeType, data string) *Node { } } -// InnerText returns a Node's children's texts all concatenated. -// Note in an XML Node tree, any AttributeNode's text will be ignored. +// InnerText returns a Node's children's texts concatenated. +// Note (in an XML IDR tree) none of the AttributeNode's text will be included. func (n *Node) InnerText() string { var s strings.Builder var captureText func(*Node) @@ -92,8 +95,8 @@ func AddChild(parent, n *Node) { parent.LastChild = n } -// RemoveFromTree removes a node and its subtree from the IDR -// tree it is in. If the node is the root of the tree, then it's no-op. +// RemoveFromTree removes a node and its subtree from an IDR +// tree it is in. If the node is the root of the tree, it's a no-op. func RemoveFromTree(n *Node) { if n.Parent == nil { return From 2550eab912f0ebd927b374749aa63eaa273cf378 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Sat, 3 Oct 2020 07:32:07 +1300 Subject: [PATCH 4/4] more readme editing --- idr/README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/idr/README.md b/idr/README.md index 243a963..0e5d126 100644 --- a/idr/README.md +++ b/idr/README.md @@ -28,13 +28,13 @@ The first part of a `Node` contains the input format agnostic fields, such as tr part of a `Node` is format specific data blob. The blob not only offers a place to store format specific data it also gives IDR code and algorithms a hint on what input format the `Node` is about. 
-Below we'll go through each input format we support and show how its corresponding IDR looks like. +Below we'll go through each input format we support and show what its corresponding IDR looks like. ## XML -Since XML is the most complex input format we'll deal with by IDR. Let's cover it first. +Since XML is the most complex input format we have for IDR, let's cover it first. -Let's take a look a simple example of XML (from [this sample](../samples/omniv2/xml/1_datetime_parse_and_format.input.xml)): +Here is a simple XML (from [this sample](../samples/omniv2/xml/1_datetime_parse_and_format.input.xml)): ``` 2020/09/22 @@ -74,7 +74,7 @@ which we'll see in the [next example](../samples/omniv2/xml/2_multiple_objects.i ``` In this example, we'll see how IDR deals with XML namespaces, as well as attributes. -The IDR represents the example above looks like the following (note those "dummy" text nodes sprinkled +The IDR for the example above looks like the following (note those "dummy" text nodes sprinkled in between element nodes are omitted here for clarity; also not including empty `XMLSpecific`): ``` Node(Type: DocumentNode) @@ -86,12 +86,12 @@ Node(Type: DocumentNode) Node(Type: ElementNode, Data: "author") Node(Type: TextNode, Data: "J. K. Rowling") ``` -Both `Node`'s represent `` and `` include non-empty `XMLSpecific`'s which -contain their namespace prefixes and full URIs while their `Node.Data` contains the element name without +Both `Node`'s representing `` and `` include non-empty `XMLSpecific`'s which +contain their namespace prefixes and full URIs while their `Node.Data` contain the element names without the namespace prefixes. -Note XML attributes on elements are represented as `Node`'s as well, `Type: AttributeNode` specifically. -If an attribute is namespace prefixed, the `AttributeNode` typed `Node` will have a non-empty -`XMLSpecific` set as well. An attribute's value is placed as a `TextNode` underneath its `ElementNode`. 
+Note XML attributes on elements are represented as `Node`'s as well, with `Type: AttributeNode` +specifically. If an attribute is namespace-prefixed, the `AttributeNode`-typed `Node` will have a non-empty +`XMLSpecific` set as well. An attribute's actual value is placed as a `TextNode` underneath its `AttributeNode`. `AttributeNode`'s are guaranteed to be placed before any other child nodes (`TextNode`, or `ElementNode`) by IDR's XML reader.