From c984ec3bae04648f4db28eed59c4c96e1e72acfc Mon Sep 17 00:00:00 2001 From: Hans Van Akelyen Date: Tue, 19 Jul 2022 15:18:56 +0200 Subject: [PATCH 1/3] HOP-4038: update json docs and make example more clear --- .../main/java/org/apache/hop/core/Const.java | 2 +- .../pages/pipeline/transforms/jsoninput.adoc | 62 +++++++++++-------- .../modules/ROOT/pages/variables.adoc | 1 + 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/org/apache/hop/core/Const.java b/core/src/main/java/org/apache/hop/core/Const.java index 4ac4e3ff534..735333c2e5c 100644 --- a/core/src/main/java/org/apache/hop/core/Const.java +++ b/core/src/main/java/org/apache/hop/core/Const.java @@ -853,7 +853,7 @@ public static final boolean isUsingSimpleStackTraces() { */ @Variable(value = "N", description = - "Name of te variable tset so that Nulls are considered while parsing JSON files. If HOP_JSON_INPUT_INCLUDE_NULLS is \"Y\" then nulls will be included otherwise they will not (default behavior)") + "Name of te variable to set so that Nulls are considered while parsing JSON files. 
If HOP_JSON_INPUT_INCLUDE_NULLS is \"Y\" then nulls will be included otherwise they will not be included (default behavior)") public static final String HOP_JSON_INPUT_INCLUDE_NULLS = "HOP_JSON_INPUT_INCLUDE_NULLS"; /** By default, HOP do not consider NULLS while parsing input */ public static final String JSON_INPUT_INCLUDE_NULLS = "N"; diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsoninput.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsoninput.adoc index 454a4f353ca..16124704225 100644 --- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsoninput.adoc +++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsoninput.adoc @@ -146,53 +146,65 @@ Normally, while processing input JSON files, if a field contains null then the w For example if we have a JSON file like this ```json { - "id": "123456", - "testArray": [ + "persons" : [ { - "id": "1654879", - "Name": null, - "testArray_inner_one": [ - { - "id": "15697", - "Name": "Robert" - } - ] + "id": "1", + "name": "name 1" }, { - "id": "888", - "Name": "Robert222", - "testArray_inner_two": [ - { - "id": "4309", - "Name": null - } - ] + "id": "2", + "name": "name 2" + }, + { + "id": "3", + "name": null + }, + { + "id": "4", + "name": "name 4" } ] } ``` +When extracting the fields id and Name using the following field definition: +[%header] +|=== +|Field name|Json Path +|id| `$.persons.*.id` +|Name| `$.persons.*.name` +|=== + given the default behavior, the output will be ``` -id;ResourceName -123456;Robert222 +id;Name +1;name 1 +2;name 2 +3;null +4;name 4 ``` -whereas you may prefer it to be +Now let's only select the `name` field and see what happens ``` -id;ResourceName -123456; -123456;Robert222 +Name +name 1 +name 2 +name 4 ``` -To change Hop's behavior regarding Null values, add a new configuration variable: +You will notice that you only have 3 rows returned in this case (the null line is omitted from the result) + + +To change Hop's 
behavior regarding Null values in Json files, change the following configuration variable: ``` HOP_JSON_INPUT_INCLUDE_NULLS = Y ``` +When restarting and re-running the pipeline you will have 4 result rows, including the row containing the null value. + == Metadata Injection Support All fields of this transform support metadata injection. diff --git a/docs/hop-user-manual/modules/ROOT/pages/variables.adoc b/docs/hop-user-manual/modules/ROOT/pages/variables.adoc index 0ffea92d48b..049e3861516 100644 --- a/docs/hop-user-manual/modules/ROOT/pages/variables.adoc +++ b/docs/hop-user-manual/modules/ROOT/pages/variables.adoc @@ -228,6 +228,7 @@ Hostname lookup is performed by Hop so that it is capable of logging the server Set to 0 to keep all snapshots indefinitely (default) |HOP_USE_NATIVE_FILE_DIALOG|N|Set this value to Y if you want to use the system file open/save dialog when browsing files |NEO4J_LOGGING_CONNECTION||Set this variable to the name of an existing Neo4j connection to enable execution logging to a Neo4j database. +|HOP_JSON_INPUT_INCLUDE_NULLS|N|Name of the variable to set so that Nulls are considered while parsing JSON files. 
If HOP_JSON_INPUT_INCLUDE_NULLS is "Y" then nulls will be included otherwise they will not be included (default behavior) |=== == Environment variables From c453d6978864d7cafb910a55a87978ebb41f32f5 Mon Sep 17 00:00:00 2001 From: Hans Van Akelyen Date: Tue, 19 Jul 2022 16:30:49 +0200 Subject: [PATCH 2/3] HOP-4039: reformat and write jsonoutput documentation --- .../pages/pipeline/transforms/jsonoutput.adoc | 244 ++++++++---------- 1 file changed, 103 insertions(+), 141 deletions(-) diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsonoutput.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsonoutput.adoc index e1b54190309..39d080f7f31 100644 --- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsonoutput.adoc +++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/jsonoutput.adoc @@ -33,7 +33,8 @@ Output JSON will be available as a javascript array or a javascript object depen General tab allows to specify type of transform operation, output json structure, transform output file. This file will be used to dump all generated json. -[width="90%",options="header"] +==== Settings Section +[width="90%",options="header", cols="1a,3a"] |=== |Option|Description |Transform name|Name of the transform; this name has to be unique in a single pipeline. @@ -45,18 +46,62 @@ Currently available 3 types of operation: 3. Output value and write to file - dump to file and pass generated json as a transform output file |Json block name|This value will be used as a name for json block. -Can be empty string that will affect output json structure, see detailed description below. -|Nr. rows in a block|Number of json block key - value pairs. -NOTE, 1 is a special values, in case of 1 every output will be generated as one object. -See description below. -|Output value|This value will be used as a transform output field. -Will contain generated json output block depending on transform settings. 
-|Compatibility mode|Changes the default fixed mode json structure +|Nr. rows in a block|Number of rows that are combined as one JSON Array + + +NOTE: A new file will be generated for each block + +|Output value|Output field name +|Compatibility mode|Mode to be backwards compatible, more information <<compat-mode,here>> +|=== + +==== Output File Section + +[width="90%",options="header", cols="1a,3a"] +|=== +|Option|Description +|Filename|full path to output file +|Append|When checked new rows will be appended to the existing files +|Create Parent folder|When checked the parent folders will be created when they do not exist, else the transform will fail if the folder does not exist +|Do not open create at start|If not checked: + +file (and in some cases parent folder) will be created/opened to write during pipeline initialization. + +If checked: + +file and parent folder will be created only after the transform receives its first input data. +|Extension|Output file extension. +Default value is 'json' +|Encoding|Output file encoding +|Pass output to servlet|Enable this option to return the data via a web service instead of writing into a file. +|Include date in filename?|If checked - output file name will contain File name value + current date. +This may help to generate unique output files. +|Include time in filename|If checked - output file name will contain file creation time. +Same as for 'Include date in filename' option +|Show filename(s) button|Can be useful to test full output file path +|Add file to result filenames?|If checked - created output file path will be accessible from transform result +|=== + +=== Fields Tab + +This tab is used to map input transform fields to output json values + +[width="90%",options="header"] +|=== +|Option|Description +|Element name|Json element name as a key. +For example "A":"B" - A is an element name, B is actual input value mapped for this Element name. +|Fieldname|Input transform field name. 
+Use 'Get Fields' button to discover available input fields |=== -**Compatibility mode** +== Metadata Injection Support + +All fields of this transform support metadata injection. +You can use this transform with ETL Metadata Injection to pass metadata to your pipeline at runtime. + +[[compat-mode]] +== Compatibility mode +This part of the documentation will explain the differences when turning on compatibility mode. For all new development this mode is **not** recommended. +Imagine we are generating a simple list with key names "name" and "value" and their corresponding values. -By default this transform uses the fixed structure mode, consider the Json Output transform has the following settings: +We will be using the following settings: * Json block name = "data" * Nr rows in block = 3 @@ -64,23 +109,31 @@ By default this transform uses the fixed structure mode, consider the Json Outpu This will output: +First file: + [source,json] ---- { "data" : [ { - "name" : "item", - "value" : 25 + "name" : "item 1", + "value" : "value 1" }, { - "name" : "item", - "value" : 25 + "name" : "item 2", + "value" : "value 2" }, { - "name" : "item", - "value" : 25 + "name" : "item 3", + "value" : "value 3" } ] -}{ +} +---- +Second file: + +[source,json] +---- +{ "data" : [ { - "name" : "item", - "value" : 25 + "name" : "item 4", + "value" : "value 4" } ] } ---- @@ -93,136 +146,45 @@ If compatibility mode is enabled and the transform has the following settings: This will output: -[source,json] ----- -{"data":[{"name":"item"},{"value":25},{"name":"item"},{"value":25},{"name":"item"},{"value":25}]} -{"data":[{"name":"item"},{"value":25}]} ----- - -Pretty formatting does not affect compatibility mode. -We have 2 output json objects. -First object harvest first 3 input rows and second object harvests only one row. -This happens because of number of rows in a block is 3. Anyway it can be considered as incorrect result, as the real object count for array is 6 for the first output object. 
-By default compatibility mode is disabled. - -If 'Json block name' is an empty string (by default it has 'data' value) - compatibility mode will use empty string for block name. -Normally - if compatibility mode was not checked, transform output will be: - -[source,json] ----- -[ { - "name" : "item", - "value" : 25 -}, { - "name" : "item", - "value" : 25 -}, { - "name" : "item", - "value" : 25 -} ][ { - "name" : "item", - "value" : 25 -} ] ----- - -We will have just 4 simple json objects that will be outputted as a 4 transform output rows. - -In case of json block name is defined - output structure will looks like: - +First file: [source,json] ---- { - "data" : { - "name" : "item", - "value" : 25 - } -}{ - "data" : { - "name" : "item", - "value" : 25 - } -}{ - "data" : { - "name" : "item", - "value" : 25 - } -}{ - "data" : { - "name" : "item", - "value" : 25 - } + "data": [ + { + "name": "item 1" + }, + { + "value": "value 1" + }, + { + "name": "item 2" + }, + { + "value": "value 2" + }, + { + "name": "item 3" + }, + { + "value": "value 3" + } + ] } ---- - -So this is will be same 4 output objects with json block name defined. - -If 'Nr. rows in a block' will be less that 1 output will be as a one object: - +Second file: [source,json] ---- { - "data" : [ { - "name" : "item", - "value" : 25 - }, { - "name" : "item", - "value" : 25 - }, { - "name" : "item", - "value" : 25 - }, { - "name" : "item", - "value" : 25 - } ] + "data": [ + { + "name": "item 4" + }, + { + "value": "value 4" + } + ] } ---- -This will be one object (one output row) with data block containing json array with 4 objects (as we had 4 input data rows). -Please note - when using 0 'Nr. rows in a block' transform will build output object until input data is available. -When input is done - one big output object will be passed to output row. -For big input data it can impact memory usage. 
- -=== Output File - -[width="90%",options="header"] -|=== -|Option|Description -|Filename|full path to output file -|Append|If not checked new file will be created every time transform is running. -If file with specified name already existed - it will be replaced by a new one. -If checked - new json output will be appended at the end of existing file. -Or if existing file is not exists - it will be created as in previous case. -|Create Parent folder|Usually file name contains some path folder as a parent folder. -If parent folder does not exists and this option is checked - parent folder will be created as a new folder. -Otherwise - file not be found and transform will fail. -|Do not open create at start|If not checked - file (and in some cases parent folder) will be created/opened to write during pipeline initialization. -If checked - file and parent folder will be created only after transform will get any first input data. -|Extension|Output file extension. -Default value is 'js' -|Encoding|Output file encoding -|Pass output to servlet|Enable this option to return the data via a web service instead writing into a file. -|Include date in filename?|If checked - output file name will contains File name value + current date. -This may help to generate unique output files. -|Include time in filename|If checked - output file name will contains file creation time. -Same as for 'Include date in filename' option -|Show filename(s) button|Can be useful to test full output file path -|Add file to result filenames?|If checked - created output file path will be accessible form transform result -|=== - -=== Fields Tab - -This tab is used to map input transform fields to output json values - -[width="90%",options="header"] -|=== -|Option|Description -|Element name|Json element name as a key. -For example "A":"B" - A is a element name, B is actual input value mapped for this Element name. -|Fieldname|Input transform field name. 
-Use 'Get Fields' button to discover available input fields -|=== - -== Metadata Injection Support - -All fields of this transform support metadata injection. -You can use this transform with ETL Metadata Injection to pass metadata to your pipeline at runtime. \ No newline at end of file +As you can see, when turning compatibility mode on, each field will be handled as a separate object. \ No newline at end of file From 999959d49276a441f8bfe8fdef130939139edf76 Mon Sep 17 00:00:00 2001 From: Hans Van Akelyen Date: Tue, 19 Jul 2022 21:05:38 +0200 Subject: [PATCH 3/3] HOP-4041: nest the tab info --- .../transforms/userdefinedjavaclass.adoc | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/userdefinedjavaclass.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/userdefinedjavaclass.adoc index bb6820132d2..10b60248fc9 100644 --- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/userdefinedjavaclass.adoc +++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/userdefinedjavaclass.adoc @@ -36,28 +36,32 @@ For this we use the https://janino-compiler.github.io/janino/[Janino^] project l == Options -[width="90%",options="header"] +[width="90%",options="header",cols="1a,3a"] |=== |Option|Description |Transform name|Name of the transform. |Class code|The Java code. |Fields|List of output fields. -|Fieldname|Output field name. -|Type|Type of field. -|Length|Length of the field. -|Precision|Precision of the field. + +- Fieldname: Output field name. +- Type: Type of field. +- Length: Length of the field. +- Precision: Precision of the field. |Parameters|You can use the Parameters table to avoid using hard-coded string values, such as field names (customer for example). -|Tag|The parameter tag. -|Value|The parameter value. -|Description|Description of the parameter. -|Info transforms| -|Tag| -|Transform|Which transform to read from. 
-|Description| -|Target transforms| -|Tag| -|Transform|Which transform to output to. -|Description| + +- Tag: The parameter tag. +- Value: The parameter value. +- Description: Description of the parameter. +|Info transforms|Additional transforms to read data from + +- Tag +- Transform: Which transform to read from. +- Description +|Target transforms|Destination Transform + +- Tag +- Transform: Which transform to output to. +- Description |Test class|Tests the class. |===