From 99aa62831e8c0b79de6a5bb53e51480b32e498ee Mon Sep 17 00:00:00 2001 From: Nicolas Adment Date: Mon, 6 Jun 2022 15:24:28 +0200 Subject: [PATCH] HOP-3939 Support for checksum transformation for null values --- .../transforms/0015-add-checksum.hpl | 164 ++++++++++++++---- .../datasets/golden-add-checksum.csv | 3 + .../unit-test/0015-add-checksum UNIT.json | 25 +-- .../transforms/checksum/CheckSum.java | 55 ++++-- .../messages/messages_fr_FR.properties | 5 - 5 files changed, 189 insertions(+), 63 deletions(-) diff --git a/integration-tests/transforms/0015-add-checksum.hpl b/integration-tests/transforms/0015-add-checksum.hpl index 1a77061ba0a..6ebd3cab73c 100644 --- a/integration-tests/transforms/0015-add-checksum.hpl +++ b/integration-tests/transforms/0015-add-checksum.hpl @@ -50,21 +50,11 @@ limitations under the License. mod Y - - mod - CRC32 - Y - CRC32 addler32 Y - - addler32 - md5 - Y - md5 sha1 @@ -80,12 +70,56 @@ limitations under the License. sha384 Y + + addler32 + md5 + Y + + + Special cases null values + Append streams + Y + + + mod + Append streams + Y + + + Append streams + CRC32 + Y + sha384 sha512 Y + + sha512 + Dummy (do nothing) + Y + + + Append streams + Append + + Y + + 1 + + none + + + Special cases null values + mod + + + 208 + 352 + + CRC32 CheckSum @@ -110,8 +144,8 @@ limitations under the License. hexadecimal - 96 - 176 + 304 + 352 @@ -133,9 +167,64 @@ limitations under the License. 100 now + + 208 + 96 + + + + Special cases null values + DataGrid + + Y + + 1 + + none + + + + + + + N + + + -1 + id + -1 + Integer + + + + + N + + + -1 + mod + -1 + Integer + + + + + + 2 + + + 1 + + + + + + + + 96 - 80 + 352 @@ -162,8 +251,8 @@ limitations under the License. hexadecimal - 192 - 176 + 400 + 352 @@ -186,8 +275,8 @@ limitations under the License. id - 240 - 80 + 208 + 176 @@ -214,8 +303,8 @@ limitations under the License. hexadecimal - 288 - 176 + 496 + 352 @@ -251,8 +340,8 @@ limitations under the License. - 368 - 80 + 208 + 256 @@ -279,8 +368,8 @@ limitations under the License. hexadecimal - 384 - 176 + 592 + 352 @@ -307,8 +396,8 @@ limitations under the License. hexadecimal - 480 - 176 + 688 + 352 @@ -335,8 +424,8 @@ limitations under the License. hexadecimal - 576 - 176 + 784 + 352 @@ -363,8 +452,25 @@ limitations under the License. hexadecimal - 672 - 176 + 880 + 352 + + + + Dummy (do nothing) + Dummy + + Y + + 1 + + none + + + + + 992 + 352 diff --git a/integration-tests/transforms/datasets/golden-add-checksum.csv b/integration-tests/transforms/datasets/golden-add-checksum.csv index 97ff339e17b..ff09221d1e7 100644 --- a/integration-tests/transforms/datasets/golden-add-checksum.csv +++ b/integration-tests/transforms/datasets/golden-add-checksum.csv @@ -1,4 +1,7 @@ id,mod,crc32,addler32,md5,sha1,sha256,sha384,sha512 +,2,450215437,3342387,c81e728d9d4c2f636f067f89cc14862c,da4b9237bacccdf19c0760cab7aec4a8359010b0,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35,d063457705d66d6f016e4cdd747db3af8d70ebfd36badd63de6c8ca4a9d8bfb5d874e7fbd750aa804dcaddae7eeef51e,40b244112641dd78dd4f93b6c9190dd46e0099194d5a44257b7efad6ef9ff4683da1eda0244448cb343aa688f5d3efd7314dafe580ac0bcbf115aeca9e8dc114 +1,,2212294583,3276850,c4ca4238a0b923820dcc509a6f75849b,356a192b7913b04c54574d18c28d46e6395428ab,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b,47f05d367b0c32e438fb63e6cf4a5f35c2aa2f90dc7543f8a41a0f95ce8a40a313ab5cf36134a2068c4c969cb50db776,4dff4ea340f0a823f15d3f4f01ab62eae0e5da579ccb851f8db9dfe84c58b2b37b89903a740e1ee172da793a6e79d560e5f7f9bd058a12a280433ed6fa46510a +,,,,,,,, 1,1,3596227959,9764963,6512bd43d9caa6e02c990b0a82652dca,17ba0791499db908433b80f37c5fbc89b870084b,4fc82b26aecb47d2868c4efbe3581732a3e7cbcc6c2efb32062c08170a05eeb8,9b20aa6472eef4fd186d231637b1c1d55a5a434cc9130d6afcaaf486253a20c23a4eaeea419594c17f46bc53c7cee12e,74a49c698dbd3c12e36b0b287447d833f74f3937ff132ebff7054baa18623c35a705bb18b82e2ac0384b5127db97016e63609f712bc90e3506cfbea97599f46f 2,2,1685985038,9961573,b6d767d2f8ed5d21a44b0e5886680cb9,12c6fc06c99a462375eeb3f43dfd832b08ca9e17,785f3ec7eb32f30b90cd0fcf3657d388b5ff4297f2f9716ff66e9b69c05ddd09,1ba40d8a5dcd0f2f0071687f3253f59780a582305a0cee1a49a56a4736dce4fc8af88372c79393a3a569aeda0c15959d,6ad275d26c200e81534d9996183c8748ddfabc7b0a011a90f46301626d709923474703cacab0ff8b67cd846b6cb55b23a39b03fbdfb5218eec3373cf7010a166 3,0,2473281379,9961572,34173cb38f07f89ddbebc2ac9128303f,22d200f8670dbdb3e253a90eee5098477c95c23d,624b60c58c9d8bfb6ff1886c2fd605d2adeb6ea4da576068201b6c6958ce93f4,32f5039553078543bf8748756a64c8b02338afbc1ee3c70dde5988760c3b8833e0e3c830fea5b65f08cb803842eb6ed6,1ccbff33e55627a50beca8cf5c89f77c3165dcb3218171308423f250f0bb0be9700bbfdd92d35dfa2e579110266a40194d707b50e7d27b6f09b81fbbf80231a3 diff --git a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json index 33cde6ed29e..4f2c9328432 100644 --- a/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json +++ b/integration-tests/transforms/metadata/unit-test/0015-add-checksum UNIT.json @@ -7,25 +7,25 @@ { "field_mappings": [ { - "transform_field": "addler32", - "data_set_field": "addler32" + "transform_field": "id", + "data_set_field": "id" + }, + { + "transform_field": "mod", + "data_set_field": "mod" }, { "transform_field": "crc32", "data_set_field": "crc32" }, { - "transform_field": "id", - "data_set_field": "id" + "transform_field": "addler32", + "data_set_field": "addler32" }, { "transform_field": "md5", "data_set_field": "md5" }, - { - "transform_field": "mod", - "data_set_field": "mod" - }, { "transform_field": "sha1", "data_set_field": "sha1" @@ -37,12 +37,17 @@ { "transform_field": "sha384", "data_set_field": "sha384" + }, + { + "transform_field": "sha512", + "data_set_field": "sha512" } ], "field_order": [ - "id" + "id", + "mod" ], - "transform_name": "sha512", + "transform_name": "Dummy (do nothing)", "data_set_name": "golden-add-checksum" } ], diff --git a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java index 6614e99d7f1..17c2e776663 100644 --- a/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java +++ b/plugins/transforms/checksum/src/main/java/org/apache/hop/pipeline/transforms/checksum/CheckSum.java @@ -19,6 +19,7 @@ import org.apache.commons.codec.binary.Hex; import org.apache.hop.core.exception.HopException; +import org.apache.hop.core.row.IValueMeta; import org.apache.hop.core.row.RowDataUtil; import org.apache.hop.core.util.Utils; import org.apache.hop.i18n.BaseMessages; @@ -108,7 +109,6 @@ public boolean processRow() throws HopException { outputRowData = RowDataUtil.addValueData(r, data.nrInfields, checksum); } else { // get checksum - byte[] o = createCheckSum(r); switch (meta.getResultType()) { @@ -116,12 +116,13 @@ public boolean processRow() throws HopException { outputRowData = RowDataUtil.addValueData(r, data.nrInfields, o); break; case HEXADECIMAL: - String hex = new String(Hex.encodeHex(o)); + String hex = (o == null) ? null : Hex.encodeHexString(o); outputRowData = RowDataUtil.addValueData(r, data.nrInfields, hex); break; case STRING: default: - outputRowData = RowDataUtil.addValueData(r, data.nrInfields, getStringFromBytes(o)); + String str = (o == null) ? null : getStringFromBytes(o); + outputRowData = RowDataUtil.addValueData(r, data.nrInfields, str); break; } } @@ -162,18 +163,25 @@ private byte[] createCheckSum(Object[] r) throws Exception { // Loop through fields for (int i = 0; i < data.fieldnr; i++) { - if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) { - baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i])); + IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]); + if (valueMeta.isBinary()) { + byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]); + if (bytes != null) { + baos.write(bytes); + } } else { - baos.write( - getInputRowMeta() - .getValueMeta(data.fieldnrs[i]) - .getNativeDataType(r[data.fieldnrs[i]]) - .toString() - .getBytes()); + Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]); + if (value != null) { + baos.write(value.toString().getBytes()); + } } } + // Return null when all input values are null. + if (baos.size() == 0) { + return null; + } + // Updates the digest using the specified array of bytes data.digest.update(baos.toByteArray()); @@ -204,17 +212,26 @@ private Long calculCheckSum(Object[] r) throws Exception { // Loop through fields for (int i = 0; i < data.fieldnr; i++) { - if (getInputRowMeta().getValueMeta(data.fieldnrs[i]).isBinary()) { - baos.write(getInputRowMeta().getBinary(r, data.fieldnrs[i])); + IValueMeta valueMeta = getInputRowMeta().getValueMeta(data.fieldnrs[i]); + + if (valueMeta.isBinary()) { + byte[] bytes = getInputRowMeta().getBinary(r, data.fieldnrs[i]); + if (bytes != null) { + baos.write(bytes); + } } else { - baos.write( - getInputRowMeta() - .getValueMeta(data.fieldnrs[i]) - .getNativeDataType(r[data.fieldnrs[i]]) - .toString() - .getBytes()); + Object value = valueMeta.getNativeDataType(r[data.fieldnrs[i]]); + if (value != null) { + baos.write(value.toString().getBytes()); + } } } + + // Return null when all input values are null. + if (baos.size() == 0) { + return null; + } + byteArray = baos.toByteArray(); if (meta.getCheckSumType() == CheckSumMeta.CheckSumType.CRC32) { diff --git a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties index e0e30dbc1ef..50518ff4782 100644 --- a/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties +++ b/plugins/transforms/checksum/src/main/resources/org/apache/hop/pipeline/transforms/checksum/messages/messages_fr_FR.properties @@ -36,11 +36,6 @@ CheckSumMeta.CheckResult.TransformRecevingData=La transformation est connect\u00 CheckSumMeta.ResultType.Binary=Octets CheckSumMeta.ResultType.Hexadecimal=Hexad\u00E9cimal CheckSumMeta.ResultType.String=Cha\u00EEne de caract\u00E8res -CheckSumMeta.Type.ADLER32=Adler 32 -CheckSumMeta.Type.CRC32=CRC 32 -CheckSumMeta.Type.MD5=MD5 -CheckSumMeta.Type.SHA1=SHA 1 -CheckSumMeta.Type.SHA256=SHA 256 CheckSumDialog.Fieldname.Column=Champ CheckSumDialog.Fields.Label=Champs utilis\u00E9s dans le calcul CheckSumDialog.Result.Label=Champ r\u00E9sultat