From fcd35556d62a951847c91c0b07017a837ad58499 Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 14:46:47 +0200 Subject: [PATCH 01/11] -Implements csv converter methods -Adds Tests --- src/main/java/edu/ie3/util/StringUtils.java | 67 +++++++++++++++++ .../edu/ie3/util/StringUtilsTest.groovy | 73 +++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index e65a8ee8..8348113b 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -6,6 +6,8 @@ package edu.ie3.util; import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; /** Some useful functions to manipulate Strings */ public class StringUtils { @@ -102,4 +104,69 @@ public static String[] quote(String[] input) { public static String cleanString(String input) { return input.replaceAll("[^\\w]", "_"); } + + /** + * Quotes header elements to predefine a valid CsvFileDefinition + * + * @param headerElements Array of csv header elements + * @param csvSep Csv separator to check if it appears within the header element + * @return Quoted header elements + */ + public static String[] quoteHeaderElements(String[] headerElements, String csvSep) { + for (int index = 0; index <= headerElements.length - 1; index++) { + if (headerElements[index].matches("(?:.*)\\{(?:.*)}") + || headerElements[index].contains(csvSep) + || headerElements[index].contains(",") + || headerElements[index].contains("\"") + || headerElements[index].contains("\n")) { + headerElements[index] = + headerElements[index] + .replaceAll("\"", "\"\"") + .replaceAll("^([^\"])", "\"$1") + .replaceAll("([^\"])$", "$1\""); + } + } + return headerElements; + } + + /** + * Quotes all fields that contain special characters to comply with the CSV specification RFC 4180 + * (https://tools.ietf.org/html/rfc4180) The " contained in the JSON strings are escaped with the + * same character to make the CSV data readable later + * + * @param entityFieldData LinkedHashMap containing all entityData + * @param csvSep Csv separator to check if it appears within the data + * @return LinkedHashMap containing all entityData with the relevant data quoted + */ + public static LinkedHashMap quoteCSVStrings( + LinkedHashMap entityFieldData, String csvSep) { + LinkedHashMap quotedEntityFieldData = new LinkedHashMap<>(); + for (Map.Entry entry : entityFieldData.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + if (key.matches("(?:.*)\\{(?:.*)}") + || key.contains(csvSep) + || key.contains(",") + || key.contains("\"") + || key.contains("\n")) { + key = + key.replaceAll("\"", "\"\"") + .replaceAll("^([^\"])", "\"$1") + .replaceAll("([^\"])$", "$1\""); + } + if (value.matches("(?:.*)\\{(?:.*)}") + || value.contains(csvSep) + || value.contains(",") + || value.contains("\"") + || value.contains("\n")) { + value = + value + .replaceAll("\"", "\"\"") + .replaceAll("^([^\"])", "\"$1") + .replaceAll("([^\"])$", "$1\""); + } + quotedEntityFieldData.put(key, value); + } + return quotedEntityFieldData; + } } diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index f6ebc246..32a967e3 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -211,4 +211,77 @@ class StringUtilsTest extends Specification { "?ab123" || "_ab123" "ßab123" || "_ab123" } + + def "The StringUtils converts a given Array of csv header elements to match the csv specification RFC 4180 "() { + given: + def input = [ + "4ca90220-74c2-4369-9afa-a18bf068840d", + "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", + "node_a", + "2020-03-25T15:11:31Z[UTC] \n 2020-03-24T15:11:31Z[UTC]", + "8f9682df-0744-4b58-a122-f0dc730f6510", + "true", + "1,0", + "1.0", + "Höchstspannung", + "380.0", + "olm:{(0.00,1.00)}", + "cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}"] as String[] + def expected = [ + "4ca90220-74c2-4369-9afa-a18bf068840d", + "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", + "node_a", + "\"2020-03-25T15:11:31Z[UTC] \n 2020-03-24T15:11:31Z[UTC]\"", + "8f9682df-0744-4b58-a122-f0dc730f6510", + "true", + "\"1,0\"", + "1.0", + "Höchstspannung", + "380.0", + "\"olm:{(0.00,1.00)}\"", + "\"cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}\""] as String[] + + when: + def actual = StringUtils.quoteHeaderElements(input, ",") + + then: + actual == expected + } + + def "The StringUtils converts a given LinkedHashMap of csv data to match the csv specification RFC 4180 "() { + given: + def input = [ + activePowerGradient: "25.0", + capex : "100,0", + cosphiRated : "0.95", + etaConv : "98.0", + id : "test \n bmTypeInput", + opex : "50.0", + sRated : "25.0", + uuid : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + geoPosition : "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", + olmcharacteristic : "olm:{(0.0,1.0)}", + cosPhiFixed : "cosPhiFixed:{(0.0,1.0)}" + ] as LinkedHashMap + + def expected = [ + activePowerGradient: "25.0", + capex : "\"100,0\"", + cosphiRated : "0.95", + etaConv : "98.0", + id : "\"test \n bmTypeInput\"", + opex : "50.0", + sRated : "25.0", + uuid : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + geoPosition : "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", + olmcharacteristic : "\"olm:{(0.0,1.0)}\"", + cosPhiFixed : "\"cosPhiFixed:{(0.0,1.0)}\"" + ] as LinkedHashMap + + when: + def actual = StringUtils.quoteCSVStrings(input, ",") + + then: + actual == expected + } } From 979e2266edfb5153bcdf35f03014c19102633801 Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 16:11:37 +0200 Subject: [PATCH 02/11] -Eliminates code smells --- src/main/java/edu/ie3/util/StringUtils.java | 28 +++++++----- .../edu/ie3/util/StringUtilsTest.groovy | 44 +++++++++---------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index 8348113b..60107b47 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -15,6 +15,10 @@ private StringUtils() { throw new IllegalStateException("Utility classes cannot be instantiated."); } + private static final String jsonRegex = "(?:.*)\\{(?:.*)}"; + private static final String beginningOfStringRegex = "^([^\"])"; + private static final String endOfStringRegex = "([^\"])$"; + /** * Converts a given camel case string to its snake case representation * @@ -82,7 +86,7 @@ public static String[] camelCaseToSnakeCase(String[] input) { * @return Quoted String */ public static String quote(String input) { - return input.replaceAll("^([^\"])", "\"$1").replaceAll("([^\"])$", "$1\""); + return input.replaceAll(beginningOfStringRegex, "\"$1").replaceAll(endOfStringRegex, "$1\""); } /** @@ -114,7 +118,7 @@ public static String cleanString(String input) { */ public static String[] quoteHeaderElements(String[] headerElements, String csvSep) { for (int index = 0; index <= headerElements.length - 1; index++) { - if (headerElements[index].matches("(?:.*)\\{(?:.*)}") + if (headerElements[index].matches(jsonRegex) || headerElements[index].contains(csvSep) || headerElements[index].contains(",") || headerElements[index].contains("\"") @@ -122,8 +126,8 @@ public static String[] quoteHeaderElements(String[] headerElements, String csvSe headerElements[index] = headerElements[index] .replaceAll("\"", "\"\"") - .replaceAll("^([^\"])", "\"$1") - .replaceAll("([^\"])$", "$1\""); + .replaceAll(beginningOfStringRegex, "\"$1") + .replaceAll(endOfStringRegex, "$1\""); } } return headerElements; @@ -138,23 +142,23 @@ public static String[] quoteHeaderElements(String[] headerElements, String csvSe * @param csvSep Csv separator to check if it appears within the data * @return LinkedHashMap containing all entityData with the relevant data quoted */ - public static LinkedHashMap quoteCSVStrings( - LinkedHashMap entityFieldData, String csvSep) { + public static Map quoteCSVStrings( + Map entityFieldData, String csvSep) { LinkedHashMap quotedEntityFieldData = new LinkedHashMap<>(); for (Map.Entry entry : entityFieldData.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - if (key.matches("(?:.*)\\{(?:.*)}") + if (key.matches(jsonRegex) || key.contains(csvSep) || key.contains(",") || key.contains("\"") || key.contains("\n")) { key = key.replaceAll("\"", "\"\"") - .replaceAll("^([^\"])", "\"$1") - .replaceAll("([^\"])$", "$1\""); + .replaceAll(beginningOfStringRegex, "\"$1") + .replaceAll(endOfStringRegex, "$1\""); } - if (value.matches("(?:.*)\\{(?:.*)}") + if (value.matches(jsonRegex) || value.contains(csvSep) || value.contains(",") || value.contains("\"") @@ -162,8 +166,8 @@ public static LinkedHashMap quoteCSVStrings( value = value .replaceAll("\"", "\"\"") - .replaceAll("^([^\"])", "\"$1") - .replaceAll("([^\"])$", "$1\""); + .replaceAll(beginningOfStringRegex, "\"$1") + .replaceAll(endOfStringRegex, "$1\""); } quotedEntityFieldData.put(key, value); } diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index 32a967e3..0744959b 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -251,31 +251,31 @@ class StringUtilsTest extends Specification { def "The StringUtils converts a given LinkedHashMap of csv data to match the csv specification RFC 4180 "() { given: def input = [ - activePowerGradient: "25.0", - capex : "100,0", - cosphiRated : "0.95", - etaConv : "98.0", - id : "test \n bmTypeInput", - opex : "50.0", - sRated : "25.0", - uuid : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", - geoPosition : "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", - olmcharacteristic : "olm:{(0.0,1.0)}", - cosPhiFixed : "cosPhiFixed:{(0.0,1.0)}" + "activePowerGradient" : "25.0", + "capex" : "100,0", + "cosphiRated" : "0.95", + "etaConv" : "98.0", + "id" : "test \n bmTypeInput", + "opex" : "50.0", + "sRated" : "25.0", + "uu,id" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + "geoPosition" : "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", + "olm\"characteristic" : "olm:{(0.0,1.0)}", + "cosPhiFixed" : "cosPhiFixed:{(0.0,1.0)}" ] as LinkedHashMap def expected = [ - activePowerGradient: "25.0", - capex : "\"100,0\"", - cosphiRated : "0.95", - etaConv : "98.0", - id : "\"test \n bmTypeInput\"", - opex : "50.0", - sRated : "25.0", - uuid : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", - geoPosition : "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", - olmcharacteristic : "\"olm:{(0.0,1.0)}\"", - cosPhiFixed : "\"cosPhiFixed:{(0.0,1.0)}\"" + "activePowerGradient" : "25.0", + "capex" : "\"100,0\"", + "cosphiRated" : "0.95", + "etaConv" : "98.0", + "id" : "\"test \n bmTypeInput\"", + "opex" : "50.0", + "sRated" : "25.0", + "\"uu,id\"" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + "geoPosition" : "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", + "\"olm\"\"characteristic\"" : "\"olm:{(0.0,1.0)}\"", + "cosPhiFixed" : "\"cosPhiFixed:{(0.0,1.0)}\"" ] as LinkedHashMap when: From 7da06acf27fbec78c9ec8ce935f8cc6517d8b6a0 Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 16:17:58 +0200 Subject: [PATCH 03/11] -Eliminates code smells --- src/main/java/edu/ie3/util/StringUtils.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index 60107b47..02552d2c 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -11,14 +11,15 @@ /** Some useful functions to manipulate Strings */ public class StringUtils { - private StringUtils() { - throw new IllegalStateException("Utility classes cannot be instantiated."); - } private static final String jsonRegex = "(?:.*)\\{(?:.*)}"; private static final String beginningOfStringRegex = "^([^\"])"; private static final String endOfStringRegex = "([^\"])$"; + private StringUtils() { + throw new IllegalStateException("Utility classes cannot be instantiated."); + } + /** * Converts a given camel case string to its snake case representation * @@ -142,8 +143,8 @@ public static String[] quoteHeaderElements(String[] headerElements, String csvSe * @param csvSep Csv separator to check if it appears within the data * @return LinkedHashMap containing all entityData with the relevant data quoted */ - public static Map quoteCSVStrings( - Map entityFieldData, String csvSep) { + public static LinkedHashMap quoteCSVStrings( + LinkedHashMap entityFieldData, String csvSep) { LinkedHashMap quotedEntityFieldData = new LinkedHashMap<>(); for (Map.Entry entry : entityFieldData.entrySet()) { String key = entry.getKey(); From 8d41ce5115b7f5d2916b8d3ab03d0a656ca51f14 Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 16:20:34 +0200 Subject: [PATCH 04/11] -Eliminates code smells --- src/main/java/edu/ie3/util/StringUtils.java | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index 02552d2c..2dc27abe 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -12,9 +12,9 @@ /** Some useful functions to manipulate Strings */ public class StringUtils { - private static final String jsonRegex = "(?:.*)\\{(?:.*)}"; - private static final String beginningOfStringRegex = "^([^\"])"; - private static final String endOfStringRegex = "([^\"])$"; + private static final String JSONREGEX = "(?:.*)\\{(?:.*)}"; + private static final String STARTOFSTRINGREGEX = "^([^\"])"; + private static final String ENDOFSTRINGREGEX = "([^\"])$"; private StringUtils() { throw new IllegalStateException("Utility classes cannot be instantiated."); @@ -87,7 +87,7 @@ public static String[] camelCaseToSnakeCase(String[] input) { * @return Quoted String */ public static String quote(String input) { - return input.replaceAll(beginningOfStringRegex, "\"$1").replaceAll(endOfStringRegex, "$1\""); + return input.replaceAll(STARTOFSTRINGREGEX, "\"$1").replaceAll(ENDOFSTRINGREGEX, "$1\""); } /** @@ -119,7 +119,7 @@ public static String cleanString(String input) { */ public static String[] quoteHeaderElements(String[] headerElements, String csvSep) { for (int index = 0; index <= headerElements.length - 1; index++) { - if (headerElements[index].matches(jsonRegex) + if (headerElements[index].matches(JSONREGEX) || headerElements[index].contains(csvSep) || headerElements[index].contains(",") || headerElements[index].contains("\"") @@ -127,8 +127,8 @@ public static String[] quoteHeaderElements(String[] headerElements, String csvSe headerElements[index] = headerElements[index] .replaceAll("\"", "\"\"") - .replaceAll(beginningOfStringRegex, "\"$1") - .replaceAll(endOfStringRegex, "$1\""); + .replaceAll(STARTOFSTRINGREGEX, "\"$1") + .replaceAll(ENDOFSTRINGREGEX, "$1\""); } } return headerElements; @@ -149,17 +149,17 @@ public static LinkedHashMap quoteCSVStrings( for (Map.Entry entry : entityFieldData.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - if (key.matches(jsonRegex) + if (key.matches(JSONREGEX) || key.contains(csvSep) || key.contains(",") || key.contains("\"") || key.contains("\n")) { key = key.replaceAll("\"", "\"\"") - .replaceAll(beginningOfStringRegex, "\"$1") - .replaceAll(endOfStringRegex, "$1\""); + .replaceAll(STARTOFSTRINGREGEX, "\"$1") + .replaceAll(ENDOFSTRINGREGEX, "$1\""); } - if (value.matches(jsonRegex) + if (value.matches(JSONREGEX) || value.contains(csvSep) || value.contains(",") || value.contains("\"") @@ -167,8 +167,8 @@ public static LinkedHashMap quoteCSVStrings( value = value .replaceAll("\"", "\"\"") - .replaceAll(beginningOfStringRegex, "\"$1") - .replaceAll(endOfStringRegex, "$1\""); + .replaceAll(STARTOFSTRINGREGEX, "\"$1") + .replaceAll(ENDOFSTRINGREGEX, "$1\""); } quotedEntityFieldData.put(key, value); } From 49ce6d31b9674a4defefa6aafc516f573f25767c Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 16:44:25 +0200 Subject: [PATCH 05/11] -Removes regex for json strings --- src/main/java/edu/ie3/util/StringUtils.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index 2dc27abe..04756236 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -12,7 +12,6 @@ /** Some useful functions to manipulate Strings */ public class StringUtils { - private static final String JSONREGEX = "(?:.*)\\{(?:.*)}"; private static final String STARTOFSTRINGREGEX = "^([^\"])"; private static final String ENDOFSTRINGREGEX = "([^\"])$"; @@ -119,8 +118,7 @@ public static String cleanString(String input) { */ public static String[] quoteHeaderElements(String[] headerElements, String csvSep) { for (int index = 0; index <= headerElements.length - 1; index++) { - if (headerElements[index].matches(JSONREGEX) - || headerElements[index].contains(csvSep) + if (headerElements[index].contains(csvSep) || headerElements[index].contains(",") || headerElements[index].contains("\"") || headerElements[index].contains("\n")) { @@ -149,8 +147,7 @@ public static LinkedHashMap quoteCSVStrings( for (Map.Entry entry : entityFieldData.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - if (key.matches(JSONREGEX) - || key.contains(csvSep) + if (key.contains(csvSep) || key.contains(",") || key.contains("\"") || key.contains("\n")) { @@ -159,8 +156,7 @@ public static LinkedHashMap quoteCSVStrings( .replaceAll(STARTOFSTRINGREGEX, "\"$1") .replaceAll(ENDOFSTRINGREGEX, "$1\""); } - if (value.matches(JSONREGEX) - || value.contains(csvSep) + if (value.contains(csvSep) || value.contains(",") || value.contains("\"") || value.contains("\n")) { From c465558e73d1c1471b420e84439d51b986fd0e77 Mon Sep 17 00:00:00 2001 From: dennis Date: Tue, 16 Jun 2020 16:46:46 +0200 Subject: [PATCH 06/11] sA.. --- src/main/java/edu/ie3/util/StringUtils.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index 04756236..ff21c8e0 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -147,10 +147,7 @@ public static LinkedHashMap quoteCSVStrings( for (Map.Entry entry : entityFieldData.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); - if (key.contains(csvSep) - || key.contains(",") - || key.contains("\"") - || key.contains("\n")) { + if (key.contains(csvSep) || key.contains(",") || key.contains("\"") || key.contains("\n")) { key = key.replaceAll("\"", "\"\"") .replaceAll(STARTOFSTRINGREGEX, "\"$1") From af032aaef79213f802bb0cc9f9ce99cf962db411 Mon Sep 17 00:00:00 2001 From: Johannes Hiry Date: Tue, 16 Jun 2020 19:08:22 +0200 Subject: [PATCH 07/11] finalizing csvString creation in StringUtils --- src/main/java/edu/ie3/util/StringUtils.java | 93 ++++++++----------- .../edu/ie3/util/StringUtilsTest.groovy | 90 ++++++++++++------ 2 files changed, 101 insertions(+), 82 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index ff21c8e0..f59231a1 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -6,14 +6,12 @@ package edu.ie3.util; import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; /** Some useful functions to manipulate Strings */ public class StringUtils { - private static final String STARTOFSTRINGREGEX = "^([^\"])"; - private static final String ENDOFSTRINGREGEX = "([^\"])$"; + private static final String START_OF_STRING_REGEX = "^([^\"])"; + private static final String END_OF_STRING_REGEX = "([^\"])$"; private StringUtils() { throw new IllegalStateException("Utility classes cannot be instantiated."); @@ -86,7 +84,15 @@ public static String[] camelCaseToSnakeCase(String[] input) { * @return Quoted String */ public static String quote(String input) { - return input.replaceAll(STARTOFSTRINGREGEX, "\"$1").replaceAll(ENDOFSTRINGREGEX, "$1\""); + return quoteEnd(quoteStart(input)); + } + + private static String quoteStart(String input) { + return input.replaceAll(START_OF_STRING_REGEX, "\"$1"); + } + + private static String quoteEnd(String input) { + return input.replaceAll(END_OF_STRING_REGEX, "$1\""); } /** @@ -110,61 +116,40 @@ public static String cleanString(String input) { } /** - * Quotes header elements to predefine a valid CsvFileDefinition + * Quotes a given string that contains special characters to comply with the csv specification RFC + * 4180 (https://tools.ietf.org/html/rfc4180). Double quotes in JSON strings are escaped with the + * same character to make the csv data readable later. * - * @param headerElements Array of csv header elements - * @param csvSep Csv separator to check if it appears within the header element - * @return Quoted header elements + * @param inputString string that should be converted to a valid rfc 4180 string + * @param csvSep separator of the csv file + * @return a csv string that is valid according to rfc 4180 */ - public static String[] quoteHeaderElements(String[] headerElements, String csvSep) { - for (int index = 0; index <= headerElements.length - 1; index++) { - if (headerElements[index].contains(csvSep) - || headerElements[index].contains(",") - || headerElements[index].contains("\"") - || headerElements[index].contains("\n")) { - headerElements[index] = - headerElements[index] - .replaceAll("\"", "\"\"") - .replaceAll(STARTOFSTRINGREGEX, "\"$1") - .replaceAll(ENDOFSTRINGREGEX, "$1\""); - } - } - return headerElements; + public static String csvString(String inputString, String csvSep) { + if (needsCsvRFC4180Quote(inputString, csvSep)) { + /* clean the string by first quoting start and end of the string and then replace all double quotes + * that are followed by one or more double quotes with single double quotes */ + String quotedStartEndString = quote(inputString).replaceAll("\"\"*", "\""); + /* get everything in between the start and end quotes and replace single quotes with double quotes */ + String stringWOStartEndQuotes = + quotedStartEndString + .substring(1, quotedStartEndString.length() - 1) + .replaceAll("\"", "\"\""); + /* finally add quotes to the strings start and end again */ + return quote(stringWOStartEndQuotes); + } else return inputString; } /** - * Quotes all fields that contain special characters to comply with the CSV specification RFC 4180 - * (https://tools.ietf.org/html/rfc4180) The " contained in the JSON strings are escaped with the - * same character to make the CSV data readable later + * Check if the provided string needs to be quoted according to the csv specification RFC 4180 * - * @param entityFieldData LinkedHashMap containing all entityData - * @param csvSep Csv separator to check if it appears within the data - * @return LinkedHashMap containing all entityData with the relevant data quoted + * @param inputString the string that should be checked + * @param csvSep separator of the csv file + * @return true of the string needs to be quoted, false otherwise */ - public static LinkedHashMap quoteCSVStrings( - LinkedHashMap entityFieldData, String csvSep) { - LinkedHashMap quotedEntityFieldData = new LinkedHashMap<>(); - for (Map.Entry entry : entityFieldData.entrySet()) { - String key = entry.getKey(); - String value = entry.getValue(); - if (key.contains(csvSep) || key.contains(",") || key.contains("\"") || key.contains("\n")) { - key = - key.replaceAll("\"", "\"\"") - .replaceAll(STARTOFSTRINGREGEX, "\"$1") - .replaceAll(ENDOFSTRINGREGEX, "$1\""); - } - if (value.contains(csvSep) - || value.contains(",") - || value.contains("\"") - || value.contains("\n")) { - value = - value - .replaceAll("\"", "\"\"") - .replaceAll(STARTOFSTRINGREGEX, "\"$1") - .replaceAll(ENDOFSTRINGREGEX, "$1\""); - } - quotedEntityFieldData.put(key, value); - } - return quotedEntityFieldData; + private static boolean needsCsvRFC4180Quote(String inputString, String csvSep) { + return inputString.contains(csvSep) + || inputString.contains(",") + || inputString.contains("\"") + || inputString.contains("\n"); } } diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index 0744959b..c68dd3b3 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -7,6 +7,8 @@ package edu.ie3.util import spock.lang.Specification +import java.util.stream.Collectors + class StringUtilsTest extends Specification { def "The StringUtils quote a single String correctly"() { @@ -226,7 +228,8 @@ class StringUtilsTest extends Specification { "Höchstspannung", "380.0", "olm:{(0.00,1.00)}", - "cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}"] as String[] + "cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}" + ] def expected = [ "4ca90220-74c2-4369-9afa-a18bf068840d", "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", @@ -239,10 +242,10 @@ class StringUtilsTest extends Specification { "Höchstspannung", "380.0", "\"olm:{(0.00,1.00)}\"", - "\"cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}\""] as String[] + "\"cosPhiP:{(0.0,1.0),(0.9,1.0),(1.2,-0.3)}\""] as Set when: - def actual = StringUtils.quoteHeaderElements(input, ",") + def actual = input.stream().map({ inputElement -> StringUtils.csvString(inputElement, ",") }).collect(Collectors.toSet()) as Set then: actual == expected @@ -251,37 +254,68 @@ class StringUtilsTest extends Specification { def "The StringUtils converts a given LinkedHashMap of csv data to match the csv specification RFC 4180 "() { given: def input = [ - "activePowerGradient" : "25.0", - "capex" : "100,0", - "cosphiRated" : "0.95", - "etaConv" : "98.0", - "id" : "test \n bmTypeInput", - "opex" : "50.0", - "sRated" : "25.0", - "uu,id" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", - "geoPosition" : "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", - "olm\"characteristic" : "olm:{(0.0,1.0)}", - "cosPhiFixed" : "cosPhiFixed:{(0.0,1.0)}" - ] as LinkedHashMap + "activePowerGradient": "25.0", + "capex" : "100,0", + "cosphiRated" : "0.95", + "etaConv" : "98.0", + "id" : "test \n bmTypeInput", + "opex" : "50.0", + "sRated" : "25.0", + "uu,id" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + "geoPosition" : "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528],\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}}}", + "olm\"characteristic": "olm:{(0.0,1.0)}", + "cosPhiFixed" : "cosPhiFixed:{(0.0,1.0)}" + ] as LinkedHashMap def expected = [ - "activePowerGradient" : "25.0", - "capex" : "\"100,0\"", - "cosphiRated" : "0.95", - "etaConv" : "98.0", - "id" : "\"test \n bmTypeInput\"", - "opex" : "50.0", - "sRated" : "25.0", - "\"uu,id\"" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", - "geoPosition" : "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", - "\"olm\"\"characteristic\"" : "\"olm:{(0.0,1.0)}\"", - "cosPhiFixed" : "\"cosPhiFixed:{(0.0,1.0)}\"" - ] as LinkedHashMap + "activePowerGradient" : "25.0", + "capex" : "\"100,0\"", + "cosphiRated" : "0.95", + "etaConv" : "98.0", + "id" : "\"test \n bmTypeInput\"", + "opex" : "50.0", + "sRated" : "25.0", + "\"uu,id\"" : "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8", + "geoPosition" : "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528],\"\"crs\"\":{\"\"type\"\":\"\"name\"\",\"\"properties\"\":{\"\"name\"\":\"\"EPSG:4326\"\"}}}\"", + "\"olm\"\"characteristic\"": "\"olm:{(0.0,1.0)}\"", + "cosPhiFixed" : "\"cosPhiFixed:{(0.0,1.0)}\"" + ] as LinkedHashMap when: - def actual = StringUtils.quoteCSVStrings(input, ",") + def actualList = input.entrySet().stream().map({ mapEntry -> + return new AbstractMap.SimpleEntry(StringUtils.csvString(mapEntry.key, ","), StringUtils.csvString(mapEntry.value, ",")) + }) as Set + + def actual = actualList.collectEntries { + [it.key, it.value] + } then: actual == expected } + + def "The StringUtils converts a given Array of csv header elements to match the csv specification RFC "() { + expect: + StringUtils.csvString(inputString, csvSep) == expect + + where: + inputString | csvSep || expect + "activePowerGradient" | "," || "activePowerGradient" + "\"100,0\"" | "," || "\"100,0\"" + "100,0" | "," || "\"100,0\"" + "100,0" | ";" || "\"100,0\"" + "100;0" | ";" || "\"100;0\"" + "\"100;0\"" | ";" || "\"100;0\"" + "100;0" | "," || "100;0" + "olm:{(0.00,1.00)}" | "," || "\"olm:{(0.00,1.00)}\"" + "olm:{(0.00,1.00)}" | ";" || "\"olm:{(0.00,1.00)}\"" + "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "\"{\"\"type\"\"\":\"\"Point\"\"\"\",\"\"coordinates\"\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "\"{\"\"type\"\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" + "uu,id" | "," || "\"uu,id\"" + "uu,id" | ";" || "\"uu,id\"" + } } From 4de549abccc5d3c1da240cf2f4e9b3661b7a111e Mon Sep 17 00:00:00 2001 From: Johannes Hiry Date: Wed, 17 Jun 2020 08:54:02 +0200 Subject: [PATCH 08/11] added two more tests --- src/test/groovy/edu/ie3/util/StringUtilsTest.groovy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index c68dd3b3..64be3488 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -309,6 +309,8 @@ class StringUtilsTest extends Specification { "100;0" | "," || "100;0" "olm:{(0.00,1.00)}" | "," || "\"olm:{(0.00,1.00)}\"" "olm:{(0.00,1.00)}" | ";" || "\"olm:{(0.00,1.00)}\"" + "\"olm:{(0.00,1.00)}\"" | "," || "\"olm:{(0.00,1.00)}\"" + "\"olm:{(0.00,1.00)}\"" | ";" || "\"olm:{(0.00,1.00)}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" From bc0020ca52d648df7afa34b575791e9f9378a512 Mon Sep 17 00:00:00 2001 From: Johannes Hiry Date: Wed, 17 Jun 2020 09:06:55 +0200 Subject: [PATCH 09/11] adapted test method --- src/test/groovy/edu/ie3/util/StringUtilsTest.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index 64be3488..902ee55d 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -294,7 +294,7 @@ class StringUtilsTest extends Specification { actual == expected } - def "The StringUtils converts a given Array of csv header elements to match the csv specification RFC "() { + def "The StringUtils converts a given String to match the csv specification RFC 4180 "() { expect: StringUtils.csvString(inputString, csvSep) == expect From 09a864dbfcf21593c8a576434f677fbc1a0dedfd Mon Sep 17 00:00:00 2001 From: Johannes Hiry Date: Wed, 17 Jun 2020 09:10:55 +0200 Subject: [PATCH 10/11] removed unnecessary tests --- src/test/groovy/edu/ie3/util/StringUtilsTest.groovy | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index 902ee55d..06e15df8 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -309,8 +309,6 @@ class StringUtilsTest extends Specification { "100;0" | "," || "100;0" "olm:{(0.00,1.00)}" | "," || "\"olm:{(0.00,1.00)}\"" "olm:{(0.00,1.00)}" | ";" || "\"olm:{(0.00,1.00)}\"" - "\"olm:{(0.00,1.00)}\"" | "," || "\"olm:{(0.00,1.00)}\"" - "\"olm:{(0.00,1.00)}\"" | ";" || "\"olm:{(0.00,1.00)}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" From fa85c68623f8c54040345909c7c067e60a79c955 Mon Sep 17 00:00:00 2001 From: Johannes Hiry Date: Thu, 18 Jun 2020 12:59:38 +0200 Subject: [PATCH 11/11] addressed reviewers comments --- src/main/java/edu/ie3/util/StringUtils.java | 41 ++++++++----------- .../edu/ie3/util/StringUtilsTest.groovy | 17 ++++---- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/ie3/util/StringUtils.java b/src/main/java/edu/ie3/util/StringUtils.java index f59231a1..20c88037 100644 --- a/src/main/java/edu/ie3/util/StringUtils.java +++ b/src/main/java/edu/ie3/util/StringUtils.java @@ -10,9 +10,6 @@ /** Some useful functions to manipulate Strings */ public class StringUtils { - private static final String START_OF_STRING_REGEX = "^([^\"])"; - private static final String END_OF_STRING_REGEX = "([^\"])$"; - private StringUtils() { throw new IllegalStateException("Utility classes cannot be instantiated."); } @@ -84,15 +81,7 @@ public static String[] camelCaseToSnakeCase(String[] input) { * @return Quoted String */ public static String quote(String input) { - return quoteEnd(quoteStart(input)); - } - - private static String quoteStart(String input) { - return input.replaceAll(START_OF_STRING_REGEX, "\"$1"); - } - - private static String quoteEnd(String input) { - return input.replaceAll(END_OF_STRING_REGEX, "$1\""); + return input.matches("^\".*\"$") ? input : "\"" + input + "\""; } /** @@ -117,8 +106,8 @@ public static String cleanString(String input) { /** * Quotes a given string that contains special characters to comply with the csv specification RFC - * 4180 (https://tools.ietf.org/html/rfc4180). Double quotes in JSON strings are escaped with the - * same character to make the csv data readable later. + * 4180 (https://tools.ietf.org/html/rfc4180). Double quotes are escaped according to + * specification. * * @param inputString string that should be converted to a valid rfc 4180 string * @param csvSep separator of the csv file @@ -126,19 +115,25 @@ public static String cleanString(String input) { */ public static String csvString(String inputString, String csvSep) { if (needsCsvRFC4180Quote(inputString, csvSep)) { - /* clean the string by first quoting start and end of the string and then replace all double quotes - * that are followed by one or more double quotes with single double quotes */ - String quotedStartEndString = quote(inputString).replaceAll("\"\"*", "\""); - /* get everything in between the start and end quotes and replace single quotes with double quotes */ - String stringWOStartEndQuotes = - quotedStartEndString - .substring(1, quotedStartEndString.length() - 1) - .replaceAll("\"", "\"\""); + /* Get rid of first and last quotation if there is some. */ + String inputUnquoted = unquoteStartEnd(inputString); + /* Escape every double quotation mark within the String by doubling it */ + String withEscapedQuotes = inputUnquoted.replaceAll("\"", "\"\""); /* finally add quotes to the strings start and end again */ - return quote(stringWOStartEndQuotes); + return quote(withEscapedQuotes); } else return inputString; } + /** + * Removes double quotes at start and end position of the provided string, if any + * + * @param input string that should be unquoted + * @return copy of the provided string without start and end double quotes + */ + public static String unquoteStartEnd(String input) { + return input.matches("^\".*\"$") ? input.substring(1, input.length() - 1) : input; + } + /** * Check if the provided string needs to be quoted according to the csv specification RFC 4180 * diff --git a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy index 06e15df8..a0c503c0 100644 --- a/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy +++ b/src/test/groovy/edu/ie3/util/StringUtilsTest.groovy @@ -19,11 +19,14 @@ class StringUtilsTest extends Specification { actual == expected where: - input || expected - "test" || "\"test\"" - "\"test" || "\"test\"" - "test\"" || "\"test\"" - "\"test\"" || "\"test\"" + input || expected + "test" || "\"test\"" + "\"test" || "\"\"test\"" + "test\"" || "\"test\"\"" + "\"test\"" || "\"test\"" + "\"This\" is a test" || "\"\"This\" is a test\"" + "This is \"a\" test" || "\"This is \"a\" test\"" + "This is a \"test\"" || "\"This is a \"test\"\"" } def "The StringUtils are able to quote each element of an array of Strings"() { @@ -311,10 +314,6 @@ class StringUtilsTest extends Specification { "olm:{(0.00,1.00)}" | ";" || "\"olm:{(0.00,1.00)}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "{\"type\":\"Point\",\"coordinates\":[7.411111,51.492528]}" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" - "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" - "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" - "\"{\"\"type\"\"\":\"\"Point\"\"\"\",\"\"coordinates\"\"\":[7.411111,51.492528]}\"" | "," || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" - "\"{\"\"type\"\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" | ";" || "\"{\"\"type\"\":\"\"Point\"\",\"\"coordinates\"\":[7.411111,51.492528]}\"" "uu,id" | "," || "\"uu,id\"" "uu,id" | ";" || "\"uu,id\"" }