Merged
Changes from all 34 commits
6000d32
Adds Regex to modify JSON Strings to match csv spec.
denstre Jun 8, 2020
3d48c8b
-Factory removes double double quotes to match standard format
denstre Jun 9, 2020
c99a5aa
Applies Spotless
denstre Jun 9, 2020
0bd70c1
Merge branch 'master' of https://github.com/ie3-institute/PowerSystem…
denstre Jun 9, 2020
00f7580
Merge remote-tracking branch 'remotes/origin/master' into ds/#141-ada…
ckittl Jun 15, 2020
b99df92
Reworks quoting process
denstre Jun 15, 2020
cdc0059
Merge branch 'master' of https://github.com/ie3-institute/PowerSystem…
denstre Jun 15, 2020
2af5491
Merge remote-tracking branch 'origin/ds/#141-adapt-jsonstrings-for-cs…
denstre Jun 15, 2020
14529db
Spotless Apply...
denstre Jun 15, 2020
cfbfdf1
Undoes method reference
denstre Jun 15, 2020
6357203
Resets EntityData class
denstre Jun 15, 2020
e1a6ff0
Adds all csv spec cases
denstre Jun 15, 2020
913fbe5
-Removes header Quotes
denstre Jun 16, 2020
6b9f910
-Implements quoteHeaderElements method to predefine header for CsvFil…
denstre Jun 16, 2020
7cda0a9
sA...
denstre Jun 16, 2020
b13de38
-Removes json string regex
denstre Jun 16, 2020
cd80e27
-Turns double double quotes into double quotes when reading csv data
denstre Jun 16, 2020
4ca3164
-Implements csv quoting from utils
denstre Jun 16, 2020
049af41
adapted CsvFileSink for valid quoting
johanneshiry Jun 17, 2020
4c2b858
- fix test
johanneshiry Jun 17, 2020
e0134db
replaced quoting in CsvFileSink with StringUtils
johanneshiry Jun 17, 2020
d381577
updated PowerSystemUtils dependency to current SNAPSHOT version
johanneshiry Jun 17, 2020
d6d9289
changed method order in CsvFileSink
johanneshiry Jun 17, 2020
5f80e75
minor change in CsvFileSink
johanneshiry Jun 17, 2020
d7ea57d
Update src/main/java/edu/ie3/datamodel/io/sink/CsvFileSink.java
johanneshiry Jun 18, 2020
3831a3c
added sonatype snapshot repo
johanneshiry Jun 18, 2020
2d10c3d
- set CsvDataSource to RFC 4180 valid parsing
johanneshiry Jun 18, 2020
93d618d
fixed invalid method parameter order in CsvDataSource#parseCsvRow
johanneshiry Jun 18, 2020
d6b14ee
updated spock testing framework version
johanneshiry Jun 18, 2020
ffa0a13
fix broken tests + addressing reviewers comments
johanneshiry Jun 18, 2020
18f6b07
adapt testfiles to new format + added documentation in CsvFileSinkTest
johanneshiry Jun 18, 2020
7265669
addressing PMD issue in BufferedCsvWriter
johanneshiry Jun 18, 2020
f513a1f
addressing error prone implementations
johanneshiry Jun 18, 2020
a645e62
minor code style adaptions
johanneshiry Jun 18, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -32,6 +32,7 @@ coordinates or multiple exactly equal coordinates possible
- Extended functionality of `GridAndGeoUtils`
- `CsvFileConnector` is now set up to process either UniqueEntities or only by file name
- `SwitchResult` superclass changed from `ConnectorResult` to `ResultEntity`
- `CsvDataSource` now parses valid RFC 4180 rows correctly (the invalid old syntax is still supported, but deprecated!)

### Fixed
- CsvDataSource now stops trying to get an operator for empty operator uuid field in entities
7 changes: 5 additions & 2 deletions build.gradle
@@ -45,11 +45,14 @@ repositories {
jcenter() //searches in bintray's repository 'jCenter', which contains Maven Central
maven { url 'https://www.jitpack.io' } // allows github repos as dependencies

// sonatype snapshot repo
maven { url 'http://oss.sonatype.org/content/repositories/snapshots' }

}

dependencies {
// ie³ power system utils
compile 'com.github.ie3-institute:PowerSystemUtils:1.3.1'
compile 'com.github.ie3-institute:PowerSystemUtils:1.3.2-SNAPSHOT'

compile "tec.uom:uom-se:$unitsOfMeasurementVersion"

@@ -62,7 +65,7 @@ dependencies {

// testing
testCompile 'org.junit.jupiter:junit-jupiter:5.5.2'
testCompile 'org.spockframework:spock-core:2.0-M1-groovy-2.5'
testCompile 'org.spockframework:spock-core:2.0-M3-groovy-3.0'
testCompile 'org.objenesis:objenesis:3.1' // Mock creation with constructor parameters

// Testcontainers (Docker Framework for testing)
@@ -107,13 +107,13 @@ private BufferedCsvWriter initWriter(String baseFolder, CsvFileDefinition fileDe

File pathFile = new File(fullPathToFile);
if (!pathFile.exists()) {
return new BufferedCsvWriter(baseFolder, fileDefinition, false, true, false);
return new BufferedCsvWriter(baseFolder, fileDefinition, true, false);
}
log.warn(
"File '{}.csv' already exist. Will append new content WITHOUT new header! Full path: {}",
fileDefinition.getFileName(),
pathFile.getAbsolutePath());
return new BufferedCsvWriter(baseFolder, fileDefinition, false, false, true);
return new BufferedCsvWriter(baseFolder, fileDefinition, false, true);
}

/**
35 changes: 5 additions & 30 deletions src/main/java/edu/ie3/datamodel/io/csv/BufferedCsvWriter.java
@@ -20,53 +20,28 @@
public class BufferedCsvWriter extends BufferedWriter {
/** Information on the shape of the file */
private final CsvFileDefinition fileDefinition;
/** True, if every entry should be quoted */
private final boolean quoted;

/**
* Build a new BufferedCsvWriter
*
* @param baseFolder Base folder, from where the file hierarchy should start
* @param fileDefinition The foreseen shape of the file
* @param writeHeader Toggles whether the header line is written or not
* @param quoted True, if the entries may be quoted
* @param append true to append to an existing file, false to overwrite an existing file (if any),
* if no file exists, a new one will be created in both cases
* @throws IOException If the FileOutputStream cannot be established.
*/
public BufferedCsvWriter(
String baseFolder,
CsvFileDefinition fileDefinition,
boolean quoted,
boolean writeHeader,
boolean append)
String baseFolder, CsvFileDefinition fileDefinition, boolean writeHeader, boolean append)
throws IOException {
super(
new OutputStreamWriter(
new FileOutputStream(
baseFolder + File.separator + fileDefinition.getFilePath(), append),
StandardCharsets.UTF_8));
this.fileDefinition = fileDefinition;
this.quoted = quoted;
if (writeHeader) writeFileHeader(fileDefinition.headLineElements);
}

/**
* Build a new CsvBufferedWriter. All entries are quoted
*
* @param baseFolder Base folder, from where the file hierarchy should start
* @param fileDefinition The foreseen shape of the file
* @param writeHeader Toggles, if the head line is written or not
* @param append true to append to an existing file, false to overwrite an existing file (if any),
* if no file exists, a new one will be created in both cases
* @throws IOException If the FileOutputStream cannot be established.
*/
public BufferedCsvWriter(
String baseFolder, CsvFileDefinition fileDefinition, boolean writeHeader, boolean append)
throws IOException {
this(baseFolder, fileDefinition, false, writeHeader, append);
}

/**
* Actually persisting the provided entity field data
*
@@ -85,16 +60,16 @@ public void write(Map<String, String> entityFieldData) throws IOException, SinkE
+ "'.");

String[] entries = entityFieldData.values().toArray(new String[0]);
writeOneLine(quoted ? StringUtils.quote(entries) : entries);
writeOneLine(entries);
}

/**
* Writes the file header
*
* @throws IOException If something is messed up
*/
private void writeFileHeader(String[] headLineElements) throws IOException {
writeOneLine(StringUtils.quote(StringUtils.camelCaseToSnakeCase(headLineElements)));
protected final void writeFileHeader(String[] headLineElements) throws IOException {
writeOneLine(StringUtils.camelCaseToSnakeCase(headLineElements));
}

/**
@@ -103,7 +78,7 @@ private void writeFileHeader(String[] headLineElements) throws IOException {
* @param entries Entries to write to the line of the file
* @throws IOException If writing is not possible
*/
private void writeOneLine(String[] entries) throws IOException {
protected final void writeOneLine(String[] entries) throws IOException {
for (int i = 0; i < entries.length; i++) {
String attribute = entries[i];
super.append(attribute);
62 changes: 55 additions & 7 deletions src/main/java/edu/ie3/datamodel/io/sink/CsvFileSink.java
@@ -28,6 +28,7 @@
import edu.ie3.datamodel.models.timeseries.TimeSeries;
import edu.ie3.datamodel.models.timeseries.TimeSeriesEntry;
import edu.ie3.datamodel.models.value.Value;
import edu.ie3.util.StringUtils;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
@@ -147,9 +148,8 @@ public <T extends UniqueEntity> void persist(T entity) {

@Override
public <C extends UniqueEntity> void persistIgnoreNested(C entity) {
LinkedHashMap<String, String> entityFieldData = new LinkedHashMap<>();
try {
entityFieldData =
LinkedHashMap<String, String> entityFieldData =
processorProvider
.handleEntity(entity)
.orElseThrow(
@@ -163,10 +163,13 @@ public <C extends UniqueEntity> void persistIgnoreNested(C entity) {
.collect(Collectors.joining(","))
+ "]"));

String[] headerElements = processorProvider.getHeaderElements(entity.getClass());
String[] headerElements =
csvHeaderElements(processorProvider.getHeaderElements(entity.getClass()));

BufferedCsvWriter writer =
connector.getOrInitWriter(entity.getClass(), headerElements, csvSep);
writer.write(entityFieldData);

writer.write(csvEntityFieldData(entityFieldData));
} catch (ProcessorProviderException e) {
log.error(
"Exception occurred during receiving of header elements. Cannot write this element.", e);
@@ -297,12 +300,12 @@ public <E extends TimeSeriesEntry<V>, V extends Value> void persistTimeSeries(
.collect(Collectors.joining(","))
+ "]"));

String[] headerElements = processorProvider.getHeaderElements(key);
String[] headerElements = csvHeaderElements(processorProvider.getHeaderElements(key));
BufferedCsvWriter writer = connector.getOrInitWriter(timeSeries, headerElements, csvSep);
entityFieldData.forEach(
data -> {
try {
writer.write(data);
writer.write(csvEntityFieldData(data));
} catch (IOException e) {
log.error(
"Cannot write the following entity data: '{}'. Exception: {}",
@@ -338,7 +341,9 @@ private void initFiles(
.forEach(
clz -> {
try {
String[] headerElements = processorProvider.getHeaderElements(clz);
String[] headerElements =
csvHeaderElements(processorProvider.getHeaderElements(clz));

connector.getOrInitWriter(clz, headerElements, csvSep);
} catch (ProcessorProviderException e) {
log.error(
@@ -353,4 +358,47 @@ private void initFiles(
}
});
}

/**
* Transforms a provided array of strings to valid csv formatted strings (according to csv
* specification RFC 4180)
*
* @param strings array of strings that should be processed
* @return a new array with valid csv formatted strings
*/
private String[] csvHeaderElements(String[] strings) {
return Arrays.stream(strings)
.map(inputElement -> StringUtils.csvString(inputElement, csvSep))
.toArray(String[]::new);
}
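// Hypothetical example (values invented): with csvSep = ",", a header element containing
// the separator, e.g. note,extra, is expected to come back from StringUtils.csvString
// quoted as "note,extra"; elements without special characters pass through unchanged.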

/**
* Transforms a provided string-to-string map into valid csv formatted strings (according to csv
* specification RFC 4180)
*
* @param entityFieldData a string-to-string map that should be processed
* @return a new map with valid csv formatted key and value strings
*/
private LinkedHashMap<String, String> csvEntityFieldData(
LinkedHashMap<String, String> entityFieldData) {

return entityFieldData.entrySet().stream()
.map(
mapEntry ->
new AbstractMap.SimpleEntry<>(
StringUtils.csvString(mapEntry.getKey(), csvSep),
StringUtils.csvString(mapEntry.getValue(), csvSep)))
.collect(
Collectors.toMap(
AbstractMap.SimpleEntry::getKey,
AbstractMap.SimpleEntry::getValue,
(v1, v2) -> {
throw new IllegalStateException(
"Duplicate keys in entityFieldData are not allowed!"
+ entityFieldData.entrySet().stream()
.map(entry -> entry.getKey() + " = " + entry.getValue())
.collect(Collectors.joining(",\n")));
},
LinkedHashMap::new));
}
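// Design note (sketch, inferred from the code above): keys and values are converted
// together and collected into a LinkedHashMap on purpose, since the writer emits the
// values in map order and that order has to match the header; the merge function only
// guards against duplicate keys and is not expected to ever fire for distinct field names.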
}
60 changes: 49 additions & 11 deletions src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java
@@ -56,6 +56,8 @@ public abstract class CsvDataSource {
protected static final String TYPE = "type";
protected static final String FIELDS_TO_VALUES_MAP = "fieldsToValuesMap";

@Deprecated private boolean notYetLoggedWarning = true;

public CsvDataSource(String csvSep, String folderPath, FileNamingStrategy fileNamingStrategy) {
this.csvSep = csvSep;
this.connector = new CsvFileConnector(folderPath, fileNamingStrategy);
@@ -78,15 +80,35 @@ private Map<String, String> buildFieldsToAttributes(
TreeMap<String, String> insensitiveFieldsToAttributes =
new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

final String[] fieldVals = fieldVals(csvSep, csvRow);
// TODO: when removing the deprecated workaround code below, add the final modifier to
// fieldVals again and remove 'finalFieldVals' and notYetLoggedWarning
String[] fieldVals = parseCsvRow(csvRow, csvSep);

// start workaround for deprecated data model processing
if (fieldVals.length != headline.length) {
// try to parse old structure
fieldVals = oldFieldVals(csvSep, csvRow);
// if this works log a warning to inform the user that this will not work much longer,
// otherwise parsing will fail regularly as expected below
if (fieldVals.length == headline.length && notYetLoggedWarning) {
notYetLoggedWarning = false;
log.warn(
"You are using an outdated version of the data "
+ "model with invalid formatted csv rows. This is okay for now, but please updated your files, as the "
+ "support for the old model will be removed soon.");
}
}
// end workaround for deprecated data model processing
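// Hypothetical illustration of both row syntaxes (values invented, assuming csvSep = ","):
//   RFC 4180 (new):   "4ca90220-...","{""type"":""Point"",""coordinates"":[7.41,51.49]}",node_a
//   deprecated (old): 4ca90220-...,{"type":"Point","coordinates":[7.41,51.49]},node_a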

try {
String[] finalFieldVals = fieldVals;
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, fieldVals.length)
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> finalFieldVals[v])));

if (insensitiveFieldsToAttributes.size() != headline.length) {
Set<String> fieldsToAttributesKeySet = insensitiveFieldsToAttributes.keySet();
@@ -112,6 +134,23 @@ private Map<String, String> buildFieldsToAttributes(
return insensitiveFieldsToAttributes;
}
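// Sketch of the resulting mapping (hypothetical field name): a headline entry
// "linked_net" becomes the key "linkedNet" via snakeCaseToCamelCase, and lookups in
// the returned TreeMap are case-insensitive.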

/**
* Parse a given, valid RFC 4180 formatted csv row into its field values
*
* @param csvRow the valid row
* @param csvSep separator of the csv file
* @return an array with the csv field values as strings
*/
protected String[] parseCsvRow(String csvRow, String csvSep) {
return Arrays.stream(csvRow.split(csvSep + "(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1))
.map(
maybeStartEndQuotedString ->
StringUtils.unquoteStartEnd(maybeStartEndQuotedString.trim())
.replaceAll("\"{2}", "\"")
.trim())
.toArray(String[]::new);
}
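// Minimal sketch of the parsing above (invented example, csvSep = ","): the look-ahead
// splits on the separator only if an even number of double quotes follows, i.e. only
// outside quoted fields; unquoteStartEnd is assumed to strip the enclosing quote pair,
// and the replaceAll collapses escaped (doubled) quotes:
//   parseCsvRow("\"a,b\",\"c \"\"d\"\"\",e", ",")  ->  { "a,b", "c \"d\"", "e" }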

/**
* Build an array from the provided csv row string, considering special cases where geoJson or
* {@link edu.ie3.datamodel.models.input.system.characteristic.CharacteristicInput} are provided
@@ -120,8 +159,10 @@ private Map<String, String> buildFieldsToAttributes(
* @param csvSep the column separator of the csv row string
* @param csvRow the csv row string
* @return an array with one entry per column of the provided csv row string
* @deprecated only left for backward compatibility. Will be removed in a major release
*/
private String[] fieldVals(String csvSep, String csvRow) {
@Deprecated
private String[] oldFieldVals(String csvSep, String csvRow) {

/*geo json support*/
final String geoJsonRegex = "[\\{].+?\\}\\}\\}";
@@ -131,8 +172,9 @@ private String[] fieldVals(String csvSep, String csvRow) {
final String charInputRegex = "(cP:|olm:|cosPhiFixed:|cosPhiP:|qV:)\\{.+?\\}";
final String charReplacement = "charRepl";

List<String> geoList = extractMatchingStrings(geoJsonRegex, csvRow);
List<String> charList = extractMatchingStrings(charInputRegex, csvRow);
/*removes double double quotes*/
List<String> geoList = extractMatchingStrings(geoJsonRegex, csvRow.replaceAll("\"\"", "\""));
List<String> charList = extractMatchingStrings(charInputRegex, csvRow.replaceAll("\"\"", "\""));

AtomicInteger geoCounter = new AtomicInteger(0);
AtomicInteger charCounter = new AtomicInteger(0);
@@ -141,7 +183,7 @@ private String[] fieldVals(String csvSep, String csvRow) {
csvRow
.replaceAll(charInputRegex, charReplacement)
.replaceAll(geoJsonRegex, geoReplacement)
.replaceAll("\"", "")
.replaceAll("\"*", "") // remove all quotes from
.split(csvSep, -1))
.map(
fieldVal -> {
@@ -303,7 +345,7 @@ protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, CsvFileConnector connector) {

try (BufferedReader reader = connector.initReader(entityClass)) {
final String[] headline = parseCsvHeadline(reader.readLine(), csvSep);
final String[] headline = parseCsvRow(reader.readLine(), csvSep);

// sanity check for headline
if (!Arrays.asList(headline).contains("uuid")) {
@@ -341,10 +383,6 @@ protected List<Map<String, String>> csvRowFieldValueMapping(
.collect(Collectors.toList());
}

protected String[] parseCsvHeadline(String csvHeadline, String csvSep) {
return csvHeadline.replaceAll("\"", "").toLowerCase().split(csvSep);
}

/**
* Returns a collection of maps each representing a row in csv file that can be used to build an
* instance of a {@link UniqueEntity}. The uniqueness of each row is double checked by a) that no
@@ -116,7 +116,7 @@ private Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
try (BufferedReader reader = connector.initReader(filename)) {
final String[] headline = parseCsvHeadline(reader.readLine(), csvSep);
final String[] headline = parseCsvRow(reader.readLine(), csvSep);

if (!Arrays.asList(headline).containsAll(Arrays.asList("id", "lat", "lon"))) {
throw new SourceException(