Merged
Changes from all 34 commits
6000d32
Adds Regex to modify JSON Strings to match csv spec.
denstre Jun 8, 2020
3d48c8b
-Factory removes double double quotes to match standard format
denstre Jun 9, 2020
c99a5aa
Applies Spotless
denstre Jun 9, 2020
0bd70c1
Merge branch 'master' of https://github.com/ie3-institute/PowerSystem…
denstre Jun 9, 2020
00f7580
Merge remote-tracking branch 'remotes/origin/master' into ds/#141-ada…
ckittl Jun 15, 2020
b99df92
Reworks quoting process
denstre Jun 15, 2020
cdc0059
Merge branch 'master' of https://github.com/ie3-institute/PowerSystem…
denstre Jun 15, 2020
2af5491
Merge remote-tracking branch 'origin/ds/#141-adapt-jsonstrings-for-cs…
denstre Jun 15, 2020
14529db
Spotless Apply...
denstre Jun 15, 2020
cfbfdf1
Undoes method reference
denstre Jun 15, 2020
6357203
Resets EntityData class
denstre Jun 15, 2020
e1a6ff0
Adds all csv spec cases
denstre Jun 15, 2020
913fbe5
-Removes header Quotes
denstre Jun 16, 2020
6b9f910
-Implements quoteHeaderElements method to predefine header for CsvFil…
denstre Jun 16, 2020
7cda0a9
sA...
denstre Jun 16, 2020
b13de38
-Removes json string regex
denstre Jun 16, 2020
cd80e27
-Turns double double quotes into double quotes when reading csv data
denstre Jun 16, 2020
4ca3164
-Implements csv quoting from utils
denstre Jun 16, 2020
049af41
adapted CsvFileSink for valid quoting
johanneshiry Jun 17, 2020
4c2b858
- fix test
johanneshiry Jun 17, 2020
e0134db
replaced quoting in CsvFileSink with StringUtils
johanneshiry Jun 17, 2020
d381577
updated PowerSystemUtils dependency to current SNAPSHOT version
johanneshiry Jun 17, 2020
d6d9289
changed method order in CsvFileSink
johanneshiry Jun 17, 2020
5f80e75
minor change in CsvFileSink
johanneshiry Jun 17, 2020
d7ea57d
Update src/main/java/edu/ie3/datamodel/io/sink/CsvFileSink.java
johanneshiry Jun 18, 2020
3831a3c
added sonatype snapshot repo
johanneshiry Jun 18, 2020
2d10c3d
- set CsvDataSource to RFC 4180 valid parsing
johanneshiry Jun 18, 2020
93d618d
fixed invalid method parameter order in CsvDataSource#parseCsvRow
johanneshiry Jun 18, 2020
d6b14ee
updated spock testing framework version
johanneshiry Jun 18, 2020
ffa0a13
fix broken tests + addressing reviewers comments
johanneshiry Jun 18, 2020
18f6b07
adapt testfiles to new format + added documentation in CsvFileSinkTest
johanneshiry Jun 18, 2020
7265669
addressing PMD issue in BufferedCsvWriter
johanneshiry Jun 18, 2020
f513a1f
addressing error prone implementations
johanneshiry Jun 18, 2020
a645e62
minor code style adaptions
johanneshiry Jun 18, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -32,6 +32,7 @@ coordinates or multiple exactly equal coordinates possible
- Extended functionality of `GridAndGeoUtils`
- `CsvFileConnector` is now set up to process either UniqueEntities or only by file name
- `SwitchResult` superclass changed from `ConnectorResult` to `ResultEntity`
- `CsvDataSource` now parses valid RFC 4180 rows correctly (the invalid old syntax is still supported, but deprecated!)

### Fixed
- CsvDataSource now stops trying to get an operator for empty operator uuid field in entities
7 changes: 5 additions & 2 deletions build.gradle
@@ -45,11 +45,14 @@ repositories {
jcenter() //searches in bintray's repository 'jCenter', which contains Maven Central
maven { url 'https://www.jitpack.io' } // allows github repos as dependencies

// sonatype snapshot repo
maven { url 'http://oss.sonatype.org/content/repositories/snapshots' }

}

dependencies {
// ie³ power system utils
compile 'com.github.ie3-institute:PowerSystemUtils:1.3.1'
compile 'com.github.ie3-institute:PowerSystemUtils:1.3.2-SNAPSHOT'

compile "tec.uom:uom-se:$unitsOfMeasurementVersion"

@@ -62,7 +65,7 @@ dependencies {

// testing
testCompile 'org.junit.jupiter:junit-jupiter:5.5.2'
testCompile 'org.spockframework:spock-core:2.0-M1-groovy-2.5'
testCompile 'org.spockframework:spock-core:2.0-M3-groovy-3.0'
testCompile 'org.objenesis:objenesis:3.1' // Mock creation with constructor parameters

// Testcontainers (Docker Framework for testing)
@@ -107,13 +107,13 @@ private BufferedCsvWriter initWriter(String baseFolder, CsvFileDefinition fileDe

File pathFile = new File(fullPathToFile);
if (!pathFile.exists()) {
return new BufferedCsvWriter(baseFolder, fileDefinition, false, true, false);
return new BufferedCsvWriter(baseFolder, fileDefinition, true, false);
}
log.warn(
"File '{}.csv' already exist. Will append new content WITHOUT new header! Full path: {}",
fileDefinition.getFileName(),
pathFile.getAbsolutePath());
return new BufferedCsvWriter(baseFolder, fileDefinition, false, false, true);
return new BufferedCsvWriter(baseFolder, fileDefinition, false, true);
}

/**
35 changes: 5 additions & 30 deletions src/main/java/edu/ie3/datamodel/io/csv/BufferedCsvWriter.java
@@ -20,53 +20,28 @@
public class BufferedCsvWriter extends BufferedWriter {
/** Information on the shape of the file */
private final CsvFileDefinition fileDefinition;
/** True, if every entry should be quoted */
private final boolean quoted;

/**
* Build a new BufferedCsvWriter
*
* @param baseFolder Base folder, from where the file hierarchy should start
* @param fileDefinition The foreseen shape of the file
* @param writeHeader Toggles whether the header line is written or not
* @param quoted True, if the entries may be quoted
* @param append true to append to an existing file, false to overwrite an existing file (if any),
* if no file exists, a new one will be created in both cases
* @throws IOException If the FileOutputStream cannot be established.
*/
public BufferedCsvWriter(
String baseFolder,
CsvFileDefinition fileDefinition,
boolean quoted,
boolean writeHeader,
boolean append)
String baseFolder, CsvFileDefinition fileDefinition, boolean writeHeader, boolean append)
throws IOException {
super(
new OutputStreamWriter(
new FileOutputStream(
baseFolder + File.separator + fileDefinition.getFilePath(), append),
StandardCharsets.UTF_8));
this.fileDefinition = fileDefinition;
this.quoted = quoted;
if (writeHeader) writeFileHeader(fileDefinition.headLineElements);
}

/**
* Build a new CsvBufferedWriter. All entries are quoted
*
* @param baseFolder Base folder, from where the file hierarchy should start
* @param fileDefinition The foreseen shape of the file
* @param writeHeader Toggles, if the head line is written or not
* @param append true to append to an existing file, false to overwrite an existing file (if any),
* if no file exists, a new one will be created in both cases
* @throws IOException If the FileOutputStream cannot be established.
*/
public BufferedCsvWriter(
String baseFolder, CsvFileDefinition fileDefinition, boolean writeHeader, boolean append)
throws IOException {
this(baseFolder, fileDefinition, false, writeHeader, append);
}

/**
* Actually persisting the provided entity field data
*
@@ -85,16 +60,16 @@ public void write(Map<String, String> entityFieldData) throws IOException, SinkE
+ "'.");

String[] entries = entityFieldData.values().toArray(new String[0]);
writeOneLine(quoted ? StringUtils.quote(entries) : entries);
writeOneLine(entries);
}

/**
* Writes the file header
*
* @throws IOException If something is messed up
*/
private void writeFileHeader(String[] headLineElements) throws IOException {
writeOneLine(StringUtils.quote(StringUtils.camelCaseToSnakeCase(headLineElements)));
protected final void writeFileHeader(String[] headLineElements) throws IOException {
writeOneLine(StringUtils.camelCaseToSnakeCase(headLineElements));
}

/**
@@ -103,7 +78,7 @@ private void writeFileHeader(String[] headLineElements) throws IOException {
* @param entries Entries to write to the line of the file
* @throws IOException If writing is not possible
*/
private void writeOneLine(String[] entries) throws IOException {
protected final void writeOneLine(String[] entries) throws IOException {
for (int i = 0; i < entries.length; i++) {
String attribute = entries[i];
super.append(attribute);
62 changes: 55 additions & 7 deletions src/main/java/edu/ie3/datamodel/io/sink/CsvFileSink.java
@@ -28,6 +28,7 @@
import edu.ie3.datamodel.models.timeseries.TimeSeries;
import edu.ie3.datamodel.models.timeseries.TimeSeriesEntry;
import edu.ie3.datamodel.models.value.Value;
import edu.ie3.util.StringUtils;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
@@ -147,9 +148,8 @@ public <T extends UniqueEntity> void persist(T entity) {

@Override
public <C extends UniqueEntity> void persistIgnoreNested(C entity) {
LinkedHashMap<String, String> entityFieldData = new LinkedHashMap<>();
try {
entityFieldData =
LinkedHashMap<String, String> entityFieldData =
processorProvider
.handleEntity(entity)
.orElseThrow(
@@ -163,10 +163,13 @@ public <C extends UniqueEntity> void persistIgnoreNested(C entity) {
.collect(Collectors.joining(","))
+ "]"));

String[] headerElements = processorProvider.getHeaderElements(entity.getClass());
String[] headerElements =
csvHeaderElements(processorProvider.getHeaderElements(entity.getClass()));

BufferedCsvWriter writer =
connector.getOrInitWriter(entity.getClass(), headerElements, csvSep);
writer.write(entityFieldData);

writer.write(csvEntityFieldData(entityFieldData));
} catch (ProcessorProviderException e) {
log.error(
"Exception occurred during receiving of header elements. Cannot write this element.", e);
@@ -297,12 +300,12 @@ public <E extends TimeSeriesEntry<V>, V extends Value> void persistTimeSeries(
.collect(Collectors.joining(","))
+ "]"));

String[] headerElements = processorProvider.getHeaderElements(key);
String[] headerElements = csvHeaderElements(processorProvider.getHeaderElements(key));
BufferedCsvWriter writer = connector.getOrInitWriter(timeSeries, headerElements, csvSep);
entityFieldData.forEach(
data -> {
try {
writer.write(data);
writer.write(csvEntityFieldData(data));
} catch (IOException e) {
log.error(
"Cannot write the following entity data: '{}'. Exception: {}",
@@ -338,7 +341,9 @@ private void initFiles(
.forEach(
clz -> {
try {
String[] headerElements = processorProvider.getHeaderElements(clz);
String[] headerElements =
csvHeaderElements(processorProvider.getHeaderElements(clz));

connector.getOrInitWriter(clz, headerElements, csvSep);
} catch (ProcessorProviderException e) {
log.error(
@@ -353,4 +358,47 @@ private void initFiles(
}
});
}

/**
* Transforms a provided array of strings to valid csv formatted strings (according to csv
* specification RFC 4180)
*
* @param strings array of strings that should be processed
* @return a new array with valid csv formatted strings
*/
private String[] csvHeaderElements(String[] strings) {
return Arrays.stream(strings)
.map(inputElement -> StringUtils.csvString(inputElement, csvSep))
.toArray(String[]::new);
}
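// Hypothetical example (values invented): with csvSep = ",", a header element containing
// the separator, e.g. note,extra, is expected to come back from StringUtils.csvString
// quoted as "note,extra"; elements without special characters pass through unchanged.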

/**
* Transforms a provided string-to-string map into valid csv formatted strings (according to csv
* specification RFC 4180)
*
* @param entityFieldData a string-to-string map that should be processed
* @return a new map with valid csv formatted key and value strings
*/
private LinkedHashMap<String, String> csvEntityFieldData(
LinkedHashMap<String, String> entityFieldData) {

return entityFieldData.entrySet().stream()
.map(
mapEntry ->
new AbstractMap.SimpleEntry<>(
StringUtils.csvString(mapEntry.getKey(), csvSep),
StringUtils.csvString(mapEntry.getValue(), csvSep)))
.collect(
Collectors.toMap(
AbstractMap.SimpleEntry::getKey,
AbstractMap.SimpleEntry::getValue,
(v1, v2) -> {
throw new IllegalStateException(
"Duplicate keys in entityFieldData are not allowed!"
+ entityFieldData.entrySet().stream()
.map(entry -> entry.getKey() + " = " + entry.getValue())
.collect(Collectors.joining(",\n")));
},
LinkedHashMap::new));
}
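// Design note (sketch, inferred from the code above): keys and values are converted
// together and collected into a LinkedHashMap on purpose, since the writer emits the
// values in map order and that order has to match the header; the merge function only
// guards against duplicate keys and is not expected to ever fire for distinct field names.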
}
60 changes: 49 additions & 11 deletions src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java
@@ -56,6 +56,8 @@ public abstract class CsvDataSource {
protected static final String TYPE = "type";
protected static final String FIELDS_TO_VALUES_MAP = "fieldsToValuesMap";

@Deprecated private boolean notYetLoggedWarning = true;

public CsvDataSource(String csvSep, String folderPath, FileNamingStrategy fileNamingStrategy) {
this.csvSep = csvSep;
this.connector = new CsvFileConnector(folderPath, fileNamingStrategy);
@@ -78,15 +80,35 @@ private Map<String, String> buildFieldsToAttributes(
TreeMap<String, String> insensitiveFieldsToAttributes =
new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

final String[] fieldVals = fieldVals(csvSep, csvRow);
// TODO: when removing the deprecated workaround code below, add the final modifier to
// fieldVals again and remove 'finalFieldVals' and notYetLoggedWarning
String[] fieldVals = parseCsvRow(csvRow, csvSep);

// start workaround for deprecated data model processing
if (fieldVals.length != headline.length) {
// try to parse old structure
fieldVals = oldFieldVals(csvSep, csvRow);
// if this works log a warning to inform the user that this will not work much longer,
// otherwise parsing will fail regularly as expected below
if (fieldVals.length == headline.length && notYetLoggedWarning) {
notYetLoggedWarning = false;
log.warn(
"You are using an outdated version of the data "
+ "model with invalid formatted csv rows. This is okay for now, but please updated your files, as the "
+ "support for the old model will be removed soon.");
}
}
// end workaround for deprecated data model processing
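// Hypothetical illustration of both row syntaxes (values invented, assuming csvSep = ","):
//   RFC 4180 (new):   "4ca90220-...","{""type"":""Point"",""coordinates"":[7.41,51.49]}",node_a
//   deprecated (old): 4ca90220-...,{"type":"Point","coordinates":[7.41,51.49]},node_a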

try {
String[] finalFieldVals = fieldVals;
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, fieldVals.length)
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> finalFieldVals[v])));

if (insensitiveFieldsToAttributes.size() != headline.length) {
Set<String> fieldsToAttributesKeySet = insensitiveFieldsToAttributes.keySet();
@@ -112,6 +134,23 @@ private Map<String, String> buildFieldsToAttributes(
return insensitiveFieldsToAttributes;
}
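// Sketch of the resulting mapping (hypothetical field name): a headline entry
// "linked_net" becomes the key "linkedNet" via snakeCaseToCamelCase, and lookups in
// the returned TreeMap are case-insensitive.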

/**
* Parse a given, valid RFC 4180 formatted csv row into its field values
*
* @param csvRow the valid row
* @param csvSep separator of the csv file
* @return an array with the csv field values as strings
*/
protected String[] parseCsvRow(String csvRow, String csvSep) {
return Arrays.stream(csvRow.split(csvSep + "(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1))
.map(
maybeStartEndQuotedString ->
StringUtils.unquoteStartEnd(maybeStartEndQuotedString.trim())
.replaceAll("\"{2}", "\"")
.trim())
.toArray(String[]::new);
}
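// Minimal sketch of the parsing above (invented example, csvSep = ","): the look-ahead
// splits on the separator only if an even number of double quotes follows, i.e. only
// outside quoted fields; unquoteStartEnd is assumed to strip the enclosing quote pair,
// and the replaceAll collapses escaped (doubled) quotes:
//   parseCsvRow("\"a,b\",\"c \"\"d\"\"\",e", ",")  ->  { "a,b", "c \"d\"", "e" }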

/**
* Build an array from the provided csv row string, considering special cases where geoJson or
* {@link edu.ie3.datamodel.models.input.system.characteristic.CharacteristicInput} are provided
@@ -120,8 +159,10 @@ private Map<String, String> buildFieldsToAttributes(
* @param csvSep the column separator of the csv row string
* @param csvRow the csv row string
* @return an array with one entry per column of the provided csv row string
* @deprecated only left for backward compatibility. Will be removed in a major release
*/
private String[] fieldVals(String csvSep, String csvRow) {
@Deprecated
private String[] oldFieldVals(String csvSep, String csvRow) {

/*geo json support*/
final String geoJsonRegex = "[\\{].+?\\}\\}\\}";
@@ -131,8 +172,9 @@ private String[] fieldVals(String csvSep, String csvRow) {
final String charInputRegex = "(cP:|olm:|cosPhiFixed:|cosPhiP:|qV:)\\{.+?\\}";
final String charReplacement = "charRepl";

List<String> geoList = extractMatchingStrings(geoJsonRegex, csvRow);
List<String> charList = extractMatchingStrings(charInputRegex, csvRow);
/*removes double double quotes*/
List<String> geoList = extractMatchingStrings(geoJsonRegex, csvRow.replaceAll("\"\"", "\""));
List<String> charList = extractMatchingStrings(charInputRegex, csvRow.replaceAll("\"\"", "\""));

AtomicInteger geoCounter = new AtomicInteger(0);
AtomicInteger charCounter = new AtomicInteger(0);
@@ -141,7 +183,7 @@ private String[] fieldVals(String csvSep, String csvRow) {
csvRow
.replaceAll(charInputRegex, charReplacement)
.replaceAll(geoJsonRegex, geoReplacement)
.replaceAll("\"", "")
.replaceAll("\"*", "") // remove all quotes from
.split(csvSep, -1))
.map(
fieldVal -> {
@@ -303,7 +345,7 @@ protected Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
Class<? extends UniqueEntity> entityClass, CsvFileConnector connector) {

try (BufferedReader reader = connector.initReader(entityClass)) {
final String[] headline = parseCsvHeadline(reader.readLine(), csvSep);
final String[] headline = parseCsvRow(reader.readLine(), csvSep);

// sanity check for headline
if (!Arrays.asList(headline).contains("uuid")) {
@@ -341,10 +383,6 @@ protected List<Map<String, String>> csvRowFieldValueMapping(
.collect(Collectors.toList());
}

protected String[] parseCsvHeadline(String csvHeadline, String csvSep) {
return csvHeadline.replaceAll("\"", "").toLowerCase().split(csvSep);
}

/**
* Returns a collection of maps each representing a row in csv file that can be used to build an
* instance of a {@link UniqueEntity}. The uniqueness of each row is double checked by a) that no
@@ -116,7 +116,7 @@ private Stream<Map<String, String>> buildStreamWithFieldsToAttributesMap(
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
try (BufferedReader reader = connector.initReader(filename)) {
final String[] headline = parseCsvHeadline(reader.readLine(), csvSep);
final String[] headline = parseCsvRow(reader.readLine(), csvSep);

if (!Arrays.asList(headline).containsAll(Arrays.asList("id", "lat", "lon"))) {
throw new SourceException(