Skip to content

Commit

Permalink
Cleaning up CsvFileConnector and friends regarding time series meta i…
Browse files Browse the repository at this point in the history
…nformation
  • Loading branch information
sebastian-peter committed Mar 17, 2022
1 parent 3b09288 commit 0ee55fc
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 169 deletions.
132 changes: 22 additions & 110 deletions src/main/java/edu/ie3/datamodel/io/connectors/CsvFileConnector.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FilenameUtils;
Expand All @@ -42,8 +43,10 @@ public class CsvFileConnector implements DataConnector {
new HashMap<>();
private final Map<UUID, BufferedCsvWriter> timeSeriesWriters = new HashMap<>();
// ATTENTION: Do not finalize. It's meant for lazy evaluation.
@Deprecated(since = "3.0", forRemoval = true)
private Map<UUID, edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>
individualTimeSeriesMetaInformation;

private final FileNamingStrategy fileNamingStrategy;
private final String baseDirectoryName;

Expand Down Expand Up @@ -221,87 +224,45 @@ public BufferedReader initReader(String filePath) throws FileNotFoundException {
*
* @param timeSeriesUuid The time series in question
* @return An option on the queried information
* @deprecated since 3.0. Use {@link #getIndividualTimeSeriesMetaInformation()} instead
* @deprecated since 3.0. Use {@link #getCsvIndividualTimeSeriesMetaInformation(ColumnScheme...)}
* instead
*/
@Deprecated(since = "3.0", forRemoval = true)
public Optional<edu.ie3.datamodel.io.csv.timeseries.IndividualTimeSeriesMetaInformation>
getIndividualTimeSeriesMetaInformation(UUID timeSeriesUuid) {
if (Objects.isNull(individualTimeSeriesMetaInformation))
individualTimeSeriesMetaInformation = buildIndividualTimeSeriesMetaInformation();
individualTimeSeriesMetaInformation = getCsvIndividualTimeSeriesMetaInformation();

return Optional.ofNullable(individualTimeSeriesMetaInformation.get(timeSeriesUuid))
.map(edu.ie3.datamodel.io.csv.timeseries.IndividualTimeSeriesMetaInformation::new);
}

/**
* Get time series meta information
*
* <p>This method lazily evaluates the mapping from <i>all</i> time series files to their meta
* information.
* Receive the information for specific time series. They are given back filtered by the column
* scheme in order to allow for accounting the different content types.
*
* @return All time series meta information
* @param columnSchemes the column schemes to initialize readers for. If no scheme is given, all
* possible readers will be initialized.
* @return A mapping from time series uuid to the individual time series meta information
*/
public Map<UUID, edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>
getIndividualTimeSeriesMetaInformation() {
if (Objects.isNull(individualTimeSeriesMetaInformation))
individualTimeSeriesMetaInformation = buildIndividualTimeSeriesMetaInformation();

return individualTimeSeriesMetaInformation;
}

/**
* This method creates a map from time series uuid to its meta information.
*
* @return Mapping from time series uuid to its meta information.
*/
private Map<UUID, edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>
buildIndividualTimeSeriesMetaInformation() {
getCsvIndividualTimeSeriesMetaInformation(final ColumnScheme... columnSchemes) {
return getIndividualTimeSeriesFilePaths().parallelStream()
.map(
filePath -> {
/* Extract meta information from file path and enhance it with the file path itself */
String filePathWithoutEnding = removeFileEnding(filePath);
IndividualTimeSeriesMetaInformation metaInformation =
(IndividualTimeSeriesMetaInformation)
fileNamingStrategy.timeSeriesMetaInformation(filePathWithoutEnding);
fileNamingStrategy.individualTimeSeriesMetaInformation(filePath);
return new edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation(
metaInformation, filePathWithoutEnding);
})
.collect(Collectors.toMap(TimeSeriesMetaInformation::getUuid, v -> v));
}

/**
* Receive the information for specific time series. They are given back grouped by the column
* scheme in order to allow for accounting the different content types.
*
* @param columnSchemes the column schemes to initialize readers for. If no scheme is given, all
* possible readers will be initialized.
* @return A mapping from column scheme to the individual time series meta information
*/
public Map<ColumnScheme, Set<edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>>
getCsvIndividualTimeSeriesMetaInformation(ColumnScheme... columnSchemes) {
return getIndividualTimeSeriesFilePaths().parallelStream()
.map(
pathString -> {
String filePathWithoutEnding = removeFileEnding(pathString);
return buildCsvTimeSeriesMetaInformation(filePathWithoutEnding, columnSchemes);
metaInformation, FileNamingStrategy.removeFileNameEnding(filePath));
})
.filter(Optional::isPresent)
.map(Optional::get)
.collect(
Collectors.groupingBy(
edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation::getColumnScheme,
Collectors.toSet()));
}

/**
* Removes the file ending from input string
*
* @param input String to manipulate
* @return input without possible ending
*/
private String removeFileEnding(String input) {
return input.replaceAll(FILE_ENDING + "$", "");
.filter(
metaInformation ->
columnSchemes == null
|| columnSchemes.length == 0
|| Stream.of(columnSchemes)
.anyMatch(scheme -> scheme.equals(metaInformation.getColumnScheme())))
.collect(Collectors.toMap(TimeSeriesMetaInformation::getUuid, Function.identity()));
}

/**
Expand All @@ -320,7 +281,7 @@ private Set<String> getIndividualTimeSeriesFilePaths() {
.map(baseDirectoryPath::relativize)
.filter(
path -> {
String withoutEnding = removeFileEnding(path.toString());
String withoutEnding = FileNamingStrategy.removeFileNameEnding(path.toString());
return fileNamingStrategy
.getIndividualTimeSeriesPattern()
.matcher(withoutEnding)
Expand All @@ -334,55 +295,6 @@ private Set<String> getIndividualTimeSeriesFilePaths() {
}
}

/**
* Compose the needed information for reading in a single time series. If the file points to a
* non-individual time series or a time series of a column scheme other than the specified ones,
* or the initialisation of the reader does not work, an empty {@link Optional} is given back
*
* @param filePathString String describing the path to the time series file
* @param columnSchemes the allowed column schemes. If no scheme is specified, all schemes are
* allowed.
* @return An {@link Optional} to {@link IndividualTimeSeriesMetaInformation}
*/
private Optional<edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>
buildCsvTimeSeriesMetaInformation(String filePathString, ColumnScheme... columnSchemes) {
try {
TimeSeriesMetaInformation metaInformation =
fileNamingStrategy.timeSeriesMetaInformation(filePathString);
if (!IndividualTimeSeriesMetaInformation.class.isAssignableFrom(metaInformation.getClass())) {
log.error(
"The time series file '{}' does not represent an individual time series.",
filePathString);
return Optional.empty();
}

IndividualTimeSeriesMetaInformation individualMetaInformation =
(IndividualTimeSeriesMetaInformation) metaInformation;

// If no column schemes are specified, we will include all. If there are specified schemes, we
// check if the file's column scheme matches any of them
if (columnSchemes != null
&& columnSchemes.length > 0
&& Stream.of(columnSchemes)
.noneMatch(scheme -> scheme.equals(individualMetaInformation.getColumnScheme()))) {
log.warn(
"The column scheme of the time series file {} does not match any of the specified column schemes ({}), so it will not be processed.",
filePathString,
columnSchemes);
return Optional.empty();
}
return Optional.of(
new edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation(
individualMetaInformation.getUuid(),
individualMetaInformation.getColumnScheme(),
filePathString));
} catch (IllegalArgumentException e) {
log.error(
"Error during extraction of meta information from file name '{}'.", filePathString, e);
return Optional.empty();
}
}

/**
* Initialises a reader to get grip on the file that contains mapping information between
* coordinate id and actual coordinate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ public FileNameMetaInformation extractTimeSeriesMetaInformation(String fileName)
*/
public TimeSeriesMetaInformation timeSeriesMetaInformation(String fileName) {
/* Remove the file ending (ending limited to 255 chars, which is the max file name allowed in NTFS and ext4) */
String withoutEnding = fileName.replaceAll("(?:\\.[^\\\\/\\s]{1,255}){1,2}$", "");
String withoutEnding = removeFileNameEnding(fileName);

if (getIndividualTimeSeriesPattern().matcher(withoutEnding).matches())
return entityPersistenceNamingStrategy.individualTimesSeriesMetaInformation(withoutEnding);
Expand All @@ -285,6 +285,15 @@ else if (getLoadProfileTimeSeriesPattern().matcher(withoutEnding).matches())
"Unknown format of '" + fileName + "'. Cannot extract meta information.");
}

/**
 * Extracts the meta information of an individual time series from the given file name.
 *
 * <p>The file name ending is stripped via {@link #removeFileNameEnding(String)} and the remainder
 * is handed to the entity persistence naming strategy. This method does not itself check whether
 * the file name matches the individual time series pattern.
 *
 * @param fileName File name (or path) of the time series file, with or without file ending
 * @return The individual time series meta information derived from the file name
 */
public IndividualTimeSeriesMetaInformation individualTimeSeriesMetaInformation(String fileName) {
  final String withoutEnding = removeFileNameEnding(fileName);
  return entityPersistenceNamingStrategy.individualTimesSeriesMetaInformation(withoutEnding);
}

/**
 * Pattern matching one or two trailing file name endings (e.g. {@code .csv} or {@code .csv.gz}).
 * A single ending is a dot followed by 1 to 255 characters that are neither a dot, a path
 * separator ({@code /} or {@code \}) nor whitespace. Compiled once, because the ending removal is
 * invoked for every file that is inspected.
 */
private static final java.util.regex.Pattern FILE_NAME_ENDING_PATTERN =
    java.util.regex.Pattern.compile("(?:\\.[^.\\\\/\\s]{1,255}){1,2}$");

/**
 * Removes the file name ending from the given file name or path. Up to two consecutive endings
 * at the very end of the input are removed (e.g. {@code "foo.csv.gz"} becomes {@code "foo"}).
 * Dots that are followed by a path separator further to the right are left untouched. Input
 * without any ending is returned unchanged.
 *
 * @param fileName File name or path to strip the ending from, must not be {@code null}
 * @return The input without its trailing file name ending(s)
 */
public static String removeFileNameEnding(String fileName) {
  return FILE_NAME_ENDING_PATTERN.matcher(fileName).replaceAll("");
}

/**
* Get the entity name for coordinates
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public interface TimeSeriesMetaInformationSource extends DataSource {
*
* @return that mapping
*/
Map<UUID, ? extends IndividualTimeSeriesMetaInformation> getTimeSeriesMetaInformation();
Map<UUID, IndividualTimeSeriesMetaInformation> getTimeSeriesMetaInformation();

/**
* Get an option on the given time series meta information
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package edu.ie3.datamodel.io.source.csv;

import edu.ie3.datamodel.io.naming.FileNamingStrategy;
import edu.ie3.datamodel.io.naming.timeseries.ColumnScheme;
import edu.ie3.datamodel.io.naming.timeseries.IndividualTimeSeriesMetaInformation;
import edu.ie3.datamodel.io.source.TimeSeriesMetaInformationSource;
import edu.ie3.datamodel.utils.TimeSeriesUtils;
Expand All @@ -21,6 +22,9 @@
public class CsvTimeSeriesMetaInformationSource extends CsvDataSource
implements TimeSeriesMetaInformationSource {

private final Map<UUID, edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation>
timeSeriesMetaInformation;

/**
* Creates a time series type source
*
Expand All @@ -31,19 +35,22 @@ public class CsvTimeSeriesMetaInformationSource extends CsvDataSource
public CsvTimeSeriesMetaInformationSource(
String csvSep, String folderPath, FileNamingStrategy fileNamingStrategy) {
super(csvSep, folderPath, fileNamingStrategy);

// retrieve only the desired time series
timeSeriesMetaInformation =
connector.getCsvIndividualTimeSeriesMetaInformation(
TimeSeriesUtils.getAcceptedColumnSchemes().toArray(new ColumnScheme[0]));
}

@Override
public Map<UUID, ? extends IndividualTimeSeriesMetaInformation> getTimeSeriesMetaInformation() {
return connector.getIndividualTimeSeriesMetaInformation().entrySet().stream()
.filter(entry -> TimeSeriesUtils.isSchemeAccepted(entry.getValue().getColumnScheme()))
public Map<UUID, IndividualTimeSeriesMetaInformation> getTimeSeriesMetaInformation() {
return timeSeriesMetaInformation.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}

@Override
public Optional<IndividualTimeSeriesMetaInformation> getTimeSeriesMetaInformation(
UUID timeSeriesUuid) {
return Optional.ofNullable(
connector.getIndividualTimeSeriesMetaInformation().get(timeSeriesUuid));
return Optional.ofNullable(timeSeriesMetaInformation.get(timeSeriesUuid));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,10 @@ public CsvWeatherSource(
*/
private Map<Point, IndividualTimeSeries<WeatherValue>> getWeatherTimeSeries() {
/* Get only weather time series meta information */
Map<ColumnScheme, Set<CsvIndividualTimeSeriesMetaInformation>> colTypeToMetaData =
connector.getCsvIndividualTimeSeriesMetaInformation(ColumnScheme.WEATHER);
Collection<CsvIndividualTimeSeriesMetaInformation> weatherCsvMetaInformation =
connector.getCsvIndividualTimeSeriesMetaInformation(ColumnScheme.WEATHER).values();

/* Reading in weather time series */
Set<CsvIndividualTimeSeriesMetaInformation> weatherCsvMetaInformation =
colTypeToMetaData.get(ColumnScheme.WEATHER);

return readWeatherTimeSeries(weatherCsvMetaInformation, connector);
return readWeatherTimeSeries(Set.copyOf(weatherCsvMetaInformation), connector);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,42 +90,28 @@ class CsvFileConnectorTest extends Specification {
]

when:
def actual = cfc.individualTimeSeriesMetaInformation
def actual = cfc.getCsvIndividualTimeSeriesMetaInformation()

then:
actual == expected
}

def "The csv file connector returns empty Optional of CsvTimeSeriesMetaInformation when pointed to non-individual time series"() {
def "The csv file connector is able to build correct uuid to meta information mapping when restricting column schemes"() {
given:
def pathString = "lpts_h0_53990eea-1b5d-47e8-9134-6d8de36604bf"

when:
def actual = cfc.buildCsvTimeSeriesMetaInformation(pathString)

then:
!actual.present
}

def "The csv file connector is able to build correct meta information from valid input"() {
given:
def pathString = "its_pq_53990eea-1b5d-47e8-9134-6d8de36604bf"
def expected = new CsvIndividualTimeSeriesMetaInformation(
UUID.fromString("53990eea-1b5d-47e8-9134-6d8de36604bf"),
ColumnScheme.APPARENT_POWER,
""
)
def expected = [
(UUID.fromString("b88dee50-5484-4136-901d-050d8c1c97d1")): new CsvIndividualTimeSeriesMetaInformation(UUID.fromString("b88dee50-5484-4136-901d-050d8c1c97d1"), ColumnScheme.ENERGY_PRICE, "its_c_b88dee50-5484-4136-901d-050d8c1c97d1"),
(UUID.fromString("c7b0d9d6-5044-4f51-80b4-f221d8b1f14b")): new CsvIndividualTimeSeriesMetaInformation(UUID.fromString("c7b0d9d6-5044-4f51-80b4-f221d8b1f14b"), ColumnScheme.ENERGY_PRICE, "its_c_c7b0d9d6-5044-4f51-80b4-f221d8b1f14b"),
(UUID.fromString("085d98ee-09a2-4de4-b119-83949690d7b6")): new CsvIndividualTimeSeriesMetaInformation(UUID.fromString("085d98ee-09a2-4de4-b119-83949690d7b6"), ColumnScheme.WEATHER, "its_weather_085d98ee-09a2-4de4-b119-83949690d7b6")
]

when:
def actual = cfc.buildCsvTimeSeriesMetaInformation(pathString)
def actual = cfc.getCsvIndividualTimeSeriesMetaInformation(
ColumnScheme.ENERGY_PRICE,
ColumnScheme.WEATHER
)

then:
actual.present
actual.get().with {
assert uuid == expected.uuid
assert columnScheme == expected.columnScheme
/* Don't check the reader explicitly */
}
actual == expected
}

def "The csv file connector throws an Exception, if the foreseen file cannot be found"() {
Expand Down

0 comments on commit 0ee55fc

Please sign in to comment.