From 42c9814069dfd519bb21c445164ef23e5fa121bc Mon Sep 17 00:00:00 2001 From: Stephan Ewen Date: Mon, 13 Oct 2014 21:36:10 +0200 Subject: [PATCH] [FLINK-1148] Create a writeAsCsv(path, writemode) variant --- .../org/apache/flink/api/java/DataSet.java | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java index dd8c4ba812cae..bfaf6126f3cb4 100644 --- a/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java +++ b/flink-java/src/main/java/org/apache/flink/api/java/DataSet.java @@ -554,9 +554,8 @@ public UnsortedGrouping groupBy(String... fields) { * {@link DataSet DataSets} on key equality and provides multiple ways to combine * joining elements into one DataSet.
* - * This method returns a {@link JoinOperatorSets} on which - * {@link JoinOperatorSets#where()} needs to be called to define the join key of the first - * joining (i.e., this) DataSet. + * This method returns a {@link JoinOperatorSets} on which one of the {@code where} methods + * can be called to define the join key of the first joining (i.e., this) DataSet. * * @param other The other DataSet with which this DataSet is joined. * @return A JoinOperatorSets to continue the definition of the Join transformation. @@ -576,7 +575,7 @@ public JoinOperatorSets join(DataSet other) { * This method also gives the hint to the optimizer that the second DataSet to join is much * smaller than the first one.
* This method returns a {@link JoinOperatorSets} on which - * {@link JoinOperatorSets#where()} needs to be called to define the join key of the first + * {@link JoinOperatorSets#where(String...)} needs to be called to define the join key of the first * joining (i.e., this) DataSet. * * @param other The other DataSet with which this DataSet is joined. @@ -596,9 +595,8 @@ public JoinOperatorSets joinWithTiny(DataSet other) { * joining elements into one DataSet.
* This method also gives the hint to the optimizer that the second DataSet to join is much * larger than the first one.
- * This method returns a {@link JoinOperatorSets JoinOperatorSet} on which - * {@link JoinOperatorSets#where()} needs to be called to define the join key of the first - * joining (i.e., this) DataSet. + * This method returns a {@link JoinOperatorSets} on which one of the {@code where} methods + * can be called to define the join key of the first joining (i.e., this) DataSet. * * @param other The other DataSet with which this DataSet is joined. * @return A JoinOperatorSet to continue the definition of the Join transformation. @@ -623,9 +621,8 @@ public JoinOperatorSets joinWithHuge(DataSet other) { * is called with an empty group for the non-existing group.
* The CoGroupFunction can iterate over the elements of both groups and return any number * of elements including none.
- * This method returns a {@link CoGroupOperatorSets} on which - * {@link CoGroupOperatorSets#where()} needs to be called to define the grouping key of the first - * (i.e., this) DataSet. + * This method returns a {@link CoGroupOperatorSets} on which one of the {@code where} methods + * can be called to define the grouping key of the first (i.e., this) DataSet. * * @param other The other DataSet of the CoGroup transformation. * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation. @@ -1002,6 +999,24 @@ public DataSink writeAsCsv(String filePath) { return writeAsCsv(filePath, CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER); } + /** + * Writes a {@link Tuple} DataSet as a CSV file to the specified location.
+ * Note: Only a Tuple DataSet can be written as a CSV file.
+ * For each Tuple field the result of {@link Object#toString()} is written. + * Tuple fields are separated by the default field delimiter {@code "comma" (,)}.
+ * Tuples are separated by the newline character ({@code \n}). + * + * @param filePath The path pointing to the location the CSV file is written to. + * @param writeMode The behavior regarding existing files. Options are NO_OVERWRITE and OVERWRITE. + * @return The DataSink that writes the DataSet. + * + * @see Tuple + * @see CsvOutputFormat + */ + public DataSink writeAsCsv(String filePath, WriteMode writeMode) { + return internalWriteAsCsv(new Path(filePath),CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER, writeMode); + } + /** * Writes a {@link Tuple} DataSet as a CSV file to the specified location with the specified field and line delimiters.
* Note: Only a Tuple DataSet can written as a CSV file.
@@ -1026,7 +1041,7 @@ public DataSink writeAsCsv(String filePath, String rowDelimiter, String field * @param filePath The path pointing to the location the CSV file is written to. * @param rowDelimiter The row delimiter to separate Tuples. * @param fieldDelimiter The field delimiter to separate Tuple fields. - * @param writeMode Control the behavior for existing files. Options are NO_OVERWRITE and OVERWRITE. + * @param writeMode The behavior regarding existing files. Options are NO_OVERWRITE and OVERWRITE. * * @see Tuple * @see CsvOutputFormat