From 51bb2774609a6a8070c261e748ed6355761a6141 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Wed, 1 Nov 2017 14:46:57 +0100
Subject: [PATCH] [minor] Data source v2 docs update.

---
 .../org/apache/spark/sql/sources/v2/DataSourceV2.java      |  9 ++++-----
 .../org/apache/spark/sql/sources/v2/WriteSupport.java      |  4 ++--
 .../sql/sources/v2/reader/DataSourceV2Reader.java          | 10 +++++-----
 .../v2/reader/SupportsPushDownCatalystFilters.java         |  2 --
 .../sql/sources/v2/reader/SupportsScanUnsafeRow.java       |  2 --
 .../sql/sources/v2/writer/DataSourceV2Writer.java          | 11 +++--------
 .../spark/sql/sources/v2/writer/DataWriter.java            | 10 +++++-----
 7 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
index dbcbe326a7510..6234071320dc9 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2.java
@@ -20,12 +20,11 @@
 import org.apache.spark.annotation.InterfaceStability;
 
 /**
- * The base interface for data source v2. Implementations must have a public, no arguments
- * constructor.
+ * The base interface for data source v2. Implementations must have a public, 0-arg constructor.
  *
- * Note that this is an empty interface, data source implementations should mix-in at least one of
- * the plug-in interfaces like {@link ReadSupport}. Otherwise it's just a dummy data source which is
- * un-readable/writable.
+ * Note that this is an empty interface. Data source implementations should mix in at least one of
+ * the plug-in interfaces like {@link ReadSupport} and {@link WriteSupport}. Otherwise it's just
+ * a dummy data source that is neither readable nor writable.
  */
 @InterfaceStability.Evolving
 public interface DataSourceV2 {}
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
index a8a961598bde3..8fdfdfd19ea1e 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/WriteSupport.java
@@ -36,8 +36,8 @@ public interface WriteSupport {
   * sources can return None if there is no writing needed to be done according to the save mode.
   *
   * @param jobId A unique string for the writing job. It's possible that there are many writing
-   *              jobs running at the same time, and the returned {@link DataSourceV2Writer} should
-   *              use this job id to distinguish itself with writers of other jobs.
+   *              jobs running at the same time, and the returned {@link DataSourceV2Writer} can
+   *              use this job id to distinguish itself from other jobs.
   * @param schema the schema of the data to be written.
   * @param mode the save mode which determines what to do when the data are already in this data
   *             source, please refer to {@link SaveMode} for more details.
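The mix-in design above is easiest to see in a skeleton. The following is a minimal sketch, not part of this patch: the class name is hypothetical, and the createReader/createWriter signatures (including the DataSourceV2Options container) are assumed from the v2 API as it stood around this commit, so the interfaces themselves remain the authoritative reference.

import java.util.Optional;

import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.sources.v2.DataSourceV2;
import org.apache.spark.sql.sources.v2.DataSourceV2Options;
import org.apache.spark.sql.sources.v2.ReadSupport;
import org.apache.spark.sql.sources.v2.WriteSupport;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.writer.DataSourceV2Writer;
import org.apache.spark.sql.types.StructType;

// DataSourceV2 itself is empty; readability and writability come entirely
// from the mixed-in plug-in interfaces like ReadSupport and WriteSupport.
public class ExampleSource implements DataSourceV2, ReadSupport, WriteSupport {

  // The public 0-arg constructor required by the DataSourceV2 contract.
  public ExampleSource() {}

  @Override
  public DataSourceV2Reader createReader(DataSourceV2Options options) {
    // Reader construction is elided; see the reader sketch further below.
    throw new UnsupportedOperationException("not implemented in this sketch");
  }

  @Override
  public Optional<DataSourceV2Writer> createWriter(
      String jobId, StructType schema, SaveMode mode, DataSourceV2Options options) {
    // Per the WriteSupport javadoc, returning an empty Optional signals that
    // no writing is needed for this save mode.
    return Optional.empty();
  }
}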
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
index 5989a4ac8440b..88c3219a75c1d 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/DataSourceV2Reader.java
@@ -34,11 +34,11 @@
 *
 * There are mainly 3 kinds of query optimizations:
 * 1. Operators push-down. E.g., filter push-down, required columns push-down(aka column
- * pruning), etc. These push-down interfaces are named like `SupportsPushDownXXX`.
- * 2. Information Reporting. E.g., statistics reporting, ordering reporting, etc. These
- * reporting interfaces are named like `SupportsReportingXXX`.
- * 3. Special scans. E.g, columnar scan, unsafe row scan, etc. These scan interfaces are named
- * like `SupportsScanXXX`.
+ * pruning), etc. Names of these interfaces start with `SupportsPushDown`.
+ * 2. Information Reporting. E.g., statistics reporting, ordering reporting, etc.
+ * Names of these interfaces start with `SupportsReporting`.
+ * 3. Special scans. E.g., columnar scan, unsafe row scan, etc.
+ * Names of these interfaces start with `SupportsScan`.
 *
 * Spark first applies all operator push-down optimizations that this data source supports. Then
 * Spark collects information this data source reported for further optimizations. Finally Spark
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
index d6091774d75aa..efc42242f4421 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownCatalystFilters.java
@@ -31,8 +31,6 @@
 * {@link SupportsPushDownFilters}, Spark will ignore {@link SupportsPushDownFilters} and only
 * process this interface.
 */
-@InterfaceStability.Evolving
-@Experimental
 @InterfaceStability.Unstable
 public interface SupportsPushDownCatalystFilters {
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
index d5eada808a16c..6008fb5f71cc1 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsScanUnsafeRow.java
@@ -30,8 +30,6 @@
 * This is an experimental and unstable interface, as {@link UnsafeRow} is not public and may get
 * changed in the future Spark versions.
 */
-@InterfaceStability.Evolving
-@Experimental
 @InterfaceStability.Unstable
 public interface SupportsScanUnsafeRow extends DataSourceV2Reader {
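To make the `SupportsPushDown` family concrete, here is a hedged sketch of a reader that accepts simple equality filters and returns the rest for Spark to evaluate. Everything here is illustrative: the class is hypothetical, and the pushFilters contract (return the unsupported filters) plus the createReadTasks/ReadTask shape are assumed from the v2 read path as it existed around this commit; both were renamed in later releases.

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.EqualTo;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader;
import org.apache.spark.sql.sources.v2.reader.ReadTask;
import org.apache.spark.sql.sources.v2.reader.SupportsPushDownFilters;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// A reader mixing in one of the `SupportsPushDown` interfaces. Spark calls
// pushFilters during planning, before asking for the actual scan tasks.
public class ExampleReader implements DataSourceV2Reader, SupportsPushDownFilters {

  // Filters this source has agreed to evaluate itself during the scan.
  private final List<Filter> pushed = new ArrayList<>();

  @Override
  public Filter[] pushFilters(Filter[] filters) {
    List<Filter> rejected = new ArrayList<>();
    for (Filter filter : filters) {
      if (filter instanceof EqualTo) {
        pushed.add(filter);   // simple equality: evaluated by the source
      } else {
        rejected.add(filter); // everything handed back is evaluated by Spark
      }
    }
    return rejected.toArray(new Filter[0]);
  }

  @Override
  public StructType readSchema() {
    return new StructType().add("i", DataTypes.IntegerType);
  }

  @Override
  public List<ReadTask<Row>> createReadTasks() {
    throw new UnsupportedOperationException("scan planning elided in this sketch");
  }
}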
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
index 8d8e33633fb0d..37bb15f87c59a 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataSourceV2Writer.java
@@ -40,15 +40,10 @@
 * some writers are aborted, or the job failed with an unknown reason, call
 * {@link #abort(WriterCommitMessage[])}.
 *
- * Spark won't retry failed writing jobs, users should do it manually in their Spark applications if
- * they want to retry.
+ * While Spark will retry failed writing tasks, Spark won't retry failed writing jobs. Users should
+ * do it manually in their Spark applications if they want to retry.
 *
- * Please refer to the document of commit/abort methods for detailed specifications.
- *
- * Note that, this interface provides a protocol between Spark and data sources for transactional
- * data writing, but the transaction here is Spark-level transaction, which may not be the
- * underlying storage transaction. For example, Spark successfully writes data to a Cassandra data
- * source, but Cassandra may need some more time to reach consistency at storage level.
+ * Please refer to the documentation of commit/abort methods for detailed specifications.
 */
 @InterfaceStability.Evolving
 public interface DataSourceV2Writer {
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
index d84afbae32892..dc1aab33bdcef 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java
@@ -57,8 +57,8 @@ public interface DataWriter {
   /**
    * Writes one record.
    *
-   * If this method fails(throw exception), {@link #abort()} will be called and this data writer is
-   * considered to be failed.
+   * If this method fails (by throwing an exception), {@link #abort()} will be called and this
+   * data writer is considered to have failed.
    */
   void write(T record);
 
@@ -70,10 +70,10 @@
    * The written data should only be visible to data source readers after
    * {@link DataSourceV2Writer#commit(WriterCommitMessage[])} succeeds, which means this method
    * should still "hide" the written data and ask the {@link DataSourceV2Writer} at driver side to
-   * do the final commitment via {@link WriterCommitMessage}.
+   * do the final commit via {@link WriterCommitMessage}.
    *
-   * If this method fails(throw exception), {@link #abort()} will be called and this data writer is
-   * considered to be failed.
+   * If this method fails (by throwing an exception), {@link #abort()} will be called and this
+   * data writer is considered to have failed.
    */
   WriterCommitMessage commit();
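Finally, the DataWriter contract above (write, then a task-level commit that stays invisible until the driver-side DataSourceV2Writer#commit) is clearest with staged output. The sketch below is hypothetical: the class, the commit-message type, and the file-staging scheme are all illustrative; only the write/commit/abort signatures come from the interface in this patch.

import java.io.IOException;
import java.io.Serializable;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.sources.v2.writer.DataWriter;
import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage;

// Buffers rows and, on the task-level commit(), flushes them to a staging
// file that readers cannot see; publishing the file is left to the
// driver-side DataSourceV2Writer#commit, per the contract above.
public class StagingDataWriter implements DataWriter<Row> {

  // Sent to the driver so it knows which staged file this task produced.
  public static class StagedFileMessage implements WriterCommitMessage, Serializable {
    public final String stagedFile;
    public StagedFileMessage(String stagedFile) { this.stagedFile = stagedFile; }
  }

  private final Path stagedFile;
  private final List<String> buffer = new ArrayList<>();

  public StagingDataWriter(String stagingDir, int partitionId, int attemptNumber) {
    // Per-task, per-attempt name so retried or speculative attempts never collide.
    this.stagedFile = Paths.get(
        stagingDir, "part-" + partitionId + "-" + attemptNumber + ".staged");
  }

  @Override
  public void write(Row record) {
    // If this throws, Spark calls abort() on this writer and may retry the task.
    buffer.add(record.mkString(","));
  }

  @Override
  public WriterCommitMessage commit() {
    try {
      Files.write(stagedFile, buffer);    // staged, still hidden from readers
    } catch (IOException e) {
      throw new UncheckedIOException(e);  // triggers abort() and a task retry
    }
    return new StagedFileMessage(stagedFile.toString());
  }

  @Override
  public void abort() {
    try {
      Files.deleteIfExists(stagedFile);   // best-effort cleanup of this attempt
    } catch (IOException e) {
      // Ignore: abort is best-effort; driver-side abort cleans up leftovers.
    }
  }
}

Keying the staged file on partition id and attempt number is one way to keep retried or speculative attempts from colliding; the driver can then publish exactly one staged file per partition in DataSourceV2Writer#commit and clean up the rest in abort.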