Skip to content

Commit

Permalink
[SPARK-34760][EXAMPLES] Replace favorite_color with age in JavaSQ…
Browse files Browse the repository at this point in the history
…LDataSourceExample

### What changes were proposed in this pull request?
In JavaSparkSQLExample, executing 'peopleDF.write().partitionBy("favorite_color").bucketBy(42,"name").saveAsTable("people_partitioned_bucketed");'
throws an exception: 'Exception in thread "main" org.apache.spark.sql.AnalysisException: partition column favorite_color is not defined in table people_partitioned_bucketed, defined table columns are: age, name;'
Change the column favorite_color to age.

### Why are the changes needed?
So that JavaSparkSQLExample runs successfully.

### Does this PR introduce _any_ user-facing change?
NO

### How was this patch tested?
Tested by running JavaSparkSQLExample.

Closes #31851 from zengruios/SPARK-34760.

Authored-by: zengruios <578395184@qq.com>
Signed-off-by: Kent Yao <yao@apache.org>
  • Loading branch information
zengruios authored and yaooqinn committed Mar 18, 2021
1 parent 2e836cd commit 5570f81
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,15 @@ private static void runBasicDataSourceExample(SparkSession spark) {
.save("namesPartByColor.parquet");
// $example off:write_partitioning$
// $example on:write_partition_and_bucket$
peopleDF
usersDF
.write()
.partitionBy("favorite_color")
.bucketBy(42, "name")
.saveAsTable("people_partitioned_bucketed");
.saveAsTable("users_partitioned_bucketed");
// $example off:write_partition_and_bucket$

spark.sql("DROP TABLE IF EXISTS people_bucketed");
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed");
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed");
}

private static void runBasicParquetExample(SparkSession spark) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class JavaSparkSQLExample {
// $example on:create_ds$
public static class Person implements Serializable {
private String name;
private int age;
private long age;

public String getName() {
return name;
Expand All @@ -75,11 +75,11 @@ public void setName(String name) {
this.name = name;
}

public int getAge() {
public long getAge() {
return age;
}

public void setAge(int age) {
public void setAge(long age) {
this.age = age;
}
}
Expand Down Expand Up @@ -225,11 +225,11 @@ private static void runDatasetCreationExample(SparkSession spark) {
// +---+----+

// Encoders for most common types are provided in class Encoders
Encoder<Integer> integerEncoder = Encoders.INT();
Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder);
Dataset<Integer> transformedDS = primitiveDS.map(
(MapFunction<Integer, Integer>) value -> value + 1,
integerEncoder);
Encoder<Long> longEncoder = Encoders.LONG();
Dataset<Long> primitiveDS = spark.createDataset(Arrays.asList(1L, 2L, 3L), longEncoder);
Dataset<Long> transformedDS = primitiveDS.map(
(MapFunction<Long, Long>) value -> value + 1L,
longEncoder);
transformedDS.collect(); // Returns [2, 3, 4]

// DataFrames can be converted to a Dataset by providing a class. Mapping based on name
Expand Down
4 changes: 2 additions & 2 deletions examples/src/main/python/sql/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def basic_datasource_example(spark):
.write
.partitionBy("favorite_color")
.bucketBy(42, "name")
.saveAsTable("people_partitioned_bucketed"))
.saveAsTable("users_partitioned_bucketed"))
# $example off:write_partition_and_bucket$

# $example on:manual_load_options$
Expand Down Expand Up @@ -135,7 +135,7 @@ def basic_datasource_example(spark):
# $example off:direct_sql$

spark.sql("DROP TABLE IF EXISTS people_bucketed")
spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed")
spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed")


def parquet_example(spark):
Expand Down

0 comments on commit 5570f81

Please sign in to comment.