From bea3f3306d45342c2acdd8a53ac6eccac61bef04 Mon Sep 17 00:00:00 2001
From: Alex Baretta
Date: Fri, 26 Dec 2014 18:29:29 -0800
Subject: [PATCH] [Alex Baretta] SQLContext: overload createParquetFile

Overload taking a StructType instead of TypeTag
---
 .../org/apache/spark/sql/SQLContext.scala | 39 +++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 6a1a4d995bf61..a0f3fcb509051 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -269,6 +269,45 @@ class SQLContext(@transient val sparkContext: SparkContext)
         path, ScalaReflection.attributesFor[A], allowExisting, conf, this))
   }
 
+
+  /**
+   * :: Experimental ::
+   * Creates an empty parquet file with the provided schema. The parquet file thus created
+   * can be registered as a table, which can then be used as the target of future
+   * `insertInto` operations.
+   *
+   * {{{
+   *   val sqlContext = new SQLContext(...)
+   *   import sqlContext._
+   *
+   *   val schema =
+   *     StructType(List(StructField("name", StringType),
+   *       StructField("age", IntegerType)))
+   *   createParquetFile(schema, "path/to/file.parquet").registerTempTable("people")
+   *   sql("INSERT INTO people SELECT 'michael', 29")
+   * }}}
+   *
+   * @param schema StructType describing the records to be stored in the Parquet file.
+   * @param path The path where the directory containing parquet metadata should be created.
+   *             Data inserted into this table will also be stored at this location.
+   * @param allowExisting When false, an exception will be thrown if this directory already exists.
+   * @param conf A Hadoop configuration object that can be used to specify options to the parquet
+   *             output format.
+   *
+   * @group userf
+   */
+  @Experimental
+  def createParquetFile(
+      schema: StructType,
+      path: String,
+      allowExisting: Boolean = true,
+      conf: Configuration = new Configuration()): SchemaRDD = {
+    new SchemaRDD(
+      this,
+      ParquetRelation.createEmpty(
+        path, schema.toAttributes, allowExisting, conf, this))
+  }
+
   /**
    * Registers the given RDD as a temporary table in the catalog. Temporary tables exist only
    * during the lifetime of this instance of SQLContext.
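
For context, a minimal usage sketch contrasting the existing TypeTag-based overload with the new StructType-based one (not part of the patch). It assumes a Spark 1.2-era SQLContext created from a SparkContext named `sc`; the file paths and table names are illustrative only.

  // Data type aliases (StructType, StructField, StringType, IntegerType) are
  // exposed through the org.apache.spark.sql package object in this Spark version.
  import org.apache.spark.sql._

  val sqlContext = new SQLContext(sc)
  import sqlContext._

  // Existing overload: the schema is derived from a case class via a TypeTag,
  // so it has to be known at compile time.
  case class Person(name: String, age: Int)
  createParquetFile[Person]("path/to/people_static.parquet").registerTempTable("people_static")

  // New overload: the schema is an ordinary StructType value, so it can be
  // built at runtime without any case class.
  val schema = StructType(List(
    StructField("name", StringType),
    StructField("age", IntegerType)))
  createParquetFile(schema, "path/to/people_dynamic.parquet").registerTempTable("people_dynamic")

  // Either table can then serve as an insert target, as in the patch's Scaladoc example.
  sql("INSERT INTO people_dynamic SELECT 'michael', 29")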