apache · swapnilushinde · May 25, 2019 · May 28, 2019
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -312,6 +312,23 @@ class SparkSession private(
     Dataset.ofRows(self, ExternalRDD(rdd, self)(encoder))
   }
 
+  /**
+   * Creates a `DataFrame` from CSV files with the schema of a Product type (e.g. a case class).
+   * All CSV data source options can be used; `inferSchema` (any key case) is set to "false".
+   * @tparam A Product type whose encoder schema is applied to the files
+   * @param options CSV data source options
+   * @param csvFilePath paths of the CSV files to read
+   * @return a `DataFrame` read from the given files using the schema of `A`
+   */
+  def createDataFrame[A <: Product : TypeTag](options: Map[String, String], csvFilePath: String*)
+  : DataFrame = {
+    val csvOptions = options.map {
+      case (key, _) if "inferSchema".equalsIgnoreCase(key) => key -> "false"
+      case other => other
+    }
+    read.options(csvOptions).schema(Encoders.product[A].schema).csv(csvFilePath: _*)
+  }
+
   /**
    * :: Experimental ::
    * Creates a `DataFrame` from a local Seq of Product.
@@ -527,6 +544,42 @@ class SparkSession private(
     createDataset(data.asScala)
   }
 
+  /**
+   * Creates a [[Dataset]] from CSV files with the schema of a Product type (e.g. a case class).
+   * All CSV data source options can be used; an `inferSchema` entry (key matched
+   * case-insensitively) is overridden to "false" since the schema is taken from `A`.
+   *
+   * == Example ==
+   *
+   * {{{
+   *   import spark.implicits._
+   *   case class Person(name: String, age: Long)
+   *   val ds = spark.createDataset[Person](Map("header" -> "false"), csvFilePath)
+   *
+   *   ds.show()
+   *   // +-------+---+
+   *   // |   name|age|
+   *   // +-------+---+
+   *   // |Michael| 29|
+   *   // |   Andy| 30|
+   *   // | Justin| 19|
+   *   // +-------+---+
+   * }}}
+   * @tparam A Product type whose encoder supplies the schema and the element type
+   * @param options CSV data source options
+   * @param csvFilePath paths of the CSV files to read
+   * @return a [[Dataset]] of `A` read from the given files
+   */
+  def createDataset[A <: Product : TypeTag](options: Map[String, String], csvFilePath: String*)
+  : Dataset[A] = {
+    val encoder = Encoders.product[A]
+    val csvOptions = options.map {
+      case (key, _) if "inferSchema".equalsIgnoreCase(key) => key -> "false"
+      case other => other
+    }
+    read.options(csvOptions).schema(encoder.schema).csv(csvFilePath: _*).as[A](encoder)
+  }
+
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single `LongType` column named `id`, containing elements