From cedd7c8a0c71cb9dee8772cf0d1c9411af819f53 Mon Sep 17 00:00:00 2001 From: Hossein Date: Thu, 16 Jul 2015 14:33:36 -0700 Subject: [PATCH] Updated README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index f15b565..b85560f 100755 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ When reading files the API accepts several options: * `DROPMALFORMED`: drops lines which have fewer or more tokens than expected * `FAILFAST`: aborts with a RuntimeException if encounters any malformed line * `charset`: defaults to 'UTF-8' but can be set to other valid charset names +* `inferSchema`: automatically infers column types. It requires one extra pass over the data and is `false` by default The package also support saving simple (non-nested) DataFrame. When saving you can specify the delimiter and whether we should generate a header row for the table. See following examples for more details. @@ -136,6 +137,9 @@ df.select("year", "model").save("newcars.csv", "com.databricks.spark.csv") ### R API Spark 1.4+: ```R +library(SparkR) + +Sys.setenv('SPARKR_SUBMIT_ARGS'='"--packages" "com.databricks:spark-csv_2.10:1.1.0" "sparkr-shell"') sqlContext <- sparkRSQL.init(sc) df <- read.df(sqlContext, "cars.csv", source = "com.databricks.spark.csv")