Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CARBONDATA-2434] Add ExternalTableExample and LuceneDataMapExample
For preparing 1.4.0 release. This closes #2268
- Loading branch information
1 parent 09feb9c · commit d5da9a1
Showing 4 changed files with 234 additions and 6 deletions.
There are no files selected for viewing
104 changes: 104 additions & 0 deletions
104
examples/spark2/src/main/scala/org/apache/carbondata/examples/ExternalTableExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.carbondata.examples | ||
|
||
import java.io.File | ||
|
||
import org.apache.spark.sql.{CarbonEnv, SaveMode, SparkSession} | ||
|
||
import org.apache.carbondata.core.constants.CarbonCommonConstants | ||
import org.apache.carbondata.core.metadata.CarbonTableIdentifier | ||
import org.apache.carbondata.core.util.CarbonProperties | ||
import org.apache.carbondata.examples.util.ExampleUtils | ||
|
||
/** | ||
* This example is for showing how to create external table with location. | ||
*/ | ||
|
||
object ExternalTableExample { | ||
|
||
def main(args: Array[String]) { | ||
val spark = ExampleUtils.createCarbonSession("ExternalTableExample") | ||
exampleBody(spark) | ||
spark.close() | ||
} | ||
|
||
def exampleBody(spark : SparkSession): Unit = { | ||
|
||
CarbonProperties.getInstance() | ||
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") | ||
|
||
// Create origin_table | ||
spark.sql("DROP TABLE IF EXISTS origin_table") | ||
spark.sql( | ||
s""" | ||
| CREATE TABLE origin_table( | ||
| shortField SHORT, | ||
| intField INT, | ||
| bigintField LONG, | ||
| doubleField DOUBLE, | ||
| stringField STRING, | ||
| timestampField TIMESTAMP, | ||
| decimalField DECIMAL(18,2), | ||
| dateField DATE, | ||
| charField CHAR(5), | ||
| floatField FLOAT | ||
| ) | ||
| STORED BY 'carbondata' | ||
""".stripMargin) | ||
|
||
val rootPath = new File(this.getClass.getResource("/").getPath | ||
+ "../../../..").getCanonicalPath | ||
val path = s"$rootPath/examples/spark2/src/main/resources/data.csv" | ||
|
||
// load 4 times, each load has 10 rows data | ||
// scalastyle:off | ||
(1 to 4).foreach(_ => spark.sql( | ||
s""" | ||
| LOAD DATA LOCAL INPATH '$path' | ||
| INTO TABLE origin_table | ||
| OPTIONS('HEADER'='true', 'COMPLEX_DELIMITER_LEVEL_1'='#') | ||
""".stripMargin)) | ||
// scalastyle:on | ||
|
||
// 40 rows | ||
spark.sql("SELECT count(*) FROM origin_table").show() | ||
|
||
val origin_table_path = CarbonEnv.getTablePath(Some("default"), "origin_table")(spark) | ||
|
||
// Create external_table | ||
spark.sql("DROP TABLE IF EXISTS external_table") | ||
spark.sql("CREATE EXTERNAL TABLE external_table STORED BY 'carbondata'" + | ||
s" LOCATION '$origin_table_path'") | ||
spark.sql("SELECT count(*) FROM external_table").show() | ||
|
||
// Load 2 times again | ||
(1 to 2).foreach(_ => spark.sql( | ||
s""" | ||
| LOAD DATA LOCAL INPATH '$path' | ||
| INTO TABLE origin_table | ||
| OPTIONS('HEADER'='true', 'COMPLEX_DELIMITER_LEVEL_1'='#') | ||
""".stripMargin)) | ||
|
||
spark.sql("SELECT count(*) FROM external_table").show() | ||
|
||
// Drop tables | ||
spark.sql("DROP TABLE IF EXISTS origin_table") | ||
spark.sql("DROP TABLE IF EXISTS external_table") | ||
} | ||
} |
116 changes: 116 additions & 0 deletions
116
examples/spark2/src/main/scala/org/apache/carbondata/examples/LuceneDataMapExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.carbondata.examples | ||
|
||
import org.apache.spark.sql.{SaveMode, SparkSession} | ||
|
||
import org.apache.carbondata.examples.util.ExampleUtils | ||
|
||
|
||
/**
 * This example demonstrates the lucene datamap: it builds a test table,
 * creates a lucene index on it, and compares query times with and without
 * the datamap before showing TEXT_MATCH / TEXT_MATCH_WITH_LIMIT usage.
 */
object LuceneDataMapExample {

  def main(args: Array[String]) {
    val spark = ExampleUtils.createCarbonSession("LuceneDataMapExample")
    exampleBody(spark)
    spark.close()
  }

  def exampleBody(spark : SparkSession): Unit = {

    // build the test data, please increase the data for more obvious comparison.
    // if set the data is larger than 100M, it will take 10+ mins.
    import scala.util.Random

    import spark.implicits._
    val random = new Random()
    val personDF = spark.sparkContext.parallelize(1 to 10 * 10 * 1000)
      .map(x => ("which test" + random.nextInt(10000) + " good" + random.nextInt(10),
        "who and name" + x % 8, "city" + x % 50, x % 60))
      .toDF("id", "name", "city", "age")

    spark.sql("DROP TABLE IF EXISTS personTable")
    personDF.write.format("carbondata")
      .option("tableName", "personTable")
      .option("compress", "true")
      .mode(SaveMode.Overwrite).save()

    // create lucene datamap on personTable
    spark.sql(
      s"""
         | CREATE DATAMAP IF NOT EXISTS dm ON TABLE personTable
         | USING 'lucene'
         | DMProperties('INDEX_COLUMNS'='id , name')
      """.stripMargin)

    spark.sql("refresh datamap dm ON TABLE personTable")

    // 1. Compare the performance:

    // Runs `code` once and returns the elapsed wall-clock time in seconds.
    def secondsTaken(code: => Unit): Double = {
      val begin = System.currentTimeMillis()
      code
      (System.currentTimeMillis() - begin).toDouble / 1000
    }

    val timeWithoutLuceneDataMap = secondsTaken {
      spark.sql(
        s"""
           | SELECT count(*)
           | FROM personTable where id like '% test1 %'
         """.stripMargin).show()
    }

    val timeWithLuceneDataMap = secondsTaken {
      spark.sql(
        s"""
           | SELECT count(*)
           | FROM personTable where TEXT_MATCH('id:test1')
         """.stripMargin).show()
    }

    // scalastyle:off
    println("time for query on table with lucene datamap table:" + timeWithLuceneDataMap.toString)
    println("time for query on table without lucene datamap table:" + timeWithoutLuceneDataMap.toString)
    // scalastyle:on

    // 2. Search for word "test1" and not "good" in the id field
    spark.sql(
      s"""
         | SELECT id,name
         | FROM personTable where TEXT_MATCH('id:test1 -id:good1')
       """.stripMargin).show(100)

    // 3. TEXT_MATCH_WITH_LIMIT usage:
    spark.sql(
      s"""
         | SELECT id,name
         | FROM personTable where TEXT_MATCH_WITH_LIMIT('id:test1',10)
       """.stripMargin).show()

    spark.sql("DROP TABLE IF EXISTS personTable")
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters