Merge 3a12fb6 into 9051ab7

QiangCai committed Mar 4, 2019
2 parents 9051ab7 + 3a12fb6 · commit ceb78a7

Showing 7 changed files with 515 additions and 2 deletions.
@@ -195,4 +195,8 @@ public void dropAllCache() {
}
cacheTypeToCacheMap.clear();
}

public CarbonLRUCache getCarbonCache() {
return carbonLRUCache;
}
}
@@ -305,4 +305,8 @@ public void clear() {
lruCacheMap.clear();
}
}

public Map<String, Cacheable> getCacheMap() {
return lruCacheMap;
}
}
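Taken together, the two new accessors expose the process-wide LRU cache and its backing map so that a command can walk the cached entries. Below is a minimal sketch (illustrative only, not code from this commit) of how they compose, assuming the existing `Cacheable.getMemorySize()` accessor from carbondata-core:

```scala
import scala.collection.JavaConverters._

import org.apache.carbondata.core.cache.CacheProvider

object CacheUsageSketch {
  // Illustrative only: sum the memory held by every cached entry
  // (index files, dictionaries, datamaps) using the accessors added above.
  // Assumes Cacheable.getMemorySize(), which the LRU cache uses for sizing.
  def totalCachedBytes(): Long = {
    val cacheMap = CacheProvider.getInstance().getCarbonCache.getCacheMap
    cacheMap.asScala.values.map(_.getMemorySize).sum
  }
}
```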
21 changes: 21 additions & 0 deletions docs/ddl-of-carbondata.md
@@ -67,6 +67,7 @@ CarbonData DDL statements are documented here, which includes:
* [SPLIT PARTITION](#split-a-partition)
* [DROP PARTITION](#drop-a-partition)
* [BUCKETING](#bucketing)
* [CACHE](#cache)

## CREATE TABLE

@@ -1088,4 +1089,24 @@ Users can specify which columns to include and exclude for local dictionary generation
TBLPROPERTIES ('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='productName')
```

## CACHE

CarbonData internally uses LRU caching to improve performance. Users can inspect the current
cache usage in memory with the following command:

```sql
SHOW METACACHE
```

This shows the overall memory consumed in the cache, broken down by category: index files,
dictionary, and datamaps. It also shows the cache usage of every table and child table in the
current database.
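
For illustration, the shape of the output below is inferred from the test expectations in this
commit (the test asserts the row `cache_empty_db, ALL, 0, 0, 0` for an empty database), so the
exact column headers are an approximation:

```
Database         Table   Index size   Datamap size   Dictionary size
cache_empty_db   ALL     0            0              0
```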

```sql
SHOW METACACHE ON TABLE tableName
```

This shows detailed cache usage for the table `tableName`: its carbonindex files, its dictionary
files, its datamaps, and its child tables.
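
For illustration, the per-table output lists one row per cache category. The rows below mirror
the test expectation in this commit for a table that has an index file on disk but nothing cached
yet (hence `0/1 index files cached`):

```
Index        0 bytes   0/1 index files cached
Dictionary   0 bytes
```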

This command is not allowed on child tables.
@@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.carbondata.sql.commands

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
  override protected def beforeAll(): Unit = {
    // use new database
    sql("drop database if exists cache_db cascade").collect()
    sql("drop database if exists cache_empty_db cascade").collect()
    sql("create database cache_db").collect()
    sql("create database cache_empty_db").collect()
    dropTable
    sql("use cache_db").collect()
    sql(
      """
        | CREATE TABLE cache_db.cache_1
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
        | TBLPROPERTIES('DICTIONARY_INCLUDE'='deptname')
      """.stripMargin)
    // bloom filter datamap on deptno
    sql("CREATE DATAMAP IF NOT EXISTS cache_1_bloom ON TABLE cache_db.cache_1 USING 'bloomfilter' " +
        "DMPROPERTIES('INDEX_COLUMNS'='deptno')")
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_1 ")

    sql(
      """
        | CREATE TABLE cache_2
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_db.cache_2 ")
    sql("insert into table cache_2 select * from cache_1").collect()

    sql(
      """
        | CREATE TABLE cache_3
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_3 ")

    // use default database
    sql("use default").collect()
    sql(
      """
        | CREATE TABLE cache_4
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql("insert into table cache_4 select * from cache_db.cache_2").collect()

    // standard partition table
    sql(
      """
        | CREATE TABLE cache_5
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | PARTITIONED BY (deptno int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(
      "insert into table cache_5 select empno,empname,designation,doj,workgroupcategory," +
      "workgroupcategoryname,deptname,projectcode,projectjoindate,projectenddate,attendance," +
      "utilization,salary,deptno from cache_4").collect()

    // preaggregate datamap on cache_4
    sql("create datamap cache_4_count on table cache_4 using 'preaggregate' as " +
        "select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname")

    // run queries so that index, dictionary and datamap entries get cached
    sql("select max(deptname) from cache_db.cache_1").collect()
    sql("SELECT deptno FROM cache_db.cache_1 where deptno=10").collect()
    sql("select count(*) from cache_db.cache_2").collect()
    sql("select count(*) from cache_4").collect()
    sql("select count(*) from cache_5").collect()
    sql("select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname").collect()
  }

  override protected def afterAll(): Unit = {
    sql("use default").collect()
    dropTable
  }

  private def dropTable = {
    sql("DROP TABLE IF EXISTS cache_db.cache_1")
    sql("DROP TABLE IF EXISTS cache_db.cache_2")
    sql("DROP TABLE IF EXISTS cache_db.cache_3")
    sql("DROP TABLE IF EXISTS default.cache_4")
    sql("DROP TABLE IF EXISTS default.cache_5")
  }

  test("show cache") {
    sql("use cache_empty_db").collect()
    val result1 = sql("show metacache").collect()
    assertResult(2)(result1.length)
    assertResult(Row("cache_empty_db", "ALL", "0", "0", "0"))(result1(1))

    sql("use cache_db").collect()
    val result2 = sql("show metacache").collect()
    assertResult(4)(result2.length)

    sql("use default").collect()
    val result3 = sql("show metacache").collect()
    val dataMapCacheInfo = result3
      .map(row => row.getString(1))
      .filter(table => table.equals("cache_4_cache_4_count"))
    assertResult(1)(dataMapCacheInfo.length)
  }

  test("show metacache on table") {
    sql("use cache_db").collect()
    val result1 = sql("show metacache on table cache_1").collect()
    assertResult(3)(result1.length)

    val result2 = sql("show metacache on table cache_db.cache_2").collect()
    assertResult(2)(result2.length)

    checkAnswer(sql("show metacache on table cache_db.cache_3"),
      Seq(Row("Index", "0 bytes", "0/1 index files cached"), Row("Dictionary", "0 bytes", "")))

    val result4 = sql("show metacache on table default.cache_4").collect()
    assertResult(3)(result4.length)

    sql("use default").collect()
    val result5 = sql("show metacache on table cache_5").collect()
    assertResult(2)(result5.length)
  }
}
@@ -155,6 +155,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val HISTORY = carbonKeyWord("HISTORY")
protected val SEGMENTS = carbonKeyWord("SEGMENTS")
protected val SEGMENT = carbonKeyWord("SEGMENT")
protected val METACACHE = carbonKeyWord("METACACHE")

protected val STRING = carbonKeyWord("STRING")
protected val INTEGER = carbonKeyWord("INTEGER")
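The keyword declaration above only registers the token; a separate rule in the parser maps `SHOW METACACHE [ON TABLE ...]` onto a runnable command. Below is a hedged sketch of such a rule in the parser-combinator style this class already uses; `CarbonShowCacheCommand` is exercised by the test above, but its exact constructor signature is assumed here:

```scala
// Sketch only, not the rule from this commit. Assumed to live inside the
// parser class, which already has LogicalPlan and TableIdentifier in scope.
protected lazy val showCache: Parser[LogicalPlan] =
  SHOW ~> METACACHE ~> opt(ON ~> TABLE ~> (ident <~ ".").? ~ ident) ^^ {
    case tableIdent =>
      // Optional "db.table" resolves to a TableIdentifier; None means
      // "show cache for the whole current database".
      CarbonShowCacheCommand(
        tableIdent.map { case dbName ~ tableName => TableIdentifier(tableName, dbName) })
  }
```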