Merge 3a12fb6 into 9051ab7

QiangCai committed Mar 4, 2019
2 parents 9051ab7 + 3a12fb6 · commit ceb78a7

Showing 7 changed files with 515 additions and 2 deletions.
@@ -195,4 +195,8 @@ public void dropAllCache() {
}
cacheTypeToCacheMap.clear();
}

public CarbonLRUCache getCarbonCache() {
return carbonLRUCache;
}
}
@@ -305,4 +305,8 @@ public void clear() {
lruCacheMap.clear();
}
}

public Map<String, Cacheable> getCacheMap() {
return lruCacheMap;
}
}
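Taken together, the two new accessors expose the process-wide LRU cache and its backing map so that a command can walk the cached entries. Below is a minimal sketch (illustrative only, not code from this commit) of how they compose, assuming the existing `Cacheable.getMemorySize()` accessor from carbondata-core:

```scala
import scala.collection.JavaConverters._

import org.apache.carbondata.core.cache.CacheProvider

object CacheUsageSketch {
  // Illustrative only: sum the memory held by every cached entry
  // (index files, dictionaries, datamaps) using the accessors added above.
  // Assumes Cacheable.getMemorySize(), which the LRU cache uses for sizing.
  def totalCachedBytes(): Long = {
    val cacheMap = CacheProvider.getInstance().getCarbonCache.getCacheMap
    cacheMap.asScala.values.map(_.getMemorySize).sum
  }
}
```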
21 changes: 21 additions & 0 deletions docs/ddl-of-carbondata.md
@@ -67,6 +67,7 @@ CarbonData DDL statements are documented here, which includes:
* [SPLIT PARTITION](#split-a-partition)
* [DROP PARTITION](#drop-a-partition)
* [BUCKETING](#bucketing)
* [CACHE](#cache)

## CREATE TABLE

@@ -1088,4 +1089,24 @@ Users can specify which columns to include and exclude for local dictionary generation
TBLPROPERTIES ('BUCKETNUMBER'='4', 'BUCKETCOLUMNS'='productName')
```

## CACHE

CarbonData internally uses LRU caching to improve performance. Users can inspect the current
cache usage in memory with the following command:

```sql
SHOW METACACHE
```

This shows the overall memory consumed in the cache, broken down by category: index files,
dictionary, and datamaps. It also shows the cache usage of every table and child table in the
current database.
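
For illustration, the shape of the output below is inferred from the test expectations in this
commit (the test asserts the row `cache_empty_db, ALL, 0, 0, 0` for an empty database), so the
exact column headers are an approximation:

```
Database         Table   Index size   Datamap size   Dictionary size
cache_empty_db   ALL     0            0              0
```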

```sql
SHOW METACACHE ON TABLE tableName
```

This shows detailed cache usage for the table `tableName`: its carbonindex files, its dictionary
files, its datamaps, and its child tables.
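
For illustration, the per-table output lists one row per cache category. The rows below mirror
the test expectation in this commit for a table that has an index file on disk but nothing cached
yet (hence `0/1 index files cached`):

```
Index        0 bytes   0/1 index files cached
Dictionary   0 bytes
```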

This command is not allowed on child tables.
@@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.carbondata.sql.commands

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
  override protected def beforeAll(): Unit = {
    // use new database
    sql("drop database if exists cache_db cascade").collect()
    sql("drop database if exists cache_empty_db cascade").collect()
    sql("create database cache_db").collect()
    sql("create database cache_empty_db").collect()
    dropTable
    sql("use cache_db").collect()
    sql(
      """
        | CREATE TABLE cache_db.cache_1
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
        | TBLPROPERTIES('DICTIONARY_INCLUDE'='deptname')
      """.stripMargin)
    // bloom filter datamap on deptno
    sql("CREATE DATAMAP IF NOT EXISTS cache_1_bloom ON TABLE cache_db.cache_1 USING 'bloomfilter' " +
        "DMPROPERTIES('INDEX_COLUMNS'='deptno')")
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_1 ")

    sql(
      """
        | CREATE TABLE cache_2
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_db.cache_2 ")
    sql("insert into table cache_2 select * from cache_1").collect()

    sql(
      """
        | CREATE TABLE cache_3
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE cache_3 ")

    // use default database
    sql("use default").collect()
    sql(
      """
        | CREATE TABLE cache_4
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptno int, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql("insert into table cache_4 select * from cache_db.cache_2").collect()

    // standard partition table
    sql(
      """
        | CREATE TABLE cache_5
        | (empno int, empname String, designation String, doj Timestamp, workgroupcategory int,
        | workgroupcategoryname String, deptname String, projectcode int,
        | projectjoindate Timestamp, projectenddate Timestamp,attendance int,utilization int,
        | salary int)
        | PARTITIONED BY (deptno int)
        | STORED BY 'org.apache.carbondata.format'
      """.stripMargin)
    sql(
      "insert into table cache_5 select empno,empname,designation,doj,workgroupcategory," +
      "workgroupcategoryname,deptname,projectcode,projectjoindate,projectenddate,attendance," +
      "utilization,salary,deptno from cache_4").collect()

    // preaggregate datamap on cache_4
    sql("create datamap cache_4_count on table cache_4 using 'preaggregate' as " +
        "select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname")

    // run queries so that index, dictionary and datamap entries get cached
    sql("select max(deptname) from cache_db.cache_1").collect()
    sql("SELECT deptno FROM cache_db.cache_1 where deptno=10").collect()
    sql("select count(*) from cache_db.cache_2").collect()
    sql("select count(*) from cache_4").collect()
    sql("select count(*) from cache_5").collect()
    sql("select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname").collect()
  }

  override protected def afterAll(): Unit = {
    sql("use default").collect()
    dropTable
  }

  private def dropTable = {
    sql("DROP TABLE IF EXISTS cache_db.cache_1")
    sql("DROP TABLE IF EXISTS cache_db.cache_2")
    sql("DROP TABLE IF EXISTS cache_db.cache_3")
    sql("DROP TABLE IF EXISTS default.cache_4")
    sql("DROP TABLE IF EXISTS default.cache_5")
  }

  test("show cache") {
    sql("use cache_empty_db").collect()
    val result1 = sql("show metacache").collect()
    assertResult(2)(result1.length)
    assertResult(Row("cache_empty_db", "ALL", "0", "0", "0"))(result1(1))

    sql("use cache_db").collect()
    val result2 = sql("show metacache").collect()
    assertResult(4)(result2.length)

    sql("use default").collect()
    val result3 = sql("show metacache").collect()
    val dataMapCacheInfo = result3
      .map(row => row.getString(1))
      .filter(table => table.equals("cache_4_cache_4_count"))
    assertResult(1)(dataMapCacheInfo.length)
  }

  test("show metacache on table") {
    sql("use cache_db").collect()
    val result1 = sql("show metacache on table cache_1").collect()
    assertResult(3)(result1.length)

    val result2 = sql("show metacache on table cache_db.cache_2").collect()
    assertResult(2)(result2.length)

    checkAnswer(sql("show metacache on table cache_db.cache_3"),
      Seq(Row("Index", "0 bytes", "0/1 index files cached"), Row("Dictionary", "0 bytes", "")))

    val result4 = sql("show metacache on table default.cache_4").collect()
    assertResult(3)(result4.length)

    sql("use default").collect()
    val result5 = sql("show metacache on table cache_5").collect()
    assertResult(2)(result5.length)
  }
}
@@ -155,6 +155,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val HISTORY = carbonKeyWord("HISTORY")
protected val SEGMENTS = carbonKeyWord("SEGMENTS")
protected val SEGMENT = carbonKeyWord("SEGMENT")
protected val METACACHE = carbonKeyWord("METACACHE")

protected val STRING = carbonKeyWord("STRING")
protected val INTEGER = carbonKeyWord("INTEGER")
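The keyword declaration above only registers the token; a separate rule in the parser maps `SHOW METACACHE [ON TABLE ...]` onto a runnable command. Below is a hedged sketch of such a rule in the parser-combinator style this class already uses; `CarbonShowCacheCommand` is exercised by the test above, but its exact constructor signature is assumed here:

```scala
// Sketch only, not the rule from this commit. Assumed to live inside the
// parser class, which already has LogicalPlan and TableIdentifier in scope.
protected lazy val showCache: Parser[LogicalPlan] =
  SHOW ~> METACACHE ~> opt(ON ~> TABLE ~> (ident <~ ".").? ~ ident) ^^ {
    case tableIdent =>
      // Optional "db.table" resolves to a TableIdentifier; None means
      // "show cache for the whole current database".
      CarbonShowCacheCommand(
        tableIdent.map { case dbName ~ tableName => TableIdentifier(tableName, dbName) })
  }
```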