Skip to content

Commit

Permalink
[CARBONDATA-3313] count(*) is not invalidating the invalid segments c…
Browse files Browse the repository at this point in the history
…ache

Problem:
When a segment is deleted, the next query has to clear/invalidate the datamap cache for the now-invalid segments. However, the count(*) flow did not take the invalid segments into account when clearing the datamap cache.

Solution:
In the count(*) flow, before clearing the datamap cache, also add the table's invalid segments to the set of segments to be cleaned.

This closes #3144
  • Loading branch information
dhatchayani authored and kunal642 committed Mar 14, 2019
1 parent c55a5d0 commit 89c3873
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
Expand Up @@ -617,6 +617,8 @@ segment needs refreshing. same thing need for select count(*) flow also.
toBeCleanedSegments.add(eachSegment);
}
}
// remove entry in the segment index if there are invalid segments
toBeCleanedSegments.addAll(allSegments.getInvalidSegments());
if (toBeCleanedSegments.size() > 0) {
DataMapStoreManager.getInstance()
.clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()),
Expand Down
Expand Up @@ -110,6 +110,28 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
sql("select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname").collect()
}

test("test drop cache invalidation in case of invalid segments"){
  // Reads the value the assertions below compare against "N/N index files cached":
  // column 2 of the first row returned by SHOW METACACHE for empTable.
  def cachedIndexFiles(): String =
    sql("SHOW METACACHE on table empTable").collect()(0).get(2).toString

  // Start from a clean slate so a table left behind by an earlier, aborted run
  // cannot make the CREATE TABLE below fail.
  sql("DROP TABLE IF EXISTS empTable")
  sql("CREATE TABLE empTable(empno int, empname String, designation String, " +
    "doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, " +
    "deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp," +
    "attendance int, utilization int, salary int) stored by 'carbondata'")

  // Three loads of the same file, one segment per load (ids 0 and 1 are deleted below).
  (1 to 3).foreach { _ =>
    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
  }

  // Run a query first so that the index files get cached.
  sql("select count(*) from empTable").show()
  assert(cachedIndexFiles().equalsIgnoreCase("3/3 index files cached"))

  sql("delete from table empTable where segment.id in(0)").show()
  // check whether count(*) query invalidates the cache for the invalid segments
  sql("select count(*) from empTable").show()
  assert(cachedIndexFiles().equalsIgnoreCase("2/2 index files cached"))

  sql("delete from table empTable where segment.id in(1)").show()
  // check whether select * query invalidates the cache for the invalid segments
  sql("select * from empTable").show()
  assert(cachedIndexFiles().equalsIgnoreCase("1/1 index files cached"))
}

override protected def afterAll(): Unit = {
sql("use default").collect()
Expand All @@ -122,6 +144,7 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
sql("DROP TABLE IF EXISTS cache_db.cache_3")
sql("DROP TABLE IF EXISTS default.cache_4")
sql("DROP TABLE IF EXISTS default.cache_5")
sql("DROP TABLE IF EXISTS empTable")
}

test("show cache") {
Expand Down

0 comments on commit 89c3873

Please sign in to comment.