Skip to content

Commit

Permalink
[SPARK-10422] [SQL] String column in InMemoryColumnarCache needs to override clone method
Browse files Browse the repository at this point in the history

https://issues.apache.org/jira/browse/SPARK-10422

Author: Yin Huai <yhuai@databricks.com>

Closes #8578 from yhuai/SPARK-10422.
  • Loading branch information
yhuai authored and davies committed Sep 3, 2015
1 parent 6cd98c1 commit 03f3e91
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
Expand Up @@ -339,6 +339,8 @@ private[sql] object STRING extends NativeColumnType(StringType, 7, 8) {
/**
 * Copies a single field between rows: reads the value at `fromOrdinal` of `from` with
 * `getField` and stores it at `toOrdinal` of `to` with `setField`.
 */
override def copyField(
    from: InternalRow,
    fromOrdinal: Int,
    to: MutableRow,
    toOrdinal: Int): Unit = {
  val value = getField(from, fromOrdinal)
  setField(to, toOrdinal, value)
}

/**
 * Returns the result of `v.clone()` for the given string value, overriding the
 * `clone` inherited from the parent column type (see SPARK-10422).
 */
override def clone(v: UTF8String): UTF8String = {
  v.clone()
}
}

private[sql] object DATE extends NativeColumnType(DateType, 8, 4) {
Expand Down
Expand Up @@ -191,4 +191,24 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
ctx.table("InMemoryCache_different_data_types").collect())
ctx.dropTempTable("InMemoryCache_different_data_types")
}

// Regression test for SPARK-10422: caching a DataFrame with a string column and then
// reading it back must succeed and return the same rows as an uncached equivalent.
test("SPARK-10422: String column in InMemoryColumnarCache needs to override clone method") {
  // Builds the single-column string DataFrame. It is called twice so that the expected
  // answer is constructed separately from the instance that gets cached.
  def buildStringDf() =
    ctx.range(1, 100)
      .selectExpr("id % 10 as id")
      .rdd
      .map(row => Tuple1(s"str_$row"))
      .toDF("i")

  val cached = buildStringDf().cache()
  try {
    // count triggers the caching action. It should not throw.
    cached.count()

    // Make sure the DataFrame is indeed cached. The lookup goes through `ctx`, the same
    // context that created the DataFrame.
    assert(ctx.cacheManager.lookupCachedData(cached).nonEmpty)

    // Check the cached result against a freshly built DataFrame.
    checkAnswer(cached, buildStringDf())
  } finally {
    // Drop the cache even if one of the checks above fails, so that a failure here does
    // not leave cached data behind for later tests using the same context.
    cached.unpersist()
  }
}
}

0 comments on commit 03f3e91

Please sign in to comment.