From fa5004df3a6a41e8b6835532714ff7fa69933983 Mon Sep 17 00:00:00 2001 From: Abhishek Madav Date: Thu, 20 Oct 2016 18:00:06 -0700 Subject: [PATCH 1/2] SPARK-17922 ClassCastException java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator cannot be cast to org.apache.spark.sql.catalyst.expressions.UnsafeProjection --- .../expressions/codegen/CodeGenerator.scala | 10 ++--- .../catalyst/util/DelegateClassLoader.scala | 37 +++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DelegateClassLoader.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 6cab50ae1bf8d..1980fce8646f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types._ import org.apache.spark.util.{ParentClassLoader, Utils} +import org.apache.spark.sql.catalyst.util.DelegateClassLoader /** * Java source for evaluating an [[Expression]] given a [[InternalRow]] of input. @@ -615,11 +616,8 @@ class CodegenContext { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { - // We can't know how many bytecode will be generated, so use the length of source code - // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should - // also not be too small, or it will have many function calls (for wide table), see the - // results in BenchmarkWideTable. 
- if (blockBuilder.length > 1024) { + // We can't know how many byte code will be generated, so use number of bytes as limit + if (blockBuilder.length > 60 * 1000) { blocks += blockBuilder.toString() blockBuilder.clear() } @@ -874,7 +872,7 @@ object CodeGenerator extends Logging { // find other possible classes (see org.codehaus.janinoClassLoaderIClassLoader's // findIClass method). Please also see https://issues.apache.org/jira/browse/SPARK-15622 and // https://issues.apache.org/jira/browse/SPARK-11636. - val parentClassLoader = new ParentClassLoader(Utils.getContextOrSparkClassLoader) + val parentClassLoader = new DelegateClassLoader(Utils.getContextOrSparkClassLoader, "org.apache.spark.sql.catalyst.expressions.GeneratedClass") evaluator.setParentClassLoader(parentClassLoader) // Cannot be under package codegen, or fail with java.lang.InstantiationException evaluator.setClassName("org.apache.spark.sql.catalyst.expressions.GeneratedClass") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DelegateClassLoader.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DelegateClassLoader.scala new file mode 100644 index 0000000000000..dc4d31d024388 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DelegateClassLoader.scala @@ -0,0 +1,37 @@ +package org.apache.spark.sql.catalyst.util +/* + * See: https://issues.apache.org/jira/browse/SPARK-17922. + * The Janino compiler internally creates a ByteArrayClassLoader (http://grepcode.com/file/repo1.maven.org/maven2/org.codehaus.janino/janino/2.5.15/org/codehaus/janino/ByteArrayClassLoader.java#ByteArrayClassLoader) + * to load the compiled generated class. But that class loader does not override loadClass to load the generated class from its byte array. 
+ * Instead, the call first goes to the parent class loader, and if that class loader somehow finds an old generated class (all generated classes share the same name) + * it will incorrectly load the old generated class. This class loader is used to intercept delegation to the parent when the class has to be loaded by the current byte-array class loader. + * It is set as the parent class loader for the Janino compiler in CodeGenerator.doCompile. + * It is a special class loader that skips delegating to the parent class loader when the class name is the same as the generated class name, + * because that class should be loaded by the current class loader. + */ +class DelegateClassLoader(parent:ClassLoader, skipClass: String) extends ClassLoader(parent) { + override def findClass(name: String): Class[_] = { + if(checkClassName(name)) { + return null + } + super.findClass(name) + } + + override def loadClass(name: String): Class[_] = { + if(checkClassName(name)) { + return null + } + super.loadClass(name) + } + + override def loadClass(name: String, resolve: Boolean): Class[_] = { + if(checkClassName(name)) { + return null + } + super.loadClass(name, resolve) + } + + def checkClassName(name: String): Boolean = { + skipClass.equals(name) + } +} From ed89b875fa7d868061337edf9cadb35d63a88d14 Mon Sep 17 00:00:00 2001 From: Abhishek Madav Date: Fri, 17 Feb 2017 15:59:23 -0800 Subject: [PATCH 2/2] addressing comments --- .../sql/catalyst/expressions/codegen/CodeGenerator.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1980fce8646f3..62b9751bfc5d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -616,8 +616,11 @@ 
class CodegenContext { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { - // We can't know how many byte code will be generated, so use number of bytes as limit - if (blockBuilder.length > 60 * 1000) { + // We can't know how many bytecode will be generated, so use the length of source code + // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should + // also not be too small, or it will have many function calls (for wide table), see the + // results in BenchmarkWideTable. + if (blockBuilder.length > 1024) { blocks += blockBuilder.toString() blockBuilder.clear() }