[SPARK-25093][SQL] Avoid recompiling regexp for comments multiple times #22135

Closed · wants to merge 3 commits
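Every change in this patch is the same mechanical refactoring: a `scala.util.matching.Regex` that used to be built inside a frequently-called method is hoisted to a `private val` on the enclosing object or class, so the underlying `java.util.regex.Pattern.compile` runs once at initialization instead of on every call. A minimal sketch of the before/after shape (the object and pattern names below are illustrative, not from the patch):

```scala
object Before {
  def isVersionString(s: String): Boolean = {
    // `.r` builds a new Regex (compiling a fresh
    // java.util.regex.Pattern under the hood) on every call.
    val version = """(\d+)\.(\d+)\.(\d+)""".r
    version.pattern.matcher(s).matches()
  }
}

object After {
  // Compiled exactly once, when the object is first initialized.
  private val Version = """(\d+)\.(\d+)\.(\d+)""".r

  def isVersionString(s: String): Boolean = Version.pattern.matcher(s).matches()
}
```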
core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

@@ -758,6 +758,7 @@ private[deploy] class Worker(
 private[deploy] object Worker extends Logging {
   val SYSTEM_NAME = "sparkWorker"
   val ENDPOINT_NAME = "Worker"
+  private val SSL_NODE_LOCAL_CONFIG_PATTERN = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r

   def main(argStrings: Array[String]) {
     Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler(
@@ -803,9 +804,8 @@ private[deploy] object Worker extends Logging {
   }

   def isUseLocalNodeSSLConfig(cmd: Command): Boolean = {
-    val pattern = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r
     val result = cmd.javaOpts.collectFirst {
-      case pattern(_result) => _result.toBoolean
+      case SSL_NODE_LOCAL_CONFIG_PATTERN(_result) => _result.toBoolean
     }
     result.getOrElse(false)
   }
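For reference, the hoisted val still works as an extractor inside `collectFirst`: a `Regex` in pattern position calls its `unapplySeq`, which matches only the whole string and binds the capture group. A standalone illustration (the opts list here is invented):

```scala
val SSL_NODE_LOCAL_CONFIG_PATTERN = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r

val javaOpts = Seq("-Xmx1g", "-Dspark.ssl.useNodeLocalConf=true")

// The first matching element wins; the (.+) group binds to `value`.
val useNodeLocal = javaOpts.collectFirst {
  case SSL_NODE_LOCAL_CONFIG_PATTERN(value) => value.toBoolean
}.getOrElse(false)
// useNodeLocal == true
```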
core/src/main/scala/org/apache/spark/util/Utils.scala (6 additions, 5 deletions)

@@ -1409,13 +1409,14 @@ private[spark] object Utils extends Logging {
     }
   }

+  // A regular expression to match classes of the internal Spark API's
+  // that we want to skip when finding the call site of a method.
+  private val SPARK_CORE_CLASS_REGEX =
+    """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?(\.broadcast)?\.[A-Z]""".r
+  private val SPARK_SQL_CLASS_REGEX = """^org\.apache\.spark\.sql.*""".r
+
   /** Default filtering function for finding call sites using `getCallSite`. */
   private def sparkInternalExclusionFunction(className: String): Boolean = {
-    // A regular expression to match classes of the internal Spark API's
-    // that we want to skip when finding the call site of a method.
-    val SPARK_CORE_CLASS_REGEX =
-      """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?(\.broadcast)?\.[A-Z]""".r
-    val SPARK_SQL_CLASS_REGEX = """^org\.apache\.spark\.sql.*""".r
     val SCALA_CORE_CLASS_PREFIX = "scala"
     val isSparkClass = SPARK_CORE_CLASS_REGEX.findFirstIn(className).isDefined ||
       SPARK_SQL_CLASS_REGEX.findFirstIn(className).isDefined
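Sharing these patterns at the object level is thread-safe: `Regex` and the compiled `java.util.regex.Pattern` are immutable, and each call to `findFirstIn` creates its own `Matcher`. A quick check of what the call-site filter matches (class names picked for illustration):

```scala
val SPARK_CORE_CLASS_REGEX =
  """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?(\.broadcast)?\.[A-Z]""".r

SPARK_CORE_CLASS_REGEX.findFirstIn("org.apache.spark.rdd.RDD").isDefined          // true
SPARK_CORE_CLASS_REGEX.findFirstIn("org.apache.spark.examples.SparkPi").isDefined // false
```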
mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala

@@ -59,13 +59,13 @@ private[r] class AFTSurvivalRegressionWrapper private (

 private[r] object AFTSurvivalRegressionWrapper extends MLReadable[AFTSurvivalRegressionWrapper] {

+  private val FORMULA_REGEXP = """Surv\(([^,]+), ([^,]+)\) ~ (.+)""".r
+
   private def formulaRewrite(formula: String): (String, String) = {
     var rewritedFormula: String = null
     var censorCol: String = null
-
-    val regex = """Surv\(([^,]+), ([^,]+)\) ~ (.+)""".r
     try {
-      val regex(label, censor, features) = formula
+      val FORMULA_REGEXP(label, censor, features) = formula
       // TODO: Support dot operator.
       if (features.contains(".")) {
         throw new UnsupportedOperationException(
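One thing the rename preserves: `val FORMULA_REGEXP(label, censor, features) = formula` is a pattern definition, so a formula that does not match throws `scala.MatchError`, which is what the surrounding `try` is catching. A tiny sketch (the formula string is made up):

```scala
val FORMULA_REGEXP = """Surv\(([^,]+), ([^,]+)\) ~ (.+)""".r

// Destructures via the three capture groups.
val FORMULA_REGEXP(label, censor, features) = "Surv(time, status) ~ x1 + x2"
// label == "time", censor == "status", features == "x1 + x2"
```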
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala

@@ -101,6 +101,8 @@ class SessionCatalog(
   @GuardedBy("this")
   protected var currentDb: String = formatDatabaseName(DEFAULT_DATABASE)

+  private val validNameFormat = "([\\w_]+)".r
+
   /**
    * Checks if the given name conforms the Hive standard ("[a-zA-Z_0-9]+"),
    * i.e. if this name only contains characters, numbers, and _.
@@ -109,7 +111,6 @@
    * org.apache.hadoop.hive.metastore.MetaStoreUtils.validateName.
    */
   private def validateName(name: String): Unit = {
-    val validNameFormat = "([\\w_]+)".r
     if (!validNameFormat.pattern.matcher(name).matches()) {
       throw new AnalysisException(s"`$name` is not a valid name for tables/databases. " +
         "Valid names only contain alphabet characters, numbers and _.")
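`Regex.pattern` exposes the already-compiled `java.util.regex.Pattern`, so `validateName` now reuses one compiled pattern per `SessionCatalog` instance. Its behavior, for reference (the names are invented):

```scala
val validNameFormat = "([\\w_]+)".r

validNameFormat.pattern.matcher("my_table1").matches() // true
validNameFormat.pattern.matcher("bad-name").matches()  // false: '-' is outside [\w_]
```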
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala

@@ -27,6 +27,10 @@ import java.util.regex.Matcher
  */
 object CodeFormatter {
   val commentHolder = """\/\*(.+?)\*\/""".r
+  val commentRegexp =
+    ("""([ |\t]*?\/\*[\s|\S]*?\*\/[ |\t]*?)|""" + // strip /*comment*/
+      """([ |\t]*?\/\/[\s\S]*?\n)""").r // strip //comment
+  val extraNewLinesRegexp = """\n\s*\n""".r // strip extra newlines

   def format(code: CodeAndComment, maxLines: Int = -1): String = {
     val formatter = new CodeFormatter
@@ -91,11 +95,7 @@
   }

   def stripExtraNewLinesAndComments(input: String): String = {
-    val commentReg =
-      ("""([ |\t]*?\/\*[\s|\S]*?\*\/[ |\t]*?)|""" + // strip /*comment*/
-        """([ |\t]*?\/\/[\s\S]*?\n)""").r // strip //comment
-    val codeWithoutComment = commentReg.replaceAllIn(input, "")
-    codeWithoutComment.replaceAll("""\n\s*\n""", "\n") // strip ExtraNewLines
+    extraNewLinesRegexp.replaceAllIn(commentRegexp.replaceAllIn(input, ""), "\n")
   }
 }
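The one-liner folds the two passes into a single expression but keeps the behavior: `String.replaceAll` and `Regex.replaceAllIn` apply the same `java.util.regex` semantics, so only the compilation cost moves. An illustration of what the two hoisted patterns strip (the input is invented):

```scala
val input = "int a = 1;\n\n// increment\n\na += 1;\n"

CodeFormatter.stripExtraNewLinesAndComments(input)
// "int a = 1;\na += 1;\n"  (the // comment and the blank lines are gone)
```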
sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala

@@ -114,6 +114,8 @@ abstract class DataType extends AbstractDataType {
 @InterfaceStability.Stable
 object DataType {

+  private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r
+
   def fromDDL(ddl: String): DataType = {
     try {
       CatalystSqlParser.parseDataType(ddl)
@@ -132,7 +134,6 @@

   /** Given the string representation of a type, return its DataType */
   private def nameToType(name: String): DataType = {
-    val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r
     name match {
       case "decimal" => DecimalType.USER_DEFAULT
       case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt)
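As in the other files, `FIXED_DECIMAL` keeps working as an extractor in the `match`; its two capture groups bind precision and scale. A standalone sketch (the type string is invented):

```scala
val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r

"decimal(10, 2)" match {
  case FIXED_DECIMAL(precision, scale) => (precision.toInt, scale.toInt) // (10, 2)
  case other => sys.error(s"not a fixed-precision decimal: $other")
}
```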
streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala

@@ -940,6 +940,11 @@ abstract class DStream[T: ClassTag] (

 object DStream {

+  private val SPARK_CLASS_REGEX = """^org\.apache\.spark""".r
+  private val SPARK_STREAMING_TESTCLASS_REGEX = """^org\.apache\.spark\.streaming\.test""".r
+  private val SPARK_EXAMPLES_CLASS_REGEX = """^org\.apache\.spark\.examples""".r
+  private val SCALA_CLASS_REGEX = """^scala""".r
+
   // `toPairDStreamFunctions` was in SparkContext before 1.3 and users had to
   // `import StreamingContext._` to enable it. Now we move it here to make the compiler find
   // it automatically. However, we still keep the old function in StreamingContext for backward
@@ -953,11 +958,6 @@

   /** Get the creation site of a DStream from the stack trace of when the DStream is created. */
   private[streaming] def getCreationSite(): CallSite = {
-    val SPARK_CLASS_REGEX = """^org\.apache\.spark""".r
-    val SPARK_STREAMING_TESTCLASS_REGEX = """^org\.apache\.spark\.streaming\.test""".r
-    val SPARK_EXAMPLES_CLASS_REGEX = """^org\.apache\.spark\.examples""".r
-    val SCALA_CLASS_REGEX = """^scala""".r
-
     /** Filtering function that excludes non-user classes for a streaming application */
     def streamingExclustionFunction(className: String): Boolean = {
       def doesMatch(r: Regex): Boolean = r.findFirstIn(className).isDefined
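The four patterns feed the same exclusion logic as before; only where they are compiled changes. A quick sanity check of the filter's building block (class names illustrative):

```scala
import scala.util.matching.Regex

val SPARK_CLASS_REGEX = """^org\.apache\.spark""".r
val SPARK_EXAMPLES_CLASS_REGEX = """^org\.apache\.spark\.examples""".r

def doesMatch(r: Regex, className: String): Boolean =
  r.findFirstIn(className).isDefined

doesMatch(SPARK_CLASS_REGEX, "org.apache.spark.streaming.dstream.DStream") // true
doesMatch(SPARK_EXAMPLES_CLASS_REGEX, "com.example.MyJob")                 // false
```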