Skip to content
Permalink
Browse files
Add new TimeTracker for more advanced performance analysis
Because of our nested parsers, it can sometimes be difficult to
determine how much time each processor takes on average to complete it's
parse/unparse, even when using a profiler. This adds a new timer that
makes it much easier to determine how much total time particular parsers
take and on average how long they take. This can help to prioritize
performance optimizations.

The intended use is to modify the Parser.scala parse1() function to look
like this:

  try {
    TimeTracker.track(parserName) {
      parse(pstate)
    }
  }

The resulting output gives a break down of all the parsers and how
performant they are. Once slow processors are found, it can then be
useful to wrap subsections of the parse() method in a call to
TimeTracker.track() to determine which sections are slow. This incurs
less overhead than traditional profilers and give more control over
which sections are actually profiled.

DAFFODIL-1883
  • Loading branch information
stevedlawrence committed Feb 3, 2020
1 parent f388c5a commit aeafeccb4482a9c39ab7a0026060f8c6b2d7ab92
Showing 3 changed files with 163 additions and 1 deletion.
@@ -17,6 +17,8 @@

package org.apache.daffodil.util

import collection.JavaConverters._

object Timer extends Logging {

def printTime(message: String, nanos: Long, units: String) {
@@ -166,3 +168,96 @@ object TakTimer {
}

}


object TimeTracker extends Logging {

case class SectionTime(var time: Long, var count: Int)

/**
* A mapping of each section of code tracked
*/
val sectionTimes = new java.util.HashMap[String,SectionTime]()

/**
* Used to track the time of child tracked sections so they can be excluded
* from the parents tracked time. When a tracked section begins, we will push
* a zero onto the top of the stack. As each child section finishes and
* calculates their time, they will add their time to the value at the top of
* the stack. This effectively keeps track of how long all child tracked
* sections take. Then when the parent tracked section ends it can pop and
* subtract the value on the top of the stack to determine how much time just
* that section took, excluding the nested child track sections. This is
* necessary since we often want to track the time each parser takes to
* complete, but our parsers are nested making that difficult with standard
* profilers. This makes that much easier.
*/
val childrenTimeStack = scala.collection.mutable.Stack[Long]()

/**
* Used to measure a section of code that might get called multiple times.
* This keeps track of the total amount of time spent in that section and how
* many times that section was run. A name is provided by the caller to
* identify the section. Additionally, if a section that is being tracked
* calls another section that is being tracked (i.e. nested TimeTracker.track
* calls) the time spent in the inner section is not included in the outer
* section. Note that because of this feature, the use of the track function
* is not thread safe.
*
* For example,
*
* TimeTracker.track("code section") {
* // code to track goes here
* }
*
* Once processing is complete, a call to logTimes will dispaly the stats
* about the tracked sections.
*/
def track[A](name: String)(body: => A): A = macro TimeTrackerMacros.trackMacro

/**
* Output the results of the tracked sections in sorted columnar format.
*/
def logTimes(logLevel: LogLevel.Type): Unit = {
val stats = sectionTimes.asScala.toSeq.map { case (name, SectionTime(timeNS, count)) => {
val average = timeNS / count
(name, timeNS / 1000000000.0, average, count)
}}.sortBy(_._2).reverse

val totalTime = stats.map { _._2 }.sum

val stringStats = stats.map { case (name, time, average, count) =>
(
name,
"%.3f".format(time),
"%.2f%%".format(time * 100 / totalTime),
average.toString,
count.toString
)
}

val nameLen = stringStats.map(_._1.length).max
val timeLen = stringStats.map(_._2.length).max
val percentLen = stringStats.map(_._3.length).max
val averageLen = stringStats.map(_._4.length).max
val countLen = stringStats.map(_._5.length).max

val formatString =
"%-" + nameLen + "s " +
"%" + timeLen + "s " +
"%" + percentLen + "s " +
"%" + averageLen + "s " +
"%" + countLen + "s"

log(logLevel, formatString, "Name", "Time", "Pct", "Average", "Count")
stringStats.foreach { stats =>
log(logLevel, formatString, stats.productIterator.toList: _*)
}
log(logLevel, "Total Time: %.3f", totalTime)
}

def clear(): Unit = {
sectionTimes.clear()
childrenTimeStack.clear()
}
}
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.daffodil.util

import scala.reflect.macros.blackbox.Context

object TimeTrackerMacros {

def trackMacro(c: Context)(name: c.Tree)(body: c.Tree) = {
import c.universe._

val startTime = TermName(c.freshName)
val endTime = TermName(c.freshName)
val childrenTime = TermName(c.freshName)
val timeTaken = TermName(c.freshName)
val selfTime = TermName(c.freshName)
val sectionTime = TermName(c.freshName)
val result = TermName(c.freshName)
val key = TermName(c.freshName)

q"""
{
val $startTime = System.nanoTime
TimeTracker.childrenTimeStack.push(0)
val $result = try {
$body
} finally {
val $endTime = System.nanoTime
val $timeTaken = $endTime - $startTime
val $childrenTime = TimeTracker.childrenTimeStack.pop()
val $selfTime = $timeTaken - $childrenTime

val $key = $name
val $sectionTime = TimeTracker.sectionTimes.get($key)
if ($sectionTime == null) {
TimeTracker.sectionTimes.put($key, new TimeTracker.SectionTime($selfTime, 1))
} else {
$sectionTime.time += $selfTime
$sectionTime.count += 1
}

if (!TimeTracker.childrenTimeStack.isEmpty) {
TimeTracker.childrenTimeStack.push(TimeTracker.childrenTimeStack.pop + $timeTaken)
}
}

$result
}
"""
}
}
@@ -54,7 +54,7 @@ sealed trait Parser

def isEmpty = false // override in NadaParser

protected def parserName = Misc.getNameFromClass(this)
protected lazy val parserName = Misc.getNameFromClass(this)

def PE(pstate: PState, s: String, args: Any*) = {
pstate.setFailed(new ParseError(One(context.schemaFileLocation), One(pstate.currentLocation), s, args: _*))

0 comments on commit aeafecc

Please sign in to comment.